32-bit processes running on a 64-bit kernel are not always detected
correctly, causing the process to crash when uretprobes are installed.
The reason for the crash is that in_ia32_syscall() is used to determine
the process's mode, which only works correctly when called from a
syscall. In the case of uretprobes, however, the function is called from
a software interrupt and always returns 'false' (on a 64-bit kernel). In
consequence this leads to corruption of the process's return address.
This can be fixed by using user_64bit_mode(), which should always be
correct.
Signed-off-by: Sebastian Mayr <[email protected]>
---
Please note that I just stumbled over this bug and am not really
familiar with all the internals. So take the patch and, in particular,
the commit message with a grain of salt. Thanks!
arch/x86/kernel/uprobes.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 918b5092a85f..d6e261202c6b 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -508,9 +508,9 @@ struct uprobe_xol_ops {
void (*abort)(struct arch_uprobe *, struct pt_regs *);
};
-static inline int sizeof_long(void)
+static inline int sizeof_long(struct pt_regs *regs)
{
- return in_ia32_syscall() ? 4 : 8;
+ return user_64bit_mode(regs) ? 8 : 4;
}
static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
@@ -521,9 +521,9 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
static int emulate_push_stack(struct pt_regs *regs, unsigned long val)
{
- unsigned long new_sp = regs->sp - sizeof_long();
+ unsigned long new_sp = regs->sp - sizeof_long(regs);
- if (copy_to_user((void __user *)new_sp, &val, sizeof_long()))
+ if (copy_to_user((void __user *)new_sp, &val, sizeof_long(regs)))
return -EFAULT;
regs->sp = new_sp;
@@ -556,7 +556,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
long correction = utask->vaddr - utask->xol_vaddr;
regs->ip += correction;
} else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) {
- regs->sp += sizeof_long(); /* Pop incorrect return address */
+ regs->sp += sizeof_long(regs); /* Pop incorrect return address */
if (emulate_push_stack(regs, utask->vaddr + auprobe->defparam.ilen))
return -ERESTART;
}
@@ -675,7 +675,7 @@ static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
* "call" insn was executed out-of-line. Just restore ->sp and restart.
* We could also restore ->ip and try to call branch_emulate_op() again.
*/
- regs->sp += sizeof_long();
+ regs->sp += sizeof_long(regs);
return -ERESTART;
}
@@ -1056,7 +1056,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
unsigned long
arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
{
- int rasize = sizeof_long(), nleft;
+ int rasize = sizeof_long(regs), nleft;
unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */
if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize))
--
2.22.0
On Sun, 2019-07-28 at 17:26 +0200, Sebastian Mayr wrote:
> 32-bit processes running on a 64-bit kernel are not always detected
> correctly, causing the process to crash when uretprobes are
> installed.
> The reason for the crash is that in_ia32_syscall() is used to
> determine
> the process's mode, which only works correctly when called from a
> syscall. In the case of uretprobes, however, the function is called
> from
> a software interrupt and always returns 'false' (on a 64-bit kernel).
> In
> consequence this leads to corruption of the process's return address.
>
> This can be fixed by using user_64bit_mode(), which should always be
> correct.
>
> Signed-off-by: Sebastian Mayr <[email protected]>
> ---
>
> Please note that I just stumbled over this bug and am not really
> familiar with all the internals. So take the patch and, in
> particular,
> the commit message with a grain of salt. Thanks!
>
> arch/x86/kernel/uprobes.c | 14 +++++++-------
> 1 file changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
> index 918b5092a85f..d6e261202c6b 100644
> --- a/arch/x86/kernel/uprobes.c
> +++ b/arch/x86/kernel/uprobes.c
> @@ -508,9 +508,9 @@ struct uprobe_xol_ops {
> void (*abort)(struct arch_uprobe *, struct pt_regs *);
> };
>
> -static inline int sizeof_long(void)
> +static inline int sizeof_long(struct pt_regs *regs)
> {
> - return in_ia32_syscall() ? 4 : 8;
> + return user_64bit_mode(regs) ? 8 : 4;
> }
>
> static int default_pre_xol_op(struct arch_uprobe *auprobe, struct
> pt_regs *regs)
> @@ -521,9 +521,9 @@ static int default_pre_xol_op(struct arch_uprobe
> *auprobe, struct pt_regs *regs)
>
> static int emulate_push_stack(struct pt_regs *regs, unsigned long
> val)
> {
> - unsigned long new_sp = regs->sp - sizeof_long();
> + unsigned long new_sp = regs->sp - sizeof_long(regs);
>
> - if (copy_to_user((void __user *)new_sp, &val, sizeof_long()))
> + if (copy_to_user((void __user *)new_sp, &val,
> sizeof_long(regs)))
> return -EFAULT;
>
> regs->sp = new_sp;
> @@ -556,7 +556,7 @@ static int default_post_xol_op(struct arch_uprobe
> *auprobe, struct pt_regs *regs
> long correction = utask->vaddr - utask->xol_vaddr;
> regs->ip += correction;
> } else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) {
> - regs->sp += sizeof_long(); /* Pop incorrect return
> address */
> + regs->sp += sizeof_long(regs); /* Pop incorrect return
> address */
> if (emulate_push_stack(regs, utask->vaddr + auprobe-
> >defparam.ilen))
> return -ERESTART;
> }
> @@ -675,7 +675,7 @@ static int branch_post_xol_op(struct arch_uprobe
> *auprobe, struct pt_regs *regs)
> * "call" insn was executed out-of-line. Just restore ->sp and
> restart.
> * We could also restore ->ip and try to call
> branch_emulate_op() again.
> */
> - regs->sp += sizeof_long();
> + regs->sp += sizeof_long(regs);
> return -ERESTART;
> }
>
> @@ -1056,7 +1056,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe
> *auprobe, struct pt_regs *regs)
> unsigned long
> arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr,
> struct pt_regs *regs)
> {
> - int rasize = sizeof_long(), nleft;
> + int rasize = sizeof_long(regs), nleft;
> unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit
> apps */
>
> if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp,
> rasize))
Any feedback on this patch would be greatly appreciated.
Thanks,
Sebastian
On Mon, 19 Aug 2019, Sebastian Mayr wrote:
> > @@ -1056,7 +1056,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe
> > *auprobe, struct pt_regs *regs)
> > unsigned long
> > arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr,
> > struct pt_regs *regs)
> > {
> > - int rasize = sizeof_long(), nleft;
> > + int rasize = sizeof_long(regs), nleft;
> > unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit
> > apps */
> >
> > if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp,
> > rasize))
>
> Any feedback on this patch would be greatly appreciated.
Sorry, fell through the cracks. Thanks for the reminder!
Sebastian,
On Sun, 28 Jul 2019, Sebastian Mayr wrote:
sorry for the delay..
> 32-bit processes running on a 64-bit kernel are not always detected
> correctly, causing the process to crash when uretprobes are installed.
> The reason for the crash is that in_ia32_syscall() is used to determine
> the process's mode, which only works correctly when called from a
> syscall. In the case of uretprobes, however, the function is called from
> a software interrupt and always returns 'false' (on a 64-bit kernel). In
s/software interrupt/exception/
> consequence this leads to corruption of the process's return address.
Nice detective work and well written changelog!
> This can be fixed by using user_64bit_mode(), which should always be
> correct.
This wants to be:
Fix this by using user_64bit_mode() which is always correct.
Be imperative, even if not entirely sure. You nicely put a disclaimer into
the discard section.
This also wants a Fixes tag and cc stable. Interestingly enough this should
have been detected by the rename with
abfb9498ee13 ("x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()")
which states in the changelog:
The is_ia32_task()/is_x32_task() function names are a big misnomer: they
suggests that the compat-ness of a system call is a task property, which
is not true, the compatness of a system call purely depends on how it
was invoked through the system call layer.
.....
and then it went and blindly renamed every call site ....
And sadly this was already mentioned here:
8faaed1b9f50 ("uprobes/x86: Introduce sizeof_long(), cleanup adjust_ret_addr() and arch_uretprobe_hijack_return_addr()")
where the changelog says:
TODO: is_ia32_task() is not what we actually want, TS_COMPAT does
not necessarily mean 32bit. Fortunately syscall-like insns can't be
probed so it actually works, but it would be better to rename and
use is_ia32_frame().
and goes all the way back to:
0326f5a94dde ("uprobes/core: Handle breakpoint and singlestep exceptions")
Oh well. 7 years until someone actually tried a uretprobe on a 32bit
process on a 64bit kernel....
> -static inline int sizeof_long(void)
> +static inline int sizeof_long(struct pt_regs *regs)
> {
> - return in_ia32_syscall() ? 4 : 8;
This wants a comment.
> + return user_64bit_mode(regs) ? 8 : 4;
> }
No need to resend, I'll fix this up while applying.
Thank you very much for digging into this!
tglx
On Sat, 24 Aug 2019, Thomas Gleixner wrote:
> On Sun, 28 Jul 2019, Sebastian Mayr wrote:
>
> > -static inline int sizeof_long(void)
> > +static inline int sizeof_long(struct pt_regs *regs)
> > {
> > - return in_ia32_syscall() ? 4 : 8;
>
> This wants a comment.
>
> > + return user_64bit_mode(regs) ? 8 : 4;
The more simpler one liner is to check
test_thread_flag(TIF_IA32)
which is only true for IA32 and independent of syscalls, exceptions ...
Thanks,
tglx
> On Aug 23, 2019, at 4:44 PM, Thomas Gleixner <[email protected]> wrote:
>
>> On Sat, 24 Aug 2019, Thomas Gleixner wrote:
>>> On Sun, 28 Jul 2019, Sebastian Mayr wrote:
>>>
>>> -static inline int sizeof_long(void)
>>> +static inline int sizeof_long(struct pt_regs *regs)
>>> {
>>> - return in_ia32_syscall() ? 4 : 8;
>>
>> This wants a comment.
>>
>>> + return user_64bit_mode(regs) ? 8 : 4;
>
> The more simpler one liner is to check
>
> test_thread_flag(TIF_IA32)
I still want to finish killing TIF_IA32 some day. Let’s please not add new users.
On Fri, 23 Aug 2019, Andy Lutomirski wrote:
> > On Aug 23, 2019, at 4:44 PM, Thomas Gleixner <[email protected]> wrote:
> >
> >> On Sat, 24 Aug 2019, Thomas Gleixner wrote:
> >>> On Sun, 28 Jul 2019, Sebastian Mayr wrote:
> >>>
> >>> -static inline int sizeof_long(void)
> >>> +static inline int sizeof_long(struct pt_regs *regs)
> >>> {
> >>> - return in_ia32_syscall() ? 4 : 8;
> >>
> >> This wants a comment.
> >>
> >>> + return user_64bit_mode(regs) ? 8 : 4;
> >
> > The more simpler one liner is to check
> >
> > test_thread_flag(TIF_IA32)
>
> I still want to finish killing TIF_IA32 some day. Let’s please not add new users.
Well, yes and no. This needs to be backported ....
On Sat, 24 Aug 2019, Thomas Gleixner wrote:
> On Fri, 23 Aug 2019, Andy Lutomirski wrote:
> > > On Aug 23, 2019, at 4:44 PM, Thomas Gleixner <[email protected]> wrote:
> > >
> > >> On Sat, 24 Aug 2019, Thomas Gleixner wrote:
> > >>> On Sun, 28 Jul 2019, Sebastian Mayr wrote:
> > >>>
> > >>> -static inline int sizeof_long(void)
> > >>> +static inline int sizeof_long(struct pt_regs *regs)
> > >>> {
> > >>> - return in_ia32_syscall() ? 4 : 8;
> > >>
> > >> This wants a comment.
> > >>
> > >>> + return user_64bit_mode(regs) ? 8 : 4;
> > >
> > > The more simpler one liner is to check
> > >
> > > test_thread_flag(TIF_IA32)
> >
> > I still want to finish killing TIF_IA32 some day. Let’s please not add new users.
>
> Well, yes and no. This needs to be backported ....
And TBH the magic in user_64bit_mode() is not pretty either.
Thanks,
tglx
> On Aug 23, 2019, at 5:03 PM, Thomas Gleixner <[email protected]> wrote:
>
>> On Sat, 24 Aug 2019, Thomas Gleixner wrote:
>> On Fri, 23 Aug 2019, Andy Lutomirski wrote:
>>>> On Aug 23, 2019, at 4:44 PM, Thomas Gleixner <[email protected]> wrote:
>>>>
>>>>>> On Sat, 24 Aug 2019, Thomas Gleixner wrote:
>>>>>> On Sun, 28 Jul 2019, Sebastian Mayr wrote:
>>>>>>
>>>>>> -static inline int sizeof_long(void)
>>>>>> +static inline int sizeof_long(struct pt_regs *regs)
>>>>>> {
>>>>>> - return in_ia32_syscall() ? 4 : 8;
>>>>>
>>>>> This wants a comment.
>>>>>
>>>>>> + return user_64bit_mode(regs) ? 8 : 4;
>>>>
>>>> The more simpler one liner is to check
>>>>
>>>> test_thread_flag(TIF_IA32)
>>>
>>> I still want to finish killing TIF_IA32 some day. Let’s please not add new users.
>>
>> Well, yes and no. This needs to be backported ....
>
> And TBH the magic in user_64bit_mode() is not pretty either.
>
>
It’s only magic on Xen. I should probably stick a cpu_feature_enabled(X86_FEATURE_XENPV) in there instead.
On Fri, 23 Aug 2019, Andy Lutomirski wrote:
> > On Aug 23, 2019, at 5:03 PM, Thomas Gleixner <[email protected]> wrote:
> >
> >> On Sat, 24 Aug 2019, Thomas Gleixner wrote:
> >> On Fri, 23 Aug 2019, Andy Lutomirski wrote:
> >>>> On Aug 23, 2019, at 4:44 PM, Thomas Gleixner <[email protected]> wrote:
> >>>>
> >>>>>> On Sat, 24 Aug 2019, Thomas Gleixner wrote:
> >>>>>> On Sun, 28 Jul 2019, Sebastian Mayr wrote:
> >>>>>>
> >>>>>> -static inline int sizeof_long(void)
> >>>>>> +static inline int sizeof_long(struct pt_regs *regs)
> >>>>>> {
> >>>>>> - return in_ia32_syscall() ? 4 : 8;
> >>>>>
> >>>>> This wants a comment.
> >>>>>
> >>>>>> + return user_64bit_mode(regs) ? 8 : 4;
> >>>>
> >>>> The more simpler one liner is to check
> >>>>
> >>>> test_thread_flag(TIF_IA32)
> >>>
> >>> I still want to finish killing TIF_IA32 some day. Let’s please not add new users.
> >>
> >> Well, yes and no. This needs to be backported ....
> >
> > And TBH the magic in user_64bit_mode() is not pretty either.
> >
> It’s only magic on Xen. I should probably stick a
> cpu_feature_enabled(X86_FEATURE_XENPV) in there instead.
For backporting sake I really prefer the TIF version. One usage site more
is not the end of the world. We can add the user_64bit_mode() variant from
Sebastian on top as a cleanup right away so mainline is clean.
Thanks,
tglx
On Sat, 24 Aug 2019, Thomas Gleixner wrote:
> On Fri, 23 Aug 2019, Andy Lutomirski wrote:
> > > On Aug 23, 2019, at 5:03 PM, Thomas Gleixner <[email protected]> wrote:
> > >
> > >> On Sat, 24 Aug 2019, Thomas Gleixner wrote:
> > >> On Fri, 23 Aug 2019, Andy Lutomirski wrote:
> > >>>> On Aug 23, 2019, at 4:44 PM, Thomas Gleixner <[email protected]> wrote:
> > >>>>
> > >>>>>> On Sat, 24 Aug 2019, Thomas Gleixner wrote:
> > >>>>>> On Sun, 28 Jul 2019, Sebastian Mayr wrote:
> > >>>>>>
> > >>>>>> -static inline int sizeof_long(void)
> > >>>>>> +static inline int sizeof_long(struct pt_regs *regs)
> > >>>>>> {
> > >>>>>> - return in_ia32_syscall() ? 4 : 8;
> > >>>>>
> > >>>>> This wants a comment.
> > >>>>>
> > >>>>>> + return user_64bit_mode(regs) ? 8 : 4;
> > >>>>
> > >>>> The more simpler one liner is to check
> > >>>>
> > >>>> test_thread_flag(TIF_IA32)
> > >>>
> > >>> I still want to finish killing TIF_IA32 some day. Let’s please not add new users.
> > >>
> > >> Well, yes and no. This needs to be backported ....
> > >
> > > And TBH the magic in user_64bit_mode() is not pretty either.
> > >
> > It’s only magic on Xen. I should probably stick a
> > cpu_feature_enabled(X86_FEATURE_XENPV) in there instead.
>
> For backporting sake I really prefer the TIF version. One usage site more
> is not the end of the world. We can add the user_64bit_mode() variant from
> Sebastian on top as a cleanup right away so mainline is clean.
Bah, scratch it. I take the proper one right away.
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: 9212ec7d8357ea630031e89d0d399c761421c83b
Gitweb: https://git.kernel.org/tip/9212ec7d8357ea630031e89d0d399c761421c83b
Author: Sebastian Mayr <[email protected]>
AuthorDate: Sun, 28 Jul 2019 17:26:17 +02:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Mon, 26 Aug 2019 15:55:09 +02:00
uprobes/x86: Fix detection of 32-bit user mode
32-bit processes running on a 64-bit kernel are not always detected
correctly, causing the process to crash when uretprobes are installed.
The reason for the crash is that in_ia32_syscall() is used to determine the
process's mode, which only works correctly when called from a syscall.
In the case of uretprobes, however, the function is called from a exception
and always returns 'false' on a 64-bit kernel. In consequence this leads to
corruption of the process's return address.
Fix this by using user_64bit_mode() instead of in_ia32_syscall(), which
is correct in any situation.
[ tglx: Add a comment and the following historical info ]
This should have been detected by the rename which happened in commit
abfb9498ee13 ("x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall()")
which states in the changelog:
The is_ia32_task()/is_x32_task() function names are a big misnomer: they
suggests that the compat-ness of a system call is a task property, which
is not true, the compatness of a system call purely depends on how it
was invoked through the system call layer.
.....
and then it went and blindly renamed every call site.
Sadly enough this was already mentioned here:
8faaed1b9f50 ("uprobes/x86: Introduce sizeof_long(), cleanup adjust_ret_addr() and
arch_uretprobe_hijack_return_addr()")
where the changelog says:
TODO: is_ia32_task() is not what we actually want, TS_COMPAT does
not necessarily mean 32bit. Fortunately syscall-like insns can't be
probed so it actually works, but it would be better to rename and
use is_ia32_frame().
and goes all the way back to:
0326f5a94dde ("uprobes/core: Handle breakpoint and singlestep exceptions")
Oh well. 7+ years until someone actually tried a uretprobe on a 32bit
process on a 64bit kernel....
Fixes: 0326f5a94dde ("uprobes/core: Handle breakpoint and singlestep exceptions")
Signed-off-by: Sebastian Mayr <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Cc: Dmitry Safonov <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Cc: Srikar Dronamraju <[email protected]>
Cc: [email protected]
Link: https://lkml.kernel.org/r/[email protected]
---
arch/x86/kernel/uprobes.c | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index d8359eb..8cd745e 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -508,9 +508,12 @@ struct uprobe_xol_ops {
void (*abort)(struct arch_uprobe *, struct pt_regs *);
};
-static inline int sizeof_long(void)
+static inline int sizeof_long(struct pt_regs *regs)
{
- return in_ia32_syscall() ? 4 : 8;
+ /*
+ * Check registers for mode as in_xxx_syscall() does not apply here.
+ */
+ return user_64bit_mode(regs) ? 8 : 4;
}
static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
@@ -521,9 +524,9 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
static int emulate_push_stack(struct pt_regs *regs, unsigned long val)
{
- unsigned long new_sp = regs->sp - sizeof_long();
+ unsigned long new_sp = regs->sp - sizeof_long(regs);
- if (copy_to_user((void __user *)new_sp, &val, sizeof_long()))
+ if (copy_to_user((void __user *)new_sp, &val, sizeof_long(regs)))
return -EFAULT;
regs->sp = new_sp;
@@ -556,7 +559,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
long correction = utask->vaddr - utask->xol_vaddr;
regs->ip += correction;
} else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) {
- regs->sp += sizeof_long(); /* Pop incorrect return address */
+ regs->sp += sizeof_long(regs); /* Pop incorrect return address */
if (emulate_push_stack(regs, utask->vaddr + auprobe->defparam.ilen))
return -ERESTART;
}
@@ -675,7 +678,7 @@ static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
* "call" insn was executed out-of-line. Just restore ->sp and restart.
* We could also restore ->ip and try to call branch_emulate_op() again.
*/
- regs->sp += sizeof_long();
+ regs->sp += sizeof_long(regs);
return -ERESTART;
}
@@ -1056,7 +1059,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
unsigned long
arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs)
{
- int rasize = sizeof_long(), nleft;
+ int rasize = sizeof_long(regs), nleft;
unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */
if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize))
Sorry for delay, vacation.
On 08/24, Thomas Gleixner wrote:
>
> And sadly this was already mentioned here:
>
> 8faaed1b9f50 ("uprobes/x86: Introduce sizeof_long(), cleanup adjust_ret_addr() and arch_uretprobe_hijack_return_addr()")
Yes, and I even posted a similar fix but forgot to send it officially ...
Thanks Sebastian! I am sure it was not easy to debug this problem.
But to remind, there is another problem with in_ia32_syscall() && uprobes.
get_unmapped_area() paths use in_ia32_syscall() and this is wrong in case
when the caller is xol_add_vma(), in this case TS_COMPAT won't be set.
Usually the addr = TASK_SIZE - PAGE_SIZE passed to get_unmapped_area() should
work, mm->get_unmapped_area() won't be even called. But if this addr is already
occupied get_area() can return addr > TASK_SIZE.
Test-case:
#include <sys/mman.h>
void func(void)
{
}
int main(void)
{
// 0xffffd000 == TASK_SIZE - PAGE_SIZE
mmap((void*)0xffffd000, 4096, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
func();
return 0;
}
$ cc -m32 -Wall -g T.c -o ./t
$ perf probe -x ./t func+1 # +1 to avoid push_emulate_op()
$ perf record -e probe_t:func -aR ./t
perf-record "hangs" because ./t endlessly restarts the probed insn while
get_xol_area() can't succeed.
I verified that the "patch" below fixes the problem, any idea how to fix
it properly?
Oleg.
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1387,6 +1387,8 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags);
}
+#include <asm/mmu_context.h>
+
/* Slot allocation for XOL */
static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
{
@@ -1402,9 +1404,13 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
}
if (!area->vaddr) {
+ if(!is_64bit_mm(mm))
+ current_thread_info()->status |= TS_COMPAT;
/* Try to map as high as possible, this is only a hint. */
area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE,
PAGE_SIZE, 0, 0);
+ if(!is_64bit_mm(mm))
+ current_thread_info()->status &= ~TS_COMPAT;;
if (area->vaddr & ~PAGE_MASK) {
ret = area->vaddr;
goto fail;
Hi Oleg,
On 8/27/19 3:00 PM, Oleg Nesterov wrote:
[..]
> But to remind, there is another problem with in_ia32_syscall() && uprobes.
>
> get_unmapped_area() paths use in_ia32_syscall() and this is wrong in case
> when the caller is xol_add_vma(), in this case TS_COMPAT won't be set.>
> Usually the addr = TASK_SIZE - PAGE_SIZE passed to get_unmapped_area() should
> work, mm->get_unmapped_area() won't be even called. But if this addr is already
> occupied get_area() can return addr > TASK_SIZE.
Technically, it's not bigger than TASK_SIZE that's supplied
get_unmapped_area() as an argument..
[..]
> if (!area->vaddr) {
> + if(!is_64bit_mm(mm))
> + current_thread_info()->status |= TS_COMPAT;
> /* Try to map as high as possible, this is only a hint. */
> area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE,
> PAGE_SIZE, 0, 0);
> + if(!is_64bit_mm(mm))
> + current_thread_info()->status &= ~TS_COMPAT;;
> if (area->vaddr & ~PAGE_MASK) {
> ret = area->vaddr;
> goto fail;
It could have been TASK_SIZE_OF(), but that would be not much better in
my POV. I see that arch_uprobe_analyze_insn() uses is_64bit_mm() which
is correct the majority of time, but not for processes those jump
switching CS.. Except criu afair there are at least wine, dosemu.
I had it in my TODO to fix this :)
Do I read the code properly and xol is always one page?
Could that page be reserved on the top of mmap_base/mmap_compat_base at
the binfmt loading time? (I would need than to add .mremap() for
restoring sake). Probably, not reserving it if personality doesn't allow
randomization or providing a way to disable it..
Thanks,
Dmitry
-Cc my old @virtuozzo email.
Previously it just ignored emails and now sends those ugly html replies.
Sorry about that - I've updated .mailmap now.
On 8/27/19 6:03 PM, Dmitry Safonov wrote:
> Hi Oleg,
>
> On 8/27/19 3:00 PM, Oleg Nesterov wrote:
> [..]
>> But to remind, there is another problem with in_ia32_syscall() && uprobes.
>>
>> get_unmapped_area() paths use in_ia32_syscall() and this is wrong in case
>> when the caller is xol_add_vma(), in this case TS_COMPAT won't be set.>
>> Usually the addr = TASK_SIZE - PAGE_SIZE passed to get_unmapped_area() should
>> work, mm->get_unmapped_area() won't be even called. But if this addr is already
>> occupied get_area() can return addr > TASK_SIZE.
>
> Technically, it's not bigger than TASK_SIZE that's supplied
> get_unmapped_area() as an argument..
>
> [..]
>> if (!area->vaddr) {
>> + if(!is_64bit_mm(mm))
>> + current_thread_info()->status |= TS_COMPAT;
>> /* Try to map as high as possible, this is only a hint. */
>> area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE,
>> PAGE_SIZE, 0, 0);
>> + if(!is_64bit_mm(mm))
>> + current_thread_info()->status &= ~TS_COMPAT;;
>> if (area->vaddr & ~PAGE_MASK) {
>> ret = area->vaddr;
>> goto fail;
>
> It could have been TASK_SIZE_OF(), but that would be not much better in
> my POV. I see that arch_uprobe_analyze_insn() uses is_64bit_mm() which
> is correct the majority of time, but not for processes those jump
> switching CS.. Except criu afair there are at least wine, dosemu.
> I had it in my TODO to fix this :)
>
> Do I read the code properly and xol is always one page?
> Could that page be reserved on the top of mmap_base/mmap_compat_base at
> the binfmt loading time? (I would need than to add .mremap() for
> restoring sake). Probably, not reserving it if personality doesn't allow
> randomization or providing a way to disable it..
If no one has concerns over such approach, I'll cook a fix just after
Plumbers week.
Thanks,
Dmitry
On 08/28, Dmitry Safonov wrote:
>
> > On 8/27/19 3:00 PM, Oleg Nesterov wrote:
> > [..]
> >> But to remind, there is another problem with in_ia32_syscall() && uprobes.
> >>
> >> get_unmapped_area() paths use in_ia32_syscall() and this is wrong in case
> >> when the caller is xol_add_vma(), in this case TS_COMPAT won't be set.>
> >> Usually the addr = TASK_SIZE - PAGE_SIZE passed to get_unmapped_area() should
> >> work, mm->get_unmapped_area() won't be even called. But if this addr is already
> >> occupied get_area() can return addr > TASK_SIZE.
> >
> > Technically, it's not bigger than TASK_SIZE that's supplied
> > get_unmapped_area() as an argument..
Hmm. What do you mean?
Just in case, TASK_SIZE checks TIF_ADDR32, not TS_COMPAT.
> >> if (!area->vaddr) {
> >> + if(!is_64bit_mm(mm))
> >> + current_thread_info()->status |= TS_COMPAT;
> >> /* Try to map as high as possible, this is only a hint. */
> >> area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE,
> >> PAGE_SIZE, 0, 0);
> >> + if(!is_64bit_mm(mm))
> >> + current_thread_info()->status &= ~TS_COMPAT;;
> >> if (area->vaddr & ~PAGE_MASK) {
> >> ret = area->vaddr;
> >> goto fail;
> >
> > It could have been TASK_SIZE_OF(),
tsk is always current, why do we need TASK_SIZE_OF() ?
> > I see that arch_uprobe_analyze_insn() uses is_64bit_mm() which
> > is correct the majority of time, but not for processes those jump
> > switching CS..
Heh. it's actually even worse. Just suppose a 32-bit application simply
mmaps a 64-bit executable which has a probe. But this is off-topic.
> > Do I read the code properly and xol is always one page?
Yes,
> > Could that page be reserved on the top of mmap_base/mmap_compat_base at
> > the binfmt loading time?
How? I don't understand...
> (I would need than to add .mremap() for
> > restoring sake).
for what? I don't think you can restore a probed process anyway... OK,
right now this is off-topic too.
Oleg.