From: Andr? Hentschel <[email protected]>
Since commit a4780adeefd042482f624f5e0d577bf9cdcbb760 the user writeable TLS
register on ARM is preserved per thread.
This patch does it analogous to the ARM patch, but for compat mode on ARM64.
Signed-off-by: Andr? Hentschel <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Jonathan Austin <[email protected]>
---
This patch is against Linux 4.1-rc1 (b787f68c36d49bb1d9236f403813641efa74a031)
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 20e9591..cd7b8c9 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -78,7 +78,7 @@ struct cpu_context {
struct thread_struct {
struct cpu_context cpu_context; /* cpu context */
- unsigned long tp_value;
+ unsigned long tp_value[2]; /* TLS registers */
struct fpsimd_state fpsimd_state;
unsigned long fault_address; /* fault info */
unsigned long fault_code; /* ESR_EL1 value */
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index c6b1f3b..fc7cc6bc 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -218,7 +218,8 @@ static void tls_thread_flush(void)
asm ("msr tpidr_el0, xzr");
if (is_compat_task()) {
- current->thread.tp_value = 0;
+ current->thread.tp_value[0] = 0;
+ current->thread.tp_value[1] = 0;
/*
* We need to ensure ordering between the shadow state and the
@@ -254,7 +255,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
unsigned long stk_sz, struct task_struct *p)
{
struct pt_regs *childregs = task_pt_regs(p);
- unsigned long tls = p->thread.tp_value;
+ unsigned long tls = p->thread.tp_value[0];
memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
@@ -283,6 +284,11 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
*/
if (clone_flags & CLONE_SETTLS)
tls = childregs->regs[3];
+ if (is_compat_thread(task_thread_info(p))) {
+ unsigned long tpuser;
+ asm("mrs %0, tpidr_el0" : "=r" (tpuser));
+ p->thread.tp_value[1] = tpuser;
+ }
} else {
memset(childregs, 0, sizeof(struct pt_regs));
childregs->pstate = PSR_MODE_EL1h;
@@ -291,7 +297,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
}
p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
p->thread.cpu_context.sp = (unsigned long)childregs;
- p->thread.tp_value = tls;
+ p->thread.tp_value[0] = tls;
ptrace_hw_copy_thread(p);
@@ -302,16 +308,17 @@ static void tls_thread_switch(struct task_struct *next)
{
unsigned long tpidr, tpidrro;
- if (!is_compat_task()) {
- asm("mrs %0, tpidr_el0" : "=r" (tpidr));
- current->thread.tp_value = tpidr;
- }
+ asm("mrs %0, tpidr_el0" : "=r" (tpidr));
+ if (is_compat_task())
+ current->thread.tp_value[1] = tpidr;
+ else
+ current->thread.tp_value[0] = tpidr;
if (is_compat_thread(task_thread_info(next))) {
- tpidr = 0;
- tpidrro = next->thread.tp_value;
+ tpidr = next->thread.tp_value[1];
+ tpidrro = next->thread.tp_value[0];
} else {
- tpidr = next->thread.tp_value;
+ tpidr = next->thread.tp_value[0];
tpidrro = 0;
}
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index d882b83..1eec962 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -533,7 +533,7 @@ static int tls_get(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- unsigned long *tls = &target->thread.tp_value;
+ unsigned long *tls = &target->thread.tp_value[0];
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1);
}
@@ -548,7 +548,7 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset,
if (ret)
return ret;
- target->thread.tp_value = tls;
+ target->thread.tp_value[0] = tls;
return ret;
}
@@ -1061,7 +1061,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
break;
case COMPAT_PTRACE_GET_THREAD_AREA:
- ret = put_user((compat_ulong_t)child->thread.tp_value,
+ ret = put_user((compat_ulong_t)child->thread.tp_value[0],
(compat_ulong_t __user *)datap);
break;
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 28c511b..fd4330c 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -87,7 +87,7 @@ long compat_arm_syscall(struct pt_regs *regs)
return do_compat_cache_op(regs->regs[0], regs->regs[1], regs->regs[2]);
case __ARM_NR_compat_set_tls:
- current->thread.tp_value = regs->regs[0];
+ current->thread.tp_value[0] = regs->regs[0];
/*
* Protect against register corruption from context switch.
On Sun, May 03, 2015 at 05:24:18PM +0100, Andr? Hentschel wrote:
> From: Andr? Hentschel <[email protected]>
>
> Since commit a4780adeefd042482f624f5e0d577bf9cdcbb760 the user writeable TLS
> register on ARM is preserved per thread.
>
> This patch does it analogous to the ARM patch, but for compat mode on ARM64.
>
> Signed-off-by: Andr? Hentschel <[email protected]>
> Cc: Will Deacon <[email protected]>
> Cc: Jonathan Austin <[email protected]>
>
> ---
> This patch is against Linux 4.1-rc1 (b787f68c36d49bb1d9236f403813641efa74a031)
Curious, but why do you need this? iirc, we added this for arch/arm/ because
of some windows rt (?) emulation in wine. Is that still the case here and is
anybody actually using that?
Will
Am 05.05.2015 um 12:51 schrieb Will Deacon:
> On Sun, May 03, 2015 at 05:24:18PM +0100, Andr? Hentschel wrote:
>> From: Andr? Hentschel <[email protected]>
>>
>> Since commit a4780adeefd042482f624f5e0d577bf9cdcbb760 the user writeable TLS
>> register on ARM is preserved per thread.
>>
>> This patch does it analogous to the ARM patch, but for compat mode on ARM64.
>>
>> Signed-off-by: Andr? Hentschel <[email protected]>
>> Cc: Will Deacon <[email protected]>
>> Cc: Jonathan Austin <[email protected]>
>>
>> ---
>> This patch is against Linux 4.1-rc1 (b787f68c36d49bb1d9236f403813641efa74a031)
>
> Curious, but why do you need this? iirc, we added this for arch/arm/ because
> of some windows rt (?) emulation in wine. Is that still the case here and is
> anybody actually using that?
Yes, Windows ARM binaries are the well known use case, but also the compat mode should do
what the arm kernel is doing I?d think and the code wasn't adjusted yet.
What i'm curious about is why the main TLS register on arm64 is the user writeable,
I'm not an security expert but this looks odd. I could easily provoke a crash by writing
to it...
CCing Catalin Marinas
On Tue, May 05, 2015 at 06:09:57PM +0100, André Hentschel wrote:
> Am 05.05.2015 um 12:51 schrieb Will Deacon:
> > On Sun, May 03, 2015 at 05:24:18PM +0100, André Hentschel wrote:
> >> From: André Hentschel <[email protected]>
> >>
> >> Since commit a4780adeefd042482f624f5e0d577bf9cdcbb760 the user writeable TLS
> >> register on ARM is preserved per thread.
> >>
> >> This patch does it analogous to the ARM patch, but for compat mode on ARM64.
> >>
> >> Signed-off-by: André Hentschel <[email protected]>
> >> Cc: Will Deacon <[email protected]>
> >> Cc: Jonathan Austin <[email protected]>
> >>
> >> ---
> >> This patch is against Linux 4.1-rc1 (b787f68c36d49bb1d9236f403813641efa74a031)
> >
> > Curious, but why do you need this? iirc, we added this for arch/arm/ because
> > of some windows rt (?) emulation in wine. Is that still the case here and is
> > anybody actually using that?
>
> Yes, Windows ARM binaries are the well known use case, but also the compat
> mode should do what the arm kernel is doing I’d think and the code wasn't
> adjusted yet.
Sure, I was just curious.
> What i'm curious about is why the main TLS register on arm64 is the user
> writeable, I'm not an security expert but this looks odd. I could easily
> provoke a crash by writing to it...
You've probably got the wrong TLS. Allowing a program to clobber it's own
thread-local storage is no worse than allowing it to write to its general
purpose registers, pc, etc.
I'm assuming the crash you saw was just a userspace crash, rather than
the kernel?
Will
Am 05.05.2015 um 19:15 schrieb Will Deacon:
> On Tue, May 05, 2015 at 06:09:57PM +0100, André Hentschel wrote:
>> Am 05.05.2015 um 12:51 schrieb Will Deacon:
>>> On Sun, May 03, 2015 at 05:24:18PM +0100, André Hentschel wrote:
>>>> From: André Hentschel <[email protected]>
>>>>
>>>> Since commit a4780adeefd042482f624f5e0d577bf9cdcbb760 the user writeable TLS
>>>> register on ARM is preserved per thread.
>>>>
>>>> This patch does it analogous to the ARM patch, but for compat mode on ARM64.
>>>>
>>>> Signed-off-by: André Hentschel <[email protected]>
>>>> Cc: Will Deacon <[email protected]>
>>>> Cc: Jonathan Austin <[email protected]>
>>>>
>>>> ---
>>>> This patch is against Linux 4.1-rc1 (b787f68c36d49bb1d9236f403813641efa74a031)
>>>
>>> Curious, but why do you need this? iirc, we added this for arch/arm/ because
>>> of some windows rt (?) emulation in wine. Is that still the case here and is
>>> anybody actually using that?
>>
>> Yes, Windows ARM binaries are the well known use case, but also the compat
>> mode should do what the arm kernel is doing I’d think and the code wasn't
>> adjusted yet.
>
> Sure, I was just curious.
OK :)
So what about the patch?
>> What i'm curious about is why the main TLS register on arm64 is the user
>> writeable, I'm not an security expert but this looks odd. I could easily
>> provoke a crash by writing to it...
>
> You've probably got the wrong TLS. Allowing a program to clobber it's own
> thread-local storage is no worse than allowing it to write to its general
> purpose registers, pc, etc.
>
> I'm assuming the crash you saw was just a userspace crash, rather than
> the kernel?
>
True, but the system became horribly instable, files were overwritten by others, very strange. It was in a remote KVM VM on bare metal aarch64...
I don't dare to try it again because it causes others some trouble, but if someone wants to try it out:
https://github.com/AndreRH/tpidrurw-test
On Tue, May 05, 2015 at 06:19:24PM +0100, André Hentschel wrote:
> Am 05.05.2015 um 19:15 schrieb Will Deacon:
> > On Tue, May 05, 2015 at 06:09:57PM +0100, André Hentschel wrote:
> >> Am 05.05.2015 um 12:51 schrieb Will Deacon:
> >>> On Sun, May 03, 2015 at 05:24:18PM +0100, André Hentschel wrote:
> >>> Curious, but why do you need this? iirc, we added this for arch/arm/ because
> >>> of some windows rt (?) emulation in wine. Is that still the case here and is
> >>> anybody actually using that?
> >>
> >> Yes, Windows ARM binaries are the well known use case, but also the compat
> >> mode should do what the arm kernel is doing I’d think and the code wasn't
> >> adjusted yet.
> >
> > Sure, I was just curious.
>
> OK :)
> So what about the patch?
I'll need to take a proper look (it's on the list).
> >> What i'm curious about is why the main TLS register on arm64 is the user
> >> writeable, I'm not an security expert but this looks odd. I could easily
> >> provoke a crash by writing to it...
> >
> > You've probably got the wrong TLS. Allowing a program to clobber it's own
> > thread-local storage is no worse than allowing it to write to its general
> > purpose registers, pc, etc.
> >
> > I'm assuming the crash you saw was just a userspace crash, rather than
> > the kernel?
> >
>
> True, but the system became horribly instable, files were overwritten by
> others, very strange. It was in a remote KVM VM on bare metal aarch64...
> I don't dare to try it again because it causes others some trouble, but if
> someone wants to try it out: https://github.com/AndreRH/tpidrurw-test
Seems fine to me running both as 32-bit and 64-bit binary under an arm64
4.1-rc2 kernel.
The former just has test failures (because we don't context switch the
TLS):
[...]
ERROR: TPIDRURW is 00000000, expected cafebabe
[...]
whilst the latter SEGVs:
tpidrurw-test[1691]: unhandled level 1 translation fault (11) at
0xdeadbac2, esr 0x92000005
pgd = ffffffc079079000
[deadbac2] *pgd=0000000000000000, *pud=0000000000000000
[...]
Will
On Sun, May 03, 2015 at 05:24:18PM +0100, Andr? Hentschel wrote:
> From: Andr? Hentschel <[email protected]>
>
> Since commit a4780adeefd042482f624f5e0d577bf9cdcbb760 the user writeable TLS
> register on ARM is preserved per thread.
>
> This patch does it analogous to the ARM patch, but for compat mode on ARM64.
>
> Signed-off-by: Andr? Hentschel <[email protected]>
> Cc: Will Deacon <[email protected]>
> Cc: Jonathan Austin <[email protected]>
>
> ---
> This patch is against Linux 4.1-rc1 (b787f68c36d49bb1d9236f403813641efa74a031)
>
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 20e9591..cd7b8c9 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -78,7 +78,7 @@ struct cpu_context {
>
> struct thread_struct {
> struct cpu_context cpu_context; /* cpu context */
> - unsigned long tp_value;
> + unsigned long tp_value[2]; /* TLS registers */
I think I'd rather have a separate field for the user-writable TLS,
predicated on #ifdef COMPAT. It also removes confusion between the two tls
registers, which are also present on arm64.
> struct fpsimd_state fpsimd_state;
> unsigned long fault_address; /* fault info */
> unsigned long fault_code; /* ESR_EL1 value */
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index c6b1f3b..fc7cc6bc 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -218,7 +218,8 @@ static void tls_thread_flush(void)
> asm ("msr tpidr_el0, xzr");
>
> if (is_compat_task()) {
> - current->thread.tp_value = 0;
> + current->thread.tp_value[0] = 0;
> + current->thread.tp_value[1] = 0;
>
> /*
> * We need to ensure ordering between the shadow state and the
> @@ -254,7 +255,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
> unsigned long stk_sz, struct task_struct *p)
> {
> struct pt_regs *childregs = task_pt_regs(p);
> - unsigned long tls = p->thread.tp_value;
> + unsigned long tls = p->thread.tp_value[0];
>
> memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
>
> @@ -283,6 +284,11 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
> */
> if (clone_flags & CLONE_SETTLS)
> tls = childregs->regs[3];
> + if (is_compat_thread(task_thread_info(p))) {
> + unsigned long tpuser;
> + asm("mrs %0, tpidr_el0" : "=r" (tpuser));
> + p->thread.tp_value[1] = tpuser;
> + }
Can't this hunk be in the is_compat_thread(...) block slightly earlier
on in the function? In fact, we're reading tpidr_el0 for both the compat and
native cases, so really this all be unconditional.
> } else {
> memset(childregs, 0, sizeof(struct pt_regs));
> childregs->pstate = PSR_MODE_EL1h;
> @@ -291,7 +297,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
> }
> p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
> p->thread.cpu_context.sp = (unsigned long)childregs;
> - p->thread.tp_value = tls;
> + p->thread.tp_value[0] = tls;
>
> ptrace_hw_copy_thread(p);
>
> @@ -302,16 +308,17 @@ static void tls_thread_switch(struct task_struct *next)
> {
> unsigned long tpidr, tpidrro;
>
> - if (!is_compat_task()) {
> - asm("mrs %0, tpidr_el0" : "=r" (tpidr));
> - current->thread.tp_value = tpidr;
> - }
> + asm("mrs %0, tpidr_el0" : "=r" (tpidr));
> + if (is_compat_task())
> + current->thread.tp_value[1] = tpidr;
> + else
> + current->thread.tp_value[0] = tpidr;
Maybe we could have a macro thread_user_tls(thread) so that we could just
do thread_user_tls(current->thread) = tpidr;
Will