On kernel version 3.8.13, when we try to execute a statically compiled
binary from kernel, it is giving segfault:
insmod /mnt/module2.ko
[ 35.560000] sample.static: unhandled page fault (11) at 0x00000000,
code 0x80000007
[ 36.440000] Pid: 257, comm: sample.static
[ 36.444000] CPU: 3 Tainted: G W O (3.8.13+ #35)
[ 36.448000] PC is at 0x0
[ 36.452000] LR is at 0xa420
[ 36.456000] pc : [<00000000>] lr : [<0000a420>] psr: 60000030
[ 36.456000] sp : beb30d70 ip : 00000004 fp : 00000000
[ 36.464000] r10: 0000a004 r9 : 0000a0a4 r8 : 00000001
[ 36.472000] r7 : 00000001 r6 : 0008be5c r5 : 00000000 r4 : 0008ce80
[ 36.476000] r3 : 00000001 r2 : 00000001 r1 : 00000000 r0 : 00000000
[ 36.484000] Flags: nZCv IRQs on FIQs on Mode USER_32 ISA Thumb
Segment user
[ 36.492000] Control: 10c53c7d Table: 7b1f806a DAC: 00000015
[ 36.496000] Backtrace:
[ 36.500000] [<c00171f8>] (dump_backtrace+0x0/0x11c) from
[<c03e8a54>] (dump_stack+0x20/0x24)
[ 36.508000] r6:e514dd80 r5:00000000 r4:e5175fb0 r3:271ae511
[ 36.516000] [<c03e8a34>] (dump_stack+0x0/0x24) from [<c0014174>]
(show_regs+0x58/0x5c)
[ 36.524000] [<c001411c>] (show_regs+0x0/0x5c) from [<c001794c>]
(show_info+0xe0/0x14c)
[ 36.532000] r4:00000000 r3:00000000
[ 36.532000] [<c001786c>] (show_info+0x0/0x14c) from [<c001dfb4>]
(__do_user_fault+0x78/0xc8)
[ 36.540000] r7:80000007 r6:0000000b r5:00000000 r4:e514dd80
[ 36.548000] [<c001df3c>] (__do_user_fault+0x0/0xc8) from
[<c03f4574>] (do_page_fault+0x360/0x3d4)
[ 36.556000] [<c03f4214>] (do_page_fault+0x0/0x3d4) from
[<c0008470>] (do_PrefetchAbort+0x44/0xa8)
[ 36.564000] [<c000842c>] (do_PrefetchAbort+0x0/0xa8) from
[<c03f2bbc>] (ret_from_exception+0x0/0x10)
[ 36.572000] Exception stack(0xe5175fb0 to 0xe5175ff8)
[ 36.960000] do_init_module: 'module2'->init suspiciously returned
11, it should follow 0/-E convention
[ 36.960000] do_init_module: loading module anyway...
[ 36.964000] Backtrace:
[ 36.968000] [<c00171f8>] (dump_backtrace+0x0/0x11c) from
[<c03e8a54>] (dump_stack+0x20/0x24)
[ 36.972000] r6:bf000090 r5:bf00009c r4:e51d7f48 r3:00000000
[ 36.980000] [<c03e8a34>] (dump_stack+0x0/0x24) from [<c008ae88>]
(load_module+0x1a90/0x1eb4)
[ 36.984000] [<c00893f8>] (load_module+0x0/0x1eb4) from [<c008b3b0>]
(sys_init_module+0xe8/0xf8)
[ 36.988000] [<c008b2c8>] (sys_init_module+0x0/0xf8) from
[<c0012dc0>] (ret_fast_syscall+0x0/0x48)
[ 36.992000] r6:bea51a14 r5:bea51b7c r4:00006ee4
[ 36.996000] module2 mod ld
module2.c =>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kmod.h>
MODULE_LICENSE( "GPL" );
static int test( void )
{
char *argv[] = { "/mnt/sample.static", NULL };
static char *envp[] = {
"HOME=/",
"TERM=linux",
"PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };
return call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC );
}
static int __init mod_entry_func( void )
{
return test();
}
static void __exit mod_exit_func( void )
{
return;
}
module_init( mod_entry_func );
module_exit( mod_exit_func );
sample.static is a simple "Hello_world" program.
However, there is no problem in executing dynamically compiled binaries.
When we revert commit 9fff2fa0db911b0b75ec1f9bec72460c0a676ef5 (arm:
switch to saner kernel_execve() semantics), there is no problem.
OR
for kernel 3.8.13 (which is just a plain revert when arch specific
kernel_execve was present), then also no problem =>
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 0023a87..9cf6e15 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -184,6 +184,10 @@ static int ____call_usermodehelper(void *data)
struct subprocess_info *sub_info = data;
struct cred *new;
int retval;
+#ifdef ARM
+ struct pt_regs regs;
+ struct pt_regs *curr_ptr;
+#endif
spin_lock_irq(¤t->sighand->siglock);
flush_signal_handlers(current, 1);
@@ -222,6 +226,36 @@ static int ____call_usermodehelper(void *data)
retval = do_execve(sub_info->path,
(const char __user *const __user *)sub_info->argv,
(const char __user *const __user *)sub_info->envp);
+
+#ifdef ARM
+ if (retval)
+ goto fail;
+ curr_ptr = current_pt_regs();
+ memcpy(®s, curr_ptr, sizeof(struct pt_regs));
+ /*
+ * Save argc to the register structure for userspace.
+ */
+ regs.ARM_r0 = retval;
+
+ /*
+ * We were successful. We won't be returning to our caller, but
+ * instead to user space by manipulating the kernel stack.
+ */
+ asm( "add r0, %0, %1\n\t"
+ "mov r1, %2\n\t"
+ "mov r2, %3\n\t"
+ "bl memmove\n\t" /* copy regs to top of stack */
+ "mov r8, #0\n\t" /* not a syscall */
+ "mov r9, %0\n\t" /* thread structure */
+ "mov sp, r0\n\t" /* reposition stack pointer */
+ "b ret_to_user"
+ :
+ : "r" (current_thread_info()),
+ "Ir" (THREAD_START_SP - sizeof(regs)),
+ "r" (®s),
+ "Ir" (sizeof(regs))
+ : "r0", "r1", "r2", "r3", "r8", "r9", "ip", "lr", "memory");
+#endif
if (!retval)
return 0;
--
Please suggest a proper solution.
Regards,
Ashish
Forget to mention, we are using ARM
On Mon, Jul 8, 2013 at 10:45 AM, Ashish Sangwan
<[email protected]> wrote:
> On kernel version 3.8.13, when we try to execute a statically compiled
> binary from kernel, it is giving segfault:
>
> insmod /mnt/module2.ko
> [ 35.560000] sample.static: unhandled page fault (11) at 0x00000000,
> code 0x80000007
> [ 36.440000] Pid: 257, comm: sample.static
> [ 36.444000] CPU: 3 Tainted: G W O (3.8.13+ #35)
> [ 36.448000] PC is at 0x0
> [ 36.452000] LR is at 0xa420
> [ 36.456000] pc : [<00000000>] lr : [<0000a420>] psr: 60000030
> [ 36.456000] sp : beb30d70 ip : 00000004 fp : 00000000
> [ 36.464000] r10: 0000a004 r9 : 0000a0a4 r8 : 00000001
> [ 36.472000] r7 : 00000001 r6 : 0008be5c r5 : 00000000 r4 : 0008ce80
> [ 36.476000] r3 : 00000001 r2 : 00000001 r1 : 00000000 r0 : 00000000
> [ 36.484000] Flags: nZCv IRQs on FIQs on Mode USER_32 ISA Thumb
> Segment user
> [ 36.492000] Control: 10c53c7d Table: 7b1f806a DAC: 00000015
> [ 36.496000] Backtrace:
> [ 36.500000] [<c00171f8>] (dump_backtrace+0x0/0x11c) from
> [<c03e8a54>] (dump_stack+0x20/0x24)
> [ 36.508000] r6:e514dd80 r5:00000000 r4:e5175fb0 r3:271ae511
> [ 36.516000] [<c03e8a34>] (dump_stack+0x0/0x24) from [<c0014174>]
> (show_regs+0x58/0x5c)
> [ 36.524000] [<c001411c>] (show_regs+0x0/0x5c) from [<c001794c>]
> (show_info+0xe0/0x14c)
> [ 36.532000] r4:00000000 r3:00000000
> [ 36.532000] [<c001786c>] (show_info+0x0/0x14c) from [<c001dfb4>]
> (__do_user_fault+0x78/0xc8)
> [ 36.540000] r7:80000007 r6:0000000b r5:00000000 r4:e514dd80
> [ 36.548000] [<c001df3c>] (__do_user_fault+0x0/0xc8) from
> [<c03f4574>] (do_page_fault+0x360/0x3d4)
> [ 36.556000] [<c03f4214>] (do_page_fault+0x0/0x3d4) from
> [<c0008470>] (do_PrefetchAbort+0x44/0xa8)
> [ 36.564000] [<c000842c>] (do_PrefetchAbort+0x0/0xa8) from
> [<c03f2bbc>] (ret_from_exception+0x0/0x10)
> [ 36.572000] Exception stack(0xe5175fb0 to 0xe5175ff8)
> [ 36.960000] do_init_module: 'module2'->init suspiciously returned
> 11, it should follow 0/-E convention
> [ 36.960000] do_init_module: loading module anyway...
> [ 36.964000] Backtrace:
> [ 36.968000] [<c00171f8>] (dump_backtrace+0x0/0x11c) from
> [<c03e8a54>] (dump_stack+0x20/0x24)
> [ 36.972000] r6:bf000090 r5:bf00009c r4:e51d7f48 r3:00000000
> [ 36.980000] [<c03e8a34>] (dump_stack+0x0/0x24) from [<c008ae88>]
> (load_module+0x1a90/0x1eb4)
> [ 36.984000] [<c00893f8>] (load_module+0x0/0x1eb4) from [<c008b3b0>]
> (sys_init_module+0xe8/0xf8)
> [ 36.988000] [<c008b2c8>] (sys_init_module+0x0/0xf8) from
> [<c0012dc0>] (ret_fast_syscall+0x0/0x48)
> [ 36.992000] r6:bea51a14 r5:bea51b7c r4:00006ee4
> [ 36.996000] module2 mod ld
>
> module2.c =>
> #include <linux/module.h>
> #include <linux/init.h>
> #include <linux/kmod.h>
> MODULE_LICENSE( "GPL" );
> static int test( void )
> {
> char *argv[] = { "/mnt/sample.static", NULL };
> static char *envp[] = {
> "HOME=/",
> "TERM=linux",
> "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };
> return call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC );
> }
> static int __init mod_entry_func( void )
> {
> return test();
> }
>
> static void __exit mod_exit_func( void )
> {
> return;
> }
> module_init( mod_entry_func );
> module_exit( mod_exit_func );
>
>
> sample.static is a simple "Hello_world" program.
>
> However, there is no problem in executing dynamically compiled binaries.
>
> When we revert commit 9fff2fa0db911b0b75ec1f9bec72460c0a676ef5 (arm:
> switch to saner kernel_execve() semantics), there is no problem.
>
> OR
>
> for kernel 3.8.13 (which is just a plain revert when arch specific
> kernel_execve was present), then also no problem =>
>
> diff --git a/kernel/kmod.c b/kernel/kmod.c
> index 0023a87..9cf6e15 100644
> --- a/kernel/kmod.c
> +++ b/kernel/kmod.c
> @@ -184,6 +184,10 @@ static int ____call_usermodehelper(void *data)
> struct subprocess_info *sub_info = data;
> struct cred *new;
> int retval;
> +#ifdef ARM
> + struct pt_regs regs;
> + struct pt_regs *curr_ptr;
> +#endif
> spin_lock_irq(¤t->sighand->siglock);
> flush_signal_handlers(current, 1);
> @@ -222,6 +226,36 @@ static int ____call_usermodehelper(void *data)
> retval = do_execve(sub_info->path,
> (const char __user *const __user *)sub_info->argv,
> (const char __user *const __user *)sub_info->envp);
> +
> +#ifdef ARM
> + if (retval)
> + goto fail;
> + curr_ptr = current_pt_regs();
> + memcpy(®s, curr_ptr, sizeof(struct pt_regs));
> + /*
> + * Save argc to the register structure for userspace.
> + */
> + regs.ARM_r0 = retval;
> +
> + /*
> + * We were successful. We won't be returning to our caller, but
> + * instead to user space by manipulating the kernel stack.
> + */
> + asm( "add r0, %0, %1\n\t"
> + "mov r1, %2\n\t"
> + "mov r2, %3\n\t"
> + "bl memmove\n\t" /* copy regs to top of stack */
> + "mov r8, #0\n\t" /* not a syscall */
> + "mov r9, %0\n\t" /* thread structure */
> + "mov sp, r0\n\t" /* reposition stack pointer */
> + "b ret_to_user"
> + :
> + : "r" (current_thread_info()),
> + "Ir" (THREAD_START_SP - sizeof(regs)),
> + "r" (®s),
> + "Ir" (sizeof(regs))
> + : "r0", "r1", "r2", "r3", "r8", "r9", "ip", "lr", "memory");
> +#endif
> if (!retval)
> return 0;
> --
>
> Please suggest a proper solution.
>
> Regards,
> Ashish
Any heads up on this?
or could someone just advice what can we do to debug this?
The ret_from_fork currently looks like following:
/*
* This is how we return from a fork.
*/
ENTRY(ret_from_fork)
bl schedule_tail
cmp r5, #0
movne r0, r4
adrne lr, BSYM(1f)
movne pc, r5
1: get_thread_info tsk
b ret_slow_syscall
ENDPROC(ret_from_fork)
Is this a real issue? Because we are getting this just for static binaries.
On Mon, Jul 8, 2013 at 10:46 AM, Ashish Sangwan
<[email protected]> wrote:
> Forget to mention, we are using ARM
>
> On Mon, Jul 8, 2013 at 10:45 AM, Ashish Sangwan
> <[email protected]> wrote:
>> On kernel version 3.8.13, when we try to execute a statically compiled
>> binary from kernel, it is giving segfault:
>>
>> insmod /mnt/module2.ko
>> [ 35.560000] sample.static: unhandled page fault (11) at 0x00000000,
>> code 0x80000007
>> [ 36.440000] Pid: 257, comm: sample.static
>> [ 36.444000] CPU: 3 Tainted: G W O (3.8.13+ #35)
>> [ 36.448000] PC is at 0x0
>> [ 36.452000] LR is at 0xa420
>> [ 36.456000] pc : [<00000000>] lr : [<0000a420>] psr: 60000030
>> [ 36.456000] sp : beb30d70 ip : 00000004 fp : 00000000
>> [ 36.464000] r10: 0000a004 r9 : 0000a0a4 r8 : 00000001
>> [ 36.472000] r7 : 00000001 r6 : 0008be5c r5 : 00000000 r4 : 0008ce80
>> [ 36.476000] r3 : 00000001 r2 : 00000001 r1 : 00000000 r0 : 00000000
>> [ 36.484000] Flags: nZCv IRQs on FIQs on Mode USER_32 ISA Thumb
>> Segment user
>> [ 36.492000] Control: 10c53c7d Table: 7b1f806a DAC: 00000015
>> [ 36.496000] Backtrace:
>> [ 36.500000] [<c00171f8>] (dump_backtrace+0x0/0x11c) from
>> [<c03e8a54>] (dump_stack+0x20/0x24)
>> [ 36.508000] r6:e514dd80 r5:00000000 r4:e5175fb0 r3:271ae511
>> [ 36.516000] [<c03e8a34>] (dump_stack+0x0/0x24) from [<c0014174>]
>> (show_regs+0x58/0x5c)
>> [ 36.524000] [<c001411c>] (show_regs+0x0/0x5c) from [<c001794c>]
>> (show_info+0xe0/0x14c)
>> [ 36.532000] r4:00000000 r3:00000000
>> [ 36.532000] [<c001786c>] (show_info+0x0/0x14c) from [<c001dfb4>]
>> (__do_user_fault+0x78/0xc8)
>> [ 36.540000] r7:80000007 r6:0000000b r5:00000000 r4:e514dd80
>> [ 36.548000] [<c001df3c>] (__do_user_fault+0x0/0xc8) from
>> [<c03f4574>] (do_page_fault+0x360/0x3d4)
>> [ 36.556000] [<c03f4214>] (do_page_fault+0x0/0x3d4) from
>> [<c0008470>] (do_PrefetchAbort+0x44/0xa8)
>> [ 36.564000] [<c000842c>] (do_PrefetchAbort+0x0/0xa8) from
>> [<c03f2bbc>] (ret_from_exception+0x0/0x10)
>> [ 36.572000] Exception stack(0xe5175fb0 to 0xe5175ff8)
>> [ 36.960000] do_init_module: 'module2'->init suspiciously returned
>> 11, it should follow 0/-E convention
>> [ 36.960000] do_init_module: loading module anyway...
>> [ 36.964000] Backtrace:
>> [ 36.968000] [<c00171f8>] (dump_backtrace+0x0/0x11c) from
>> [<c03e8a54>] (dump_stack+0x20/0x24)
>> [ 36.972000] r6:bf000090 r5:bf00009c r4:e51d7f48 r3:00000000
>> [ 36.980000] [<c03e8a34>] (dump_stack+0x0/0x24) from [<c008ae88>]
>> (load_module+0x1a90/0x1eb4)
>> [ 36.984000] [<c00893f8>] (load_module+0x0/0x1eb4) from [<c008b3b0>]
>> (sys_init_module+0xe8/0xf8)
>> [ 36.988000] [<c008b2c8>] (sys_init_module+0x0/0xf8) from
>> [<c0012dc0>] (ret_fast_syscall+0x0/0x48)
>> [ 36.992000] r6:bea51a14 r5:bea51b7c r4:00006ee4
>> [ 36.996000] module2 mod ld
>>
>> module2.c =>
>> #include <linux/module.h>
>> #include <linux/init.h>
>> #include <linux/kmod.h>
>> MODULE_LICENSE( "GPL" );
>> static int test( void )
>> {
>> char *argv[] = { "/mnt/sample.static", NULL };
>> static char *envp[] = {
>> "HOME=/",
>> "TERM=linux",
>> "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };
>> return call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC );
>> }
>> static int __init mod_entry_func( void )
>> {
>> return test();
>> }
>>
>> static void __exit mod_exit_func( void )
>> {
>> return;
>> }
>> module_init( mod_entry_func );
>> module_exit( mod_exit_func );
>>
>>
>> sample.static is a simple "Hello_world" program.
>>
>> However, there is no problem in executing dynamically compiled binaries.
>>
>> When we revert commit 9fff2fa0db911b0b75ec1f9bec72460c0a676ef5 (arm:
>> switch to saner kernel_execve() semantics), there is no problem.
>>
>> OR
>>
>> for kernel 3.8.13 (which is just a plain revert when arch specific
>> kernel_execve was present), then also no problem =>
>>
>> diff --git a/kernel/kmod.c b/kernel/kmod.c
>> index 0023a87..9cf6e15 100644
>> --- a/kernel/kmod.c
>> +++ b/kernel/kmod.c
>> @@ -184,6 +184,10 @@ static int ____call_usermodehelper(void *data)
>> struct subprocess_info *sub_info = data;
>> struct cred *new;
>> int retval;
>> +#ifdef ARM
>> + struct pt_regs regs;
>> + struct pt_regs *curr_ptr;
>> +#endif
>> spin_lock_irq(¤t->sighand->siglock);
>> flush_signal_handlers(current, 1);
>> @@ -222,6 +226,36 @@ static int ____call_usermodehelper(void *data)
>> retval = do_execve(sub_info->path,
>> (const char __user *const __user *)sub_info->argv,
>> (const char __user *const __user *)sub_info->envp);
>> +
>> +#ifdef ARM
>> + if (retval)
>> + goto fail;
>> + curr_ptr = current_pt_regs();
>> + memcpy(®s, curr_ptr, sizeof(struct pt_regs));
>> + /*
>> + * Save argc to the register structure for userspace.
>> + */
>> + regs.ARM_r0 = retval;
>> +
>> + /*
>> + * We were successful. We won't be returning to our caller, but
>> + * instead to user space by manipulating the kernel stack.
>> + */
>> + asm( "add r0, %0, %1\n\t"
>> + "mov r1, %2\n\t"
>> + "mov r2, %3\n\t"
>> + "bl memmove\n\t" /* copy regs to top of stack */
>> + "mov r8, #0\n\t" /* not a syscall */
>> + "mov r9, %0\n\t" /* thread structure */
>> + "mov sp, r0\n\t" /* reposition stack pointer */
>> + "b ret_to_user"
>> + :
>> + : "r" (current_thread_info()),
>> + "Ir" (THREAD_START_SP - sizeof(regs)),
>> + "r" (®s),
>> + "Ir" (sizeof(regs))
>> + : "r0", "r1", "r2", "r3", "r8", "r9", "ip", "lr", "memory");
>> +#endif
>> if (!retval)
>> return 0;
>> --
>>
>> Please suggest a proper solution.
>>
>> Regards,
>> Ashish
On Wed, Jul 10, 2013 at 11:42:25AM +0100, Ashish Sangwan wrote:
> Any heads up on this?
>
> or could someone just advice what can we do to debug this?
>
> The ret_from_fork currently looks like following:
> /*
> * This is how we return from a fork.
> */
> ENTRY(ret_from_fork)
> bl schedule_tail
> cmp r5, #0
> movne r0, r4
> adrne lr, BSYM(1f)
> movne pc, r5
> 1: get_thread_info tsk
> b ret_slow_syscall
> ENDPROC(ret_from_fork)
>
> Is this a real issue? Because we are getting this just for static binaries.
Ok, I've finally got to the bottom of this, but I'm not sure on the best way
to fix it. The issue is that libc expects r0 to contain a function pointer
to be invoked at exit (rtld_fini), to clean up after a dynamic linker. If
this pointer is NULL, then it is ignored. We actually zero this pointer in
our ELF_PLAT_INIT macro.
At the same time, we have this strange code called next from the ARM ELF
loader:
regs->ARM_r2 = stack[2]; /* r2 (envp) */ \
regs->ARM_r1 = stack[1]; /* r1 (argv) */ \
regs->ARM_r0 = stack[0]; /* r0 (argc) */ \
which puts argc into r0. Usually this gets overwritten by the return value
of execve (0), so everything hangs together. With kernel threads this is
different since we do the exec from ____call_usermodehelper on the stack and
then return to the new application via ret_from_fork, which takes the
slowpath; popping r0 from pt_regs and making argc visible to the library.
When the application exits and libc starts running its exit functions, we
jump to hyperspace.
My inclination would be to remove the stack popping above (patch below),
but it's a user-visible change and I'm not sure if something like OABI
requires it.
Will
--->8
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 06e7d50..413f387 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -54,7 +54,6 @@ struct thread_struct {
#define start_thread(regs,pc,sp) \
({ \
- unsigned long *stack = (unsigned long *)sp; \
memset(regs->uregs, 0, sizeof(regs->uregs)); \
if (current->personality & ADDR_LIMIT_32BIT) \
regs->ARM_cpsr = USR_MODE; \
@@ -65,9 +64,6 @@ struct thread_struct {
regs->ARM_cpsr |= PSR_ENDSTATE; \
regs->ARM_pc = pc & ~1; /* pc */ \
regs->ARM_sp = sp; /* sp */ \
- regs->ARM_r2 = stack[2]; /* r2 (envp) */ \
- regs->ARM_r1 = stack[1]; /* r1 (argv) */ \
- regs->ARM_r0 = stack[0]; /* r0 (argc) */ \
nommu_start_thread(regs); \
})
On Wed, Jul 10, 2013 at 05:34:11PM +0100, Will Deacon wrote:
> On Wed, Jul 10, 2013 at 11:42:25AM +0100, Ashish Sangwan wrote:
> > Any heads up on this?
> >
> > or could someone just advice what can we do to debug this?
> >
> > The ret_from_fork currently looks like following:
> > /*
> > * This is how we return from a fork.
> > */
> > ENTRY(ret_from_fork)
> > bl schedule_tail
> > cmp r5, #0
> > movne r0, r4
> > adrne lr, BSYM(1f)
> > movne pc, r5
> > 1: get_thread_info tsk
> > b ret_slow_syscall
> > ENDPROC(ret_from_fork)
> >
> > Is this a real issue? Because we are getting this just for static binaries.
>
> Ok, I've finally got to the bottom of this, but I'm not sure on the best way
> to fix it. The issue is that libc expects r0 to contain a function pointer
> to be invoked at exit (rtld_fini), to clean up after a dynamic linker. If
> this pointer is NULL, then it is ignored. We actually zero this pointer in
> our ELF_PLAT_INIT macro.
>
> At the same time, we have this strange code called next from the ARM ELF
> loader:
>
> regs->ARM_r2 = stack[2]; /* r2 (envp) */ \
> regs->ARM_r1 = stack[1]; /* r1 (argv) */ \
> regs->ARM_r0 = stack[0]; /* r0 (argc) */ \
>
> which puts argc into r0. Usually this gets overwritten by the return value
> of execve (0), so everything hangs together. With kernel threads this is
> different since we do the exec from ____call_usermodehelper on the stack and
> then return to the new application via ret_from_fork, which takes the
> slowpath; popping r0 from pt_regs and making argc visible to the library.
>
> When the application exits and libc starts running its exit functions, we
> jump to hyperspace.
>
> My inclination would be to remove the stack popping above (patch below),
> but it's a user-visible change and I'm not sure if something like OABI
> requires it.
It looks like populating r0-r2 is already broken -- libc must be getting
at least argc from the stack and not r0, otherwise it couldn't be
(ab)using r0 for some other purpose before _start.
Do I conclude correctly that the real problem here is a bug in the libc
startup code, which makes incorrect assumptions about the initial r0 in
the statically linked case?
At the ELF entry point, initial r0 is zero, but apparently only by
accident, since there is a clear intent in the kernel for r0=argc, even
if userspace can't have been using this any time recently since it is
normally clobbered with zero. It seems incorrect for userspace to rely on
either -- but I guess there is no choice but to retain that behaviour now,
since it may break existing binaries which contain that libc bug.
Cheers
---Dave
On Wed, Jul 10, 2013 at 05:34:11PM +0100, Will Deacon wrote:
> Ok, I've finally got to the bottom of this, but I'm not sure on the best way
> to fix it.
I don't think you have! You need to look back at the older ARM kernels
to really get to the bottom of this...
> The issue is that libc expects r0 to contain a function pointer
> to be invoked at exit (rtld_fini), to clean up after a dynamic linker. If
> this pointer is NULL, then it is ignored. We actually zero this pointer in
> our ELF_PLAT_INIT macro.
>
> At the same time, we have this strange code called next from the ARM ELF
> loader:
>
> regs->ARM_r2 = stack[2]; /* r2 (envp) */ \
> regs->ARM_r1 = stack[1]; /* r1 (argv) */ \
> regs->ARM_r0 = stack[0]; /* r0 (argc) */ \
>
> which puts argc into r0.
You're sort of right. It dates from the days when we had a.out binaries,
those required argc, argv and envp in r0/r1/r2 - and ARM kernels carried
this hack in binfmt_aout.c to make it work in conjunction with the above:
static int load_aout_binary(struct linux_binprm * bprm)
{
...
start_thread(regs, ex.a_entry, current->mm->start_stack);
#ifndef __arm__
return 0;
#else
return regs->ARM_r0;
#endif
}
ELF, on the other hand, never had that hack - ELF has always been zero
in r0, and it's always retrieved the argc/argv/envp off the stack.
As the above hack got dropped from the kernel (I don't think it ever made
it into mainline), I think we should be safe getting rid of this
initialization of regs->ARM_r0 to r2, leaving them all as zeros.
We should probably also remove the selection of HAVE_AOUT from
arch/arm/Kconfig too as this definitely won't work with any recent
kernel (certainly not without binfmt_aout.c hacked in the above way.)
On Wed, Jul 10, 2013 at 07:52:00PM +0100, Russell King - ARM Linux wrote:
> On Wed, Jul 10, 2013 at 05:34:11PM +0100, Will Deacon wrote:
> > Ok, I've finally got to the bottom of this, but I'm not sure on the best way
> > to fix it.
>
> I don't think you have! You need to look back at the older ARM kernels
> to really get to the bottom of this...
It's hard enough looking at the current kernel with the internet connection
I'm currently on! Anyway, what I meant was that I can see why the
application is falling over.
> > The issue is that libc expects r0 to contain a function pointer
> > to be invoked at exit (rtld_fini), to clean up after a dynamic linker. If
> > this pointer is NULL, then it is ignored. We actually zero this pointer in
> > our ELF_PLAT_INIT macro.
> >
> > At the same time, we have this strange code called next from the ARM ELF
> > loader:
> >
> > regs->ARM_r2 = stack[2]; /* r2 (envp) */ \
> > regs->ARM_r1 = stack[1]; /* r1 (argv) */ \
> > regs->ARM_r0 = stack[0]; /* r0 (argc) */ \
> >
> > which puts argc into r0.
>
> You're sort of right. It dates from the days when we had a.out binaries,
> those required argc, argv and envp in r0/r1/r2 - and ARM kernels carried
> this hack in binfmt_aout.c to make it work in conjunction with the above:
>
> static int load_aout_binary(struct linux_binprm * bprm)
> {
> ...
> start_thread(regs, ex.a_entry, current->mm->start_stack);
> #ifndef __arm__
> return 0;
> #else
> return regs->ARM_r0;
> #endif
> }
>
> ELF, on the other hand, never had that hack - ELF has always been zero
> in r0, and it's always retrieved the argc/argv/envp off the stack.
Aha, I was missing the a.out part, thanks. The ELF loader does call:
ELF_PLAT_INIT(regs, reloc_func_desc);
start_thread(regs, elf_entry, bprm->p);
back-to-back, which zeroes ARM_r0 and then promptly sticks argc there
straight away. libc uses the stack for everything (ARM_r0 gets clobbered
again with the return value of execve anyway), so we just have to assume
that nobody is using argv and/or envp from r1 and r2. I think that's a
sensible assumption.
> As the above hack got dropped from the kernel (I don't think it ever made
> it into mainline), I think we should be safe getting rid of this
> initialization of regs->ARM_r0 to r2, leaving them all as zeros.
>
> We should probably also remove the selection of HAVE_AOUT from
> arch/arm/Kconfig too as this definitely won't work with any recent
> kernel (certainly not without binfmt_aout.c hacked in the above way.)
Sounds like a good idea, I'll write a patch...
Will