Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756046Ab2HAVqr (ORCPT ); Wed, 1 Aug 2012 17:46:47 -0400 Received: from mail-wg0-f44.google.com ([74.125.82.44]:50764 "EHLO mail-wg0-f44.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751610Ab2HAVqp (ORCPT ); Wed, 1 Aug 2012 17:46:45 -0400 From: Meredydd Luff To: linux-kernel@vger.kernel.org Cc: Kees Cook , "H. Peter Anvin" , Ingo Molnar , Jeff Dike , Richard Weinberger , Andrew Morton , Meredydd Luff Subject: [PATCH] [RFC] syscalls,x86: Add execveat() system call Date: Wed, 1 Aug 2012 22:44:36 +0100 Message-Id: <1343857476-3073-1-git-send-email-meredydd@senatehouse.org> X-Mailer: git-send-email 1.7.4.1 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 20237 Lines: 628 HPA is already on record calling for an execveat() which also does fexecve()'s job: https://lkml.org/lkml/2006/7/11/556. And the current glibc hack for fexecve() is already causing problems in the wild. Eg: https://bugzilla.redhat.com/show_bug.cgi?id=241609, https://lkml.org/lkml/2006/12/27/123, and as recounted at http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=514043. So here's an attempt at just that: ---------------------------------------------------------------------- This patch adds a new system call, execveat(2). execveat() is to execve() as openat() is to open(): it takes file descriptor that refers to a directory, and resolves the filename relative to that. In addition, if the filename is NULL, execveat() executes the file to which the file descriptor refers. This replicates the functionality of fexecve(), which is a system call in other UNIXen, but in Linux glibc v2.16 it's a gross hack that depends on /proc being mounted. That hack does not work in chrooted sandboxes, or stripped-down systems without /proc mounted. execveat() does. Only x86-64 and i386 ABIs are supported in this patch. User-Mode Linux is also supported. Signed-off-by: Meredydd Luff --- CREDITS | 5 +++ arch/alpha/kernel/binfmt_loader.c | 2 +- arch/um/kernel/exec.c | 36 ++++++++++++++++---- arch/x86/ia32/ia32entry.S | 1 + arch/x86/ia32/sys_ia32.c | 20 +++++++++++ arch/x86/include/asm/sys_ia32.h | 3 ++ arch/x86/kernel/entry_32.S | 17 ++++++++++ arch/x86/kernel/entry_64.S | 15 ++++++++ arch/x86/kernel/process.c | 27 +++++++++++++++ arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + arch/x86/um/sys_call_table_32.c | 1 + arch/x86/um/sys_call_table_64.c | 1 + fs/binfmt_elf.c | 2 +- fs/binfmt_elf_fdpic.c | 2 +- fs/binfmt_em86.c | 2 +- fs/binfmt_flat.c | 2 +- fs/binfmt_misc.c | 2 +- fs/binfmt_script.c | 2 +- fs/exec.c | 65 ++++++++++++++++++++++++++++++------ include/asm-generic/syscalls.h | 7 ++++ include/linux/compat.h | 2 + include/linux/fs.h | 2 +- include/linux/sched.h | 3 ++ 24 files changed, 195 insertions(+), 26 deletions(-) diff --git a/CREDITS b/CREDITS index d8fe12a..859b40b 100644 --- a/CREDITS +++ b/CREDITS @@ -2177,6 +2177,11 @@ S: Asterisk Ltd. S: Auckland S: New Zealand +N: Meredydd Luff +E: meredydd@senatehouse.org +W: http://www.senatehouse.org/ +D: execveat() system call + N: Tuomas J. Lukka E: Tuomas.Lukka@Helsinki.FI D: Original dual-monitor patches diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c index d1f474d..7968491 100644 --- a/arch/alpha/kernel/binfmt_loader.c +++ b/arch/alpha/kernel/binfmt_loader.c @@ -24,7 +24,7 @@ static int load_binary(struct linux_binprm *bprm, struct pt_regs *regs) loader = bprm->vma->vm_end - sizeof(void *); - file = open_exec("/sbin/loader"); + file = open_exec(AT_FDCWD, "/sbin/loader"); retval = PTR_ERR(file); if (IS_ERR(file)) return retval; diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 6cade93..cd7b2a9 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -44,13 +45,13 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) } EXPORT_SYMBOL(start_thread); -static long execve1(const char *file, - const char __user *const __user *argv, - const char __user *const __user *env) +static long execveat1(int dfd, const char *file, + const char __user *const __user *argv, + const char __user *const __user *env) { long error; - error = do_execve(file, argv, env, ¤t->thread.regs); + error = do_execveat(dfd, file, argv, env, ¤t->thread.regs); if (error == 0) { task_lock(current); current->ptrace &= ~PT_DTRACE; @@ -66,7 +67,7 @@ long um_execve(const char *file, const char __user *const __user *argv, const ch { long err; - err = execve1(file, argv, env); + err = execveat1(AT_FDCWD, file, argv, env); if (!err) UML_LONGJMP(current->thread.exec_buf, 1); return err; @@ -80,9 +81,30 @@ long sys_execve(const char __user *file, const char __user *const __user *argv, filename = getname(file); error = PTR_ERR(filename); - if (IS_ERR(filename)) goto out; - error = execve1(filename, argv, env); + if (IS_ERR(filename)) + goto out; + error = execveat1(AT_FDCWD, filename, argv, env); putname(filename); out: return error; } + +long sys_execveat(int dfd, const char __user *file, + const char __user *const __user *argv, + const char __user *const __user *env) +{ + long error; + char *filename = NULL; + + if (file) { + filename = getname(file); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + } + error = execveat1(dfd, filename, argv, env); + if (filename) + putname(filename); + out: + return error; +} diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 20e5f7b..fd71cd7 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -460,6 +460,7 @@ GLOBAL(\label) PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx PTREGSCALL stub32_execve, sys32_execve, %rcx + PTREGSCALL stub32_execveat, sys32_execveat, %r8 PTREGSCALL stub32_fork, sys_fork, %rdi PTREGSCALL stub32_clone, sys32_clone, %rdx PTREGSCALL stub32_vfork, sys_vfork, %rdi diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 4540bec..e88398a 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -400,6 +400,26 @@ asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv return error; } +asmlinkage long sys32_execveat(int dfd, const char __user *name, + compat_uptr_t __user *argv, + compat_uptr_t __user *envp, + struct pt_regs *regs) +{ + long error; + char *filename = NULL; + + if (name) { + filename = getname(name); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + return error; + } + error = compat_do_execveat(dfd, filename, argv, envp, regs); + if (filename) + putname(filename); + return error; +} + asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp, struct pt_regs *regs) { diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 3fda9db4..59629cf 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -56,6 +56,9 @@ asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); asmlinkage long sys32_execve(const char __user *, compat_uptr_t __user *, compat_uptr_t __user *, struct pt_regs *); +asmlinkage long sys32_execveat(int dfd, const char __user *, + compat_uptr_t __user *, compat_uptr_t __user *, + struct pt_regs *); asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); long sys32_lseek(unsigned int, int, unsigned int); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 623f288..771f19a 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -724,10 +724,27 @@ ENTRY(ptregs_##name) ; \ CFI_ENDPROC; \ ENDPROC(ptregs_##name) +#define PTREGSCALL4(name) \ +ENTRY(ptregs_##name) ; \ + CFI_STARTPROC; \ + leal 4(%esp),%eax; \ + pushl_cfi %eax; \ + pushl_cfi PT_ESI(%eax); \ + movl PT_EDX(%eax),%ecx; \ + movl PT_ECX(%eax),%edx; \ + movl PT_EBX(%eax),%eax; \ + call sys_##name; \ + addl $8,%esp; \ + CFI_ADJUST_CFA_OFFSET -8; \ + ret; \ + CFI_ENDPROC; \ +ENDPROC(ptregs_##name) + PTREGSCALL1(iopl) PTREGSCALL0(fork) PTREGSCALL0(vfork) PTREGSCALL3(execve) +PTREGSCALL4(execveat) PTREGSCALL2(sigaltstack) PTREGSCALL0(sigreturn) PTREGSCALL0(rt_sigreturn) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7d65133..597c356 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -766,6 +766,21 @@ ENTRY(stub_execve) CFI_ENDPROC END(stub_execve) +ENTRY(stub_execveat) + CFI_STARTPROC + addq $8, %rsp + PARTIAL_FRAME 0 + SAVE_REST + FIXUP_TOP_OF_STACK %r11 + movq %rsp, %r8 + call sys_execveat + RESTORE_TOP_OF_STACK %r11 + movq %rax,RAX(%rsp) + RESTORE_REST + jmp int_ret_from_sys_call + CFI_ENDPROC +END(stub_execveat) + /* * sigreturn is special because it needs to restore all registers on return. * This cannot be done with SYSRET, so use the IRET return path instead. diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 735279e..5fa3c0a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -363,6 +363,33 @@ long sys_execve(const char __user *name, return error; } +long sys_execveat(int dfd, const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) +{ + long error; + char *filename = NULL; + + if (name) { + filename = getname(name); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + return error; + } + error = do_execveat(dfd, filename, argv, envp, regs); + +#ifdef CONFIG_X86_32 + if (error == 0) { + /* Make sure we don't return using sysenter.. */ + set_thread_flag(TIF_IRET); + } +#endif + + if (filename) + putname(filename); + return error; +} + /* * Idle related variables and functions */ diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 7a35a6e..a6a2788 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -356,3 +356,4 @@ 347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev 349 i386 kcmp sys_kcmp +350 i386 execveat ptregs_execveat stub32_execveat diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 51171ae..c35df9e 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -319,6 +319,7 @@ 310 64 process_vm_readv sys_process_vm_readv 311 64 process_vm_writev sys_process_vm_writev 312 64 kcmp sys_kcmp +313 64 execveat stub_execveat # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 68d1dc9..05ec219 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -26,6 +26,7 @@ #define ptregs_fork sys_fork #define ptregs_execve sys_execve +#define ptregs_execveat sys_execveat #define ptregs_iopl sys_iopl #define ptregs_vm86old sys_vm86old #define ptregs_clone sys_clone diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c index 170bd92..68dbda7 100644 --- a/arch/x86/um/sys_call_table_64.c +++ b/arch/x86/um/sys_call_table_64.c @@ -31,6 +31,7 @@ #define stub_fork sys_fork #define stub_vfork sys_vfork #define stub_execve sys_execve +#define stub_execveat sys_execveat #define stub_sigaltstack sys_sigaltstack #define stub_rt_sigreturn sys_rt_sigreturn diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 1b52956..f165c1d 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -653,7 +653,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0') goto out_free_interp; - interpreter = open_exec(elf_interpreter); + interpreter = open_exec(AT_FDCWD, elf_interpreter); retval = PTR_ERR(interpreter); if (IS_ERR(interpreter)) goto out_free_interp; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 3d77cf8..6923adf 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -235,7 +235,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, kdebug("Using ELF interpreter %s", interpreter_name); /* replace the program with the interpreter */ - interpreter = open_exec(interpreter_name); + interpreter = open_exec(AT_FDCWD, interpreter_name); retval = PTR_ERR(interpreter); if (IS_ERR(interpreter)) { interpreter = NULL; diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index 2790c7e..7a1f312 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c @@ -80,7 +80,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) * Note that we use open_exec() as the name is now in kernel * space, and we don't need to copy it. */ - file = open_exec(interp); + file = open_exec(AT_FDCWD, interp); if (IS_ERR(file)) return PTR_ERR(file); diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 178cb70..f53b9e2 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -820,7 +820,7 @@ static int load_flat_shared_library(int id, struct lib_info *libs) /* Open the file up */ bprm.filename = buf; - bprm.file = open_exec(bprm.filename); + bprm.file = open_exec(AT_FDCWD, bprm.filename); res = PTR_ERR(bprm.file); if (IS_ERR(bprm.file)) return res; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 790b3cd..330e9ec 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -178,7 +178,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) bprm->interp = iname; /* for binfmt_script */ - interp_file = open_exec (iname); + interp_file = open_exec(AT_FDCWD, iname); retval = PTR_ERR (interp_file); if (IS_ERR (interp_file)) goto _error; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index d3b8c1f..09a056b 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -87,7 +87,7 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) /* * OK, now restart the process with the interpreter's dentry. */ - file = open_exec(interp); + file = open_exec(AT_FDCWD, interp); if (IS_ERR(file)) return PTR_ERR(file); diff --git a/fs/exec.c b/fs/exec.c index da27b91..48a3ba7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -758,7 +758,7 @@ EXPORT_SYMBOL(setup_arg_pages); #endif /* CONFIG_MMU */ -struct file *open_exec(const char *name) +struct file *open_exec(int dfd, const char *name) { struct file *file; int err; @@ -768,9 +768,22 @@ struct file *open_exec(const char *name) .intent = LOOKUP_OPEN }; - file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW); - if (IS_ERR(file)) - goto out; + if (name) { + file = do_filp_open(dfd, name, &open_exec_flags, + LOOKUP_FOLLOW); + if (IS_ERR(file)) + goto out; + } else { + file = fget(dfd); + err = -EBADF; + if (!file) + goto exit_nofile; + + err = inode_permission(file->f_path.dentry->d_inode, + open_exec_flags.acc_mode); + if (err) + goto exit; + } err = -EACCES; if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) @@ -779,7 +792,8 @@ struct file *open_exec(const char *name) if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) goto exit; - fsnotify_open(file); + if (name) + fsnotify_open(file); err = deny_write_access(file); if (err) @@ -790,6 +804,7 @@ out: exit: fput(file); +exit_nofile: return ERR_PTR(err); } EXPORT_SYMBOL(open_exec); @@ -1463,7 +1478,8 @@ EXPORT_SYMBOL(search_binary_handler); /* * sys_execve() executes a new program. */ -static int do_execve_common(const char *filename, +static int do_execveat_common(int dfd, + const char *filename, struct user_arg_ptr argv, struct user_arg_ptr envp, struct pt_regs *regs) @@ -1510,7 +1526,7 @@ static int do_execve_common(const char *filename, clear_in_exec = retval; current->in_execve = 1; - file = open_exec(filename); + file = open_exec(dfd, filename); retval = PTR_ERR(file); if (IS_ERR(file)) goto out_unmark; @@ -1518,8 +1534,9 @@ static int do_execve_common(const char *filename, sched_exec(); bprm->file = file; - bprm->filename = filename; - bprm->interp = filename; + bprm->filename = filename ?: + (const char *) file->f_path.dentry->d_name.name; + bprm->interp = bprm->filename; retval = bprm_mm_init(bprm); if (retval) @@ -1597,7 +1614,17 @@ int do_execve(const char *filename, { struct user_arg_ptr argv = { .ptr.native = __argv }; struct user_arg_ptr envp = { .ptr.native = __envp }; - return do_execve_common(filename, argv, envp, regs); + return do_execveat_common(AT_FDCWD, filename, argv, envp, regs); +} + +int do_execveat(int dfd, const char *filename, + const char __user *const __user *__argv, + const char __user *const __user *__envp, + struct pt_regs *regs) +{ + struct user_arg_ptr argv = { .ptr.native = __argv }; + struct user_arg_ptr envp = { .ptr.native = __envp }; + return do_execveat_common(dfd, filename, argv, envp, regs); } #ifdef CONFIG_COMPAT @@ -1614,7 +1641,23 @@ int compat_do_execve(char *filename, .is_compat = true, .ptr.compat = __envp, }; - return do_execve_common(filename, argv, envp, regs); + return do_execveat_common(AT_FDCWD, filename, argv, envp, regs); +} + +int compat_do_execveat(int dfd, char *filename, + compat_uptr_t __user *__argv, + compat_uptr_t __user *__envp, + struct pt_regs *regs) +{ + struct user_arg_ptr argv = { + .is_compat = true, + .ptr.compat = __argv, + }; + struct user_arg_ptr envp = { + .is_compat = true, + .ptr.compat = __envp, + }; + return do_execveat_common(dfd, filename, argv, envp, regs); } #endif diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h index d89dec8..ded3be3 100644 --- a/include/asm-generic/syscalls.h +++ b/include/asm-generic/syscalls.h @@ -29,6 +29,13 @@ asmlinkage long sys_execve(const char __user *filename, struct pt_regs *regs); #endif +#ifndef sys_execveat +asmlinkage long sys_execveat(int dfd, const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp, + struct pt_regs *regs); +#endif + #ifndef sys_mmap2 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, diff --git a/include/linux/compat.h b/include/linux/compat.h index 4e89039..25ab799 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -291,6 +291,8 @@ asmlinkage ssize_t compat_sys_pwritev(unsigned long fd, int compat_do_execve(char *filename, compat_uptr_t __user *argv, compat_uptr_t __user *envp, struct pt_regs *regs); +int compat_do_execveat(int dfd, char *filename, compat_uptr_t __user *argv, + compat_uptr_t __user *envp, struct pt_regs *regs); asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, diff --git a/include/linux/fs.h b/include/linux/fs.h index 17fd887..6c40dc7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2316,7 +2316,7 @@ extern struct file *create_write_pipe(int flags); extern void free_write_pipe(struct file *); extern int kernel_read(struct file *, loff_t, char *, unsigned long); -extern struct file * open_exec(const char *); +extern struct file *open_exec(int dfd, const char *); /* fs/dcache.c -- generic fs support functions */ extern int is_subdir(struct dentry *, struct dentry *); diff --git a/include/linux/sched.h b/include/linux/sched.h index 4a1f493..2720f83 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2310,6 +2310,9 @@ extern int disallow_signal(int); extern int do_execve(const char *, const char __user * const __user *, const char __user * const __user *, struct pt_regs *); +extern int do_execveat(int dfd, const char *, + const char __user * const __user *, + const char __user * const __user *, struct pt_regs *); extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); -- 1.7.4.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/