2012-08-03 21:06:29

by Alex Kelly

[permalink] [raw]
Subject: [PATCH 1/2] Moved core dump functionality into its own file

From: Alex Kelly <[email protected]>

This was done in preparation for making core dump functionality optional.

The variable "suid_dumpable" and associated functions are left in fs/exec.c
because they're used elsewhere, such as in ptrace.

Signed-off-by: Alex Kelly <[email protected]>
Reviewed-by: Josh Triplett <[email protected]>
---
fs/Makefile | 2 +-
fs/coredump.c | 689 ++++++++++++++++++++++++++++++++++++++++++++++++++
fs/exec.c | 647 +----------------------------------------------
include/linux/sched.h | 1 +
4 files changed, 692 insertions(+), 647 deletions(-)
create mode 100644 fs/coredump.c

diff --git a/fs/Makefile b/fs/Makefile
index 2fb9779..8938f82 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o drop_caches.o splice.o sync.o utimes.o \
- stack.o fs_struct.o statfs.o
+ stack.o fs_struct.o statfs.o coredump.o

ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/coredump.c b/fs/coredump.c
new file mode 100644
index 0000000..34c9236
--- /dev/null
+++ b/fs/coredump.c
@@ -0,0 +1,689 @@
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/mm.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/swap.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/perf_event.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/key.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/utsname.h>
+#include <linux/pid_namespace.h>
+#include <linux/module.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+#include <linux/tsacct_kern.h>
+#include <linux/cn_proc.h>
+#include <linux/audit.h>
+#include <linux/tracehook.h>
+#include <linux/kmod.h>
+#include <linux/fsnotify.h>
+#include <linux/fs_struct.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/oom.h>
+#include <linux/compat.h>
+
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/tlb.h>
+#include <asm/exec.h>
+
+#include <trace/events/task.h>
+#include "internal.h"
+
+#include <trace/events/sched.h>
+
+int core_uses_pid;
+char core_pattern[CORENAME_MAX_SIZE] = "core";
+unsigned int core_pipe_limit;
+static atomic_t call_count = ATOMIC_INIT(1);
+
+/* The maximal length of core_pattern is also specified in sysctl.c */
+static int zap_process(struct task_struct *start, int exit_code)
+{
+ struct task_struct *t;
+ int nr = 0;
+
+ start->signal->flags = SIGNAL_GROUP_EXIT;
+ start->signal->group_exit_code = exit_code;
+ start->signal->group_stop_count = 0;
+
+ t = start;
+ do {
+ task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
+ if (t != current && t->mm) {
+ sigaddset(&t->pending.signal, SIGKILL);
+ signal_wake_up(t, 1);
+ nr++;
+ }
+ } while_each_thread(start, t);
+
+ return nr;
+}
+
+
+/*
+ * Core dumping helper functions. These are the only things you should
+ * do on a core-file: use only these functions to write out all the
+ * necessary info.
+ */
+int dump_write(struct file *file, const void *addr, int nr)
+{
+ return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+}
+EXPORT_SYMBOL(dump_write);
+
+struct core_name {
+ char *corename;
+ int used, size;
+};
+
+static int expand_corename(struct core_name *cn)
+{
+ char *old_corename = cn->corename;
+
+ cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
+ cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
+
+ if (!cn->corename) {
+ kfree(old_corename);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int cn_printf(struct core_name *cn, const char *fmt, ...)
+{
+ char *cur;
+ int need;
+ int ret;
+ va_list arg;
+
+ va_start(arg, fmt);
+ need = vsnprintf(NULL, 0, fmt, arg);
+ va_end(arg);
+
+ if (likely(need < cn->size - cn->used - 1))
+ goto out_printf;
+
+ ret = expand_corename(cn);
+ if (ret)
+ goto expand_fail;
+
+out_printf:
+ cur = cn->corename + cn->used;
+ va_start(arg, fmt);
+ vsnprintf(cur, need + 1, fmt, arg);
+ va_end(arg);
+ cn->used += need;
+ return 0;
+
+expand_fail:
+ return ret;
+}
+
+static void cn_escape(char *str)
+{
+ for (; *str; str++)
+ if (*str == '/')
+ *str = '!';
+}
+
+static int cn_print_exe_file(struct core_name *cn)
+{
+ struct file *exe_file;
+ char *pathbuf, *path;
+ int ret;
+
+ exe_file = get_mm_exe_file(current->mm);
+ if (!exe_file) {
+ char *commstart = cn->corename + cn->used;
+ ret = cn_printf(cn, "%s (path unknown)", current->comm);
+ cn_escape(commstart);
+ return ret;
+ }
+
+ pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
+ if (!pathbuf) {
+ ret = -ENOMEM;
+ goto put_exe_file;
+ }
+
+ path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
+ if (IS_ERR(path)) {
+ ret = PTR_ERR(path);
+ goto free_buf;
+ }
+
+ cn_escape(path);
+
+ ret = cn_printf(cn, "%s", path);
+
+free_buf:
+ kfree(pathbuf);
+put_exe_file:
+ fput(exe_file);
+ return ret;
+}
+
+/* format_corename will inspect the pattern parameter, and output a
+ * name into corename, which must have space for at least
+ * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
+ */
+static int format_corename(struct core_name *cn, long signr)
+{
+ const struct cred *cred = current_cred();
+ const char *pat_ptr = core_pattern;
+ int ispipe = (*pat_ptr == '|');
+ int pid_in_pattern = 0;
+ int err = 0;
+
+ cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
+ cn->corename = kmalloc(cn->size, GFP_KERNEL);
+ cn->used = 0;
+
+ if (!cn->corename)
+ return -ENOMEM;
+
+ /* Repeat as long as we have more pattern to process and more output
+ space */
+ while (*pat_ptr) {
+ if (*pat_ptr != '%') {
+ if (*pat_ptr == 0)
+ goto out;
+ err = cn_printf(cn, "%c", *pat_ptr++);
+ } else {
+ switch (*++pat_ptr) {
+ /* single % at the end, drop that */
+ case 0:
+ goto out;
+ /* Double percent, output one percent */
+ case '%':
+ err = cn_printf(cn, "%c", '%');
+ break;
+ /* pid */
+ case 'p':
+ pid_in_pattern = 1;
+ err = cn_printf(cn, "%d",
+ task_tgid_vnr(current));
+ break;
+ /* uid */
+ case 'u':
+ err = cn_printf(cn, "%d", cred->uid);
+ break;
+ /* gid */
+ case 'g':
+ err = cn_printf(cn, "%d", cred->gid);
+ break;
+ /* signal that caused the coredump */
+ case 's':
+ err = cn_printf(cn, "%ld", signr);
+ break;
+ /* UNIX time of coredump */
+ case 't': {
+ struct timeval tv;
+ do_gettimeofday(&tv);
+ err = cn_printf(cn, "%lu", tv.tv_sec);
+ break;
+ }
+ /* hostname */
+ case 'h': {
+ char *namestart = cn->corename + cn->used;
+ down_read(&uts_sem);
+ err = cn_printf(cn, "%s",
+ utsname()->nodename);
+ up_read(&uts_sem);
+ cn_escape(namestart);
+ break;
+ }
+ /* executable */
+ case 'e': {
+ char *commstart = cn->corename + cn->used;
+ err = cn_printf(cn, "%s", current->comm);
+ cn_escape(commstart);
+ break;
+ }
+ case 'E':
+ err = cn_print_exe_file(cn);
+ break;
+ /* core limit size */
+ case 'c':
+ err = cn_printf(cn, "%lu",
+ rlimit(RLIMIT_CORE));
+ break;
+ default:
+ break;
+ }
+ ++pat_ptr;
+ }
+
+ if (err)
+ return err;
+ }
+
+ /* Backward compatibility with core_uses_pid:
+ *
+ * If core_pattern does not include a %p (as is the default)
+ * and core_uses_pid is set, then .%pid will be appended to
+ * the filename. Do not do this for piped commands. */
+ if (!ispipe && !pid_in_pattern && core_uses_pid) {
+ err = cn_printf(cn, ".%d", task_tgid_vnr(current));
+ if (err)
+ return err;
+ }
+out:
+ return ispipe;
+}
+
+static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
+ struct core_state *core_state, int exit_code)
+{
+ struct task_struct *g, *p;
+ unsigned long flags;
+ int nr = -EAGAIN;
+
+ spin_lock_irq(&tsk->sighand->siglock);
+ if (!signal_group_exit(tsk->signal)) {
+ mm->core_state = core_state;
+ nr = zap_process(tsk, exit_code);
+ }
+ spin_unlock_irq(&tsk->sighand->siglock);
+ if (unlikely(nr < 0))
+ return nr;
+
+ if (atomic_read(&mm->mm_users) == nr + 1)
+ goto done;
+ /*
+ * We should find and kill all tasks which use this mm, and we should
+ * count them correctly into ->nr_threads. We don't take tasklist
+ * lock, but this is safe wrt:
+ *
+ * fork:
+ * None of sub-threads can fork after zap_process(leader). All
+ * processes which were created before this point should be
+ * visible to zap_threads() because copy_process() adds the new
+ * process to the tail of init_task.tasks list, and lock/unlock
+ * of ->siglock provides a memory barrier.
+ *
+ * do_exit:
+ * The caller holds mm->mmap_sem. This means that the task which
+ * uses this mm can't pass exit_mm(), so it can't exit or clear
+ * its ->mm.
+ *
+ * de_thread:
+ * It does list_replace_rcu(&leader->tasks, &current->tasks),
+ * we must see either old or new leader, this does not matter.
+ * However, it can change p->sighand, so lock_task_sighand(p)
+ * must be used. Since p->mm != NULL and we hold ->mmap_sem
+ * it can't fail.
+ *
+ * Note also that "g" can be the old leader with ->mm == NULL
+ * and already unhashed and thus removed from ->thread_group.
+ * This is OK, __unhash_process()->list_del_rcu() does not
+ * clear the ->next pointer, we will find the new leader via
+ * next_thread().
+ */
+ rcu_read_lock();
+ for_each_process(g) {
+ if (g == tsk->group_leader)
+ continue;
+ if (g->flags & PF_KTHREAD)
+ continue;
+ p = g;
+ do {
+ if (p->mm) {
+ if (unlikely(p->mm == mm)) {
+ lock_task_sighand(p, &flags);
+ nr += zap_process(p, exit_code);
+ unlock_task_sighand(p, &flags);
+ }
+ break;
+ }
+ } while_each_thread(g, p);
+ }
+ rcu_read_unlock();
+done:
+ atomic_set(&core_state->nr_threads, nr);
+ return nr;
+}
+
+static int coredump_wait(int exit_code, struct core_state *core_state)
+{
+ struct task_struct *tsk = current;
+ struct mm_struct *mm = tsk->mm;
+ int core_waiters = -EBUSY;
+
+ init_completion(&core_state->startup);
+ core_state->dumper.task = tsk;
+ core_state->dumper.next = NULL;
+
+ down_write(&mm->mmap_sem);
+ if (!mm->core_state)
+ core_waiters = zap_threads(tsk, mm, core_state, exit_code);
+ up_write(&mm->mmap_sem);
+
+ if (core_waiters > 0){
+ struct core_thread *ptr;
+
+ wait_for_completion(&core_state->startup);
+ /*
+ * Wait for all the threads to become inactive, so that
+ * all the thread context (extended register state, like
+ * fpu etc) gets copied to the memory.
+ */
+ ptr = core_state->dumper.next;
+ while (ptr != NULL) {
+ wait_task_inactive(ptr->task, 0);
+ ptr = ptr->next;
+ }
+ }
+
+ return core_waiters;
+}
+
+static void coredump_finish(struct mm_struct *mm)
+{
+ struct core_thread *curr, *next;
+ struct task_struct *task;
+
+ next = mm->core_state->dumper.next;
+ while ((curr = next) != NULL) {
+ next = curr->next;
+ task = curr->task;
+ /*
+ * see exit_mm(), curr->task must not see
+ * ->task == NULL before we read ->next.
+ */
+ smp_mb();
+ curr->task = NULL;
+ wake_up_process(task);
+ }
+
+ mm->core_state = NULL;
+}
+
+static void wait_for_dump_helpers(struct file *file)
+{
+ struct pipe_inode_info *pipe;
+
+ pipe = file->f_path.dentry->d_inode->i_pipe;
+
+ pipe_lock(pipe);
+ pipe->readers++;
+ pipe->writers--;
+
+ while ((pipe->readers > 1) && (!signal_pending(current))) {
+ wake_up_interruptible_sync(&pipe->wait);
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+ pipe_wait(pipe);
+ }
+
+ pipe->readers--;
+ pipe->writers++;
+ pipe_unlock(pipe);
+
+}
+
+/*
+ * umh_pipe_setup
+ * helper function to customize the process used
+ * to collect the core in userspace. Specifically
+ * it sets up a pipe and installs it as fd 0 (stdin)
+ * for the process. Returns 0 on success, or
+ * PTR_ERR on failure.
+ * Note that it also sets the core limit to 1. This
+ * is a special value that we use to trap recursive
+ * core dumps
+ */
+static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
+{
+ struct file *files[2];
+ struct fdtable *fdt;
+ struct coredump_params *cp = (struct coredump_params *)info->data;
+ struct files_struct *cf = current->files;
+ int err = create_pipe_files(files, 0);
+ if (err)
+ return err;
+
+ cp->file = files[1];
+
+ sys_close(0);
+ fd_install(0, files[0]);
+ spin_lock(&cf->file_lock);
+ fdt = files_fdtable(cf);
+ __set_open_fd(0, fdt);
+ __clear_close_on_exec(0, fdt);
+ spin_unlock(&cf->file_lock);
+
+ /* and disallow core files too */
+ current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
+
+ return 0;
+}
+
+
+void do_coredump(long signr, int exit_code, struct pt_regs *regs)
+{
+ struct core_state core_state;
+ struct core_name cn;
+ struct mm_struct *mm = current->mm;
+ struct linux_binfmt * binfmt;
+ const struct cred *old_cred;
+ struct cred *cred;
+ int retval = 0;
+ int flag = 0;
+ int ispipe;
+ bool need_nonrelative = false;
+ static atomic_t core_dump_count = ATOMIC_INIT(0);
+ struct coredump_params cprm = {
+ .signr = signr,
+ .regs = regs,
+ .limit = rlimit(RLIMIT_CORE),
+ /*
+ * We must use the same mm->flags while dumping core to avoid
+ * inconsistency of bit flags, since this flag is not protected
+ * by any locks.
+ */
+ .mm_flags = mm->flags,
+ };
+
+ audit_core_dumps(signr);
+
+ binfmt = mm->binfmt;
+ if (!binfmt || !binfmt->core_dump)
+ goto fail;
+ if (!__get_dumpable(cprm.mm_flags))
+ goto fail;
+
+ cred = prepare_creds();
+ if (!cred)
+ goto fail;
+ /*
+ * We cannot trust fsuid as being the "true" uid of the process
+ * nor do we know its entire history. We only know it was tainted
+ * so we dump it as root in mode 2, and onlt into a controlled
+ * environment (pipe handler or fully qualified path)
+ */
+ if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
+ /* Setuid core dump mode */
+ flag = O_EXCL; /* Stop rewrite attacks */
+ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
+ need_nonrelative = true;
+ }
+
+ retval = coredump_wait(exit_code, &core_state);
+ if (retval < 0)
+ goto fail_creds;
+
+ old_cred = override_creds(cred);
+
+ /*
+ * Clear any false indication of pending signals that might
+ * be seen by the filesystem code called to write the core file.
+ */
+ clear_thread_flag(TIF_SIGPENDING);
+
+ ispipe = format_corename(&cn, signr);
+
+ if (ispipe) {
+ int dump_count;
+ char **helper_argv;
+
+ if (ispipe < 0) {
+ printk(KERN_WARNING "format_corename failed\n");
+ printk(KERN_WARNING "Aborting core\n");
+ goto fail_corename;
+ }
+
+ if (cprm.limit == 1) {
+ /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
+ *
+ * Normally core limits are irrelevant to pipes, since
+ * we're not writing to the file system, but we use
+ * cprm.limit of 1 here as a speacial value, this is a
+ * consistent way to catch recursive crashes.
+ * We can still crash if the core_pattern binary sets
+ * RLIM_CORE = !1, but it runs as root, and can do
+ * lots of stupid things
+ *
+ * Note that we use task_tgid_vnr here to grab the pid
+ * of the process group leader. That way we get the
+ * right pid if a thread in a multi-threaded
+ * core_pattern process dies.
+ */
+ printk(KERN_WARNING
+ "Process %d(%s) has RLIMIT_CORE set to 1\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Aborting core\n");
+ goto fail_unlock;
+ }
+ cprm.limit = RLIM_INFINITY;
+
+ dump_count = atomic_inc_return(&core_dump_count);
+ if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+ printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Skipping core dump\n");
+ goto fail_dropcount;
+ }
+
+ helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
+ if (!helper_argv) {
+ printk(KERN_WARNING "%s failed to allocate memory\n",
+ __func__);
+ goto fail_dropcount;
+ }
+
+ retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
+ NULL, UMH_WAIT_EXEC, umh_pipe_setup,
+ NULL, &cprm);
+ argv_free(helper_argv);
+ if (retval) {
+ printk(KERN_INFO "Core dump to %s pipe failed\n",
+ cn.corename);
+ goto close_fail;
+ }
+ } else {
+ struct inode *inode;
+
+ if (cprm.limit < binfmt->min_coredump)
+ goto fail_unlock;
+
+ if (need_nonrelative && cn.corename[0] != '/') {
+ printk(KERN_WARNING "Pid %d(%s) can only dump core "\
+ "To fully qualified path!\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Skipping core dump\n");
+ goto fail_unlock;
+ }
+
+ cprm.file = filp_open(cn.corename,
+ O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
+ 0600);
+ if (IS_ERR(cprm.file))
+ goto fail_unlock;
+
+ inode = cprm.file->f_path.dentry->d_inode;
+ if (inode->i_nlink > 1)
+ goto close_fail;
+ if (d_unhashed(cprm.file->f_path.dentry))
+ goto close_fail;
+ /*
+ * AK: actually i see no reason to not allow this for named
+ * pipes etc, but keep the previous behaviour for now.
+ */
+ if (!S_ISREG(inode->i_mode))
+ goto close_fail;
+ /*
+ * Dont allow local users get cute and trick others to coredump
+ * into their pre-created files.
+ */
+ if (!uid_eq(inode->i_uid, current_fsuid()))
+ goto close_fail;
+ if (!cprm.file->f_op || !cprm.file->f_op->write)
+ goto close_fail;
+ if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
+ goto close_fail;
+ }
+
+ retval = binfmt->core_dump(&cprm);
+ if (retval)
+ current->signal->group_exit_code |= 0x80;
+
+ if (ispipe && core_pipe_limit)
+ wait_for_dump_helpers(cprm.file);
+close_fail:
+ if (cprm.file)
+ filp_close(cprm.file, NULL);
+fail_dropcount:
+ if (ispipe)
+ atomic_dec(&core_dump_count);
+fail_unlock:
+ kfree(cn.corename);
+fail_corename:
+ coredump_finish(mm);
+ revert_creds(old_cred);
+fail_creds:
+ put_cred(cred);
+fail:
+ return;
+}
+
+int dump_seek(struct file *file, loff_t off)
+{
+ int ret = 1;
+
+ if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+ if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
+ return 0;
+ } else {
+ char *buf = (char *)get_zeroed_page(GFP_KERNEL);
+
+ if (!buf)
+ return 0;
+ while (off > 0) {
+ unsigned long n = off;
+
+ if (n > PAGE_SIZE)
+ n = PAGE_SIZE;
+ if (!dump_write(file, buf, n)) {
+ ret = 0;
+ break;
+ }
+ off -= n;
+ }
+ free_page((unsigned long)buf);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(dump_seek);
diff --git a/fs/exec.c b/fs/exec.c
index 574cf4d..b604050 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -66,19 +66,8 @@

#include <trace/events/sched.h>

-int core_uses_pid;
-char core_pattern[CORENAME_MAX_SIZE] = "core";
-unsigned int core_pipe_limit;
int suid_dumpable = 0;

-struct core_name {
- char *corename;
- int used, size;
-};
-static atomic_t call_count = ATOMIC_INIT(1);
-
-/* The maximal length of core_pattern is also specified in sysctl.c */
-
static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);

@@ -1632,353 +1621,6 @@ void set_binfmt(struct linux_binfmt *new)

EXPORT_SYMBOL(set_binfmt);

-static int expand_corename(struct core_name *cn)
-{
- char *old_corename = cn->corename;
-
- cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
- cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
-
- if (!cn->corename) {
- kfree(old_corename);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static int cn_printf(struct core_name *cn, const char *fmt, ...)
-{
- char *cur;
- int need;
- int ret;
- va_list arg;
-
- va_start(arg, fmt);
- need = vsnprintf(NULL, 0, fmt, arg);
- va_end(arg);
-
- if (likely(need < cn->size - cn->used - 1))
- goto out_printf;
-
- ret = expand_corename(cn);
- if (ret)
- goto expand_fail;
-
-out_printf:
- cur = cn->corename + cn->used;
- va_start(arg, fmt);
- vsnprintf(cur, need + 1, fmt, arg);
- va_end(arg);
- cn->used += need;
- return 0;
-
-expand_fail:
- return ret;
-}
-
-static void cn_escape(char *str)
-{
- for (; *str; str++)
- if (*str == '/')
- *str = '!';
-}
-
-static int cn_print_exe_file(struct core_name *cn)
-{
- struct file *exe_file;
- char *pathbuf, *path;
- int ret;
-
- exe_file = get_mm_exe_file(current->mm);
- if (!exe_file) {
- char *commstart = cn->corename + cn->used;
- ret = cn_printf(cn, "%s (path unknown)", current->comm);
- cn_escape(commstart);
- return ret;
- }
-
- pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
- if (!pathbuf) {
- ret = -ENOMEM;
- goto put_exe_file;
- }
-
- path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
- if (IS_ERR(path)) {
- ret = PTR_ERR(path);
- goto free_buf;
- }
-
- cn_escape(path);
-
- ret = cn_printf(cn, "%s", path);
-
-free_buf:
- kfree(pathbuf);
-put_exe_file:
- fput(exe_file);
- return ret;
-}
-
-/* format_corename will inspect the pattern parameter, and output a
- * name into corename, which must have space for at least
- * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
- */
-static int format_corename(struct core_name *cn, long signr)
-{
- const struct cred *cred = current_cred();
- const char *pat_ptr = core_pattern;
- int ispipe = (*pat_ptr == '|');
- int pid_in_pattern = 0;
- int err = 0;
-
- cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
- cn->corename = kmalloc(cn->size, GFP_KERNEL);
- cn->used = 0;
-
- if (!cn->corename)
- return -ENOMEM;
-
- /* Repeat as long as we have more pattern to process and more output
- space */
- while (*pat_ptr) {
- if (*pat_ptr != '%') {
- if (*pat_ptr == 0)
- goto out;
- err = cn_printf(cn, "%c", *pat_ptr++);
- } else {
- switch (*++pat_ptr) {
- /* single % at the end, drop that */
- case 0:
- goto out;
- /* Double percent, output one percent */
- case '%':
- err = cn_printf(cn, "%c", '%');
- break;
- /* pid */
- case 'p':
- pid_in_pattern = 1;
- err = cn_printf(cn, "%d",
- task_tgid_vnr(current));
- break;
- /* uid */
- case 'u':
- err = cn_printf(cn, "%d", cred->uid);
- break;
- /* gid */
- case 'g':
- err = cn_printf(cn, "%d", cred->gid);
- break;
- /* signal that caused the coredump */
- case 's':
- err = cn_printf(cn, "%ld", signr);
- break;
- /* UNIX time of coredump */
- case 't': {
- struct timeval tv;
- do_gettimeofday(&tv);
- err = cn_printf(cn, "%lu", tv.tv_sec);
- break;
- }
- /* hostname */
- case 'h': {
- char *namestart = cn->corename + cn->used;
- down_read(&uts_sem);
- err = cn_printf(cn, "%s",
- utsname()->nodename);
- up_read(&uts_sem);
- cn_escape(namestart);
- break;
- }
- /* executable */
- case 'e': {
- char *commstart = cn->corename + cn->used;
- err = cn_printf(cn, "%s", current->comm);
- cn_escape(commstart);
- break;
- }
- case 'E':
- err = cn_print_exe_file(cn);
- break;
- /* core limit size */
- case 'c':
- err = cn_printf(cn, "%lu",
- rlimit(RLIMIT_CORE));
- break;
- default:
- break;
- }
- ++pat_ptr;
- }
-
- if (err)
- return err;
- }
-
- /* Backward compatibility with core_uses_pid:
- *
- * If core_pattern does not include a %p (as is the default)
- * and core_uses_pid is set, then .%pid will be appended to
- * the filename. Do not do this for piped commands. */
- if (!ispipe && !pid_in_pattern && core_uses_pid) {
- err = cn_printf(cn, ".%d", task_tgid_vnr(current));
- if (err)
- return err;
- }
-out:
- return ispipe;
-}
-
-static int zap_process(struct task_struct *start, int exit_code)
-{
- struct task_struct *t;
- int nr = 0;
-
- start->signal->flags = SIGNAL_GROUP_EXIT;
- start->signal->group_exit_code = exit_code;
- start->signal->group_stop_count = 0;
-
- t = start;
- do {
- task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
- if (t != current && t->mm) {
- sigaddset(&t->pending.signal, SIGKILL);
- signal_wake_up(t, 1);
- nr++;
- }
- } while_each_thread(start, t);
-
- return nr;
-}
-
-static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
- struct core_state *core_state, int exit_code)
-{
- struct task_struct *g, *p;
- unsigned long flags;
- int nr = -EAGAIN;
-
- spin_lock_irq(&tsk->sighand->siglock);
- if (!signal_group_exit(tsk->signal)) {
- mm->core_state = core_state;
- nr = zap_process(tsk, exit_code);
- }
- spin_unlock_irq(&tsk->sighand->siglock);
- if (unlikely(nr < 0))
- return nr;
-
- if (atomic_read(&mm->mm_users) == nr + 1)
- goto done;
- /*
- * We should find and kill all tasks which use this mm, and we should
- * count them correctly into ->nr_threads. We don't take tasklist
- * lock, but this is safe wrt:
- *
- * fork:
- * None of sub-threads can fork after zap_process(leader). All
- * processes which were created before this point should be
- * visible to zap_threads() because copy_process() adds the new
- * process to the tail of init_task.tasks list, and lock/unlock
- * of ->siglock provides a memory barrier.
- *
- * do_exit:
- * The caller holds mm->mmap_sem. This means that the task which
- * uses this mm can't pass exit_mm(), so it can't exit or clear
- * its ->mm.
- *
- * de_thread:
- * It does list_replace_rcu(&leader->tasks, &current->tasks),
- * we must see either old or new leader, this does not matter.
- * However, it can change p->sighand, so lock_task_sighand(p)
- * must be used. Since p->mm != NULL and we hold ->mmap_sem
- * it can't fail.
- *
- * Note also that "g" can be the old leader with ->mm == NULL
- * and already unhashed and thus removed from ->thread_group.
- * This is OK, __unhash_process()->list_del_rcu() does not
- * clear the ->next pointer, we will find the new leader via
- * next_thread().
- */
- rcu_read_lock();
- for_each_process(g) {
- if (g == tsk->group_leader)
- continue;
- if (g->flags & PF_KTHREAD)
- continue;
- p = g;
- do {
- if (p->mm) {
- if (unlikely(p->mm == mm)) {
- lock_task_sighand(p, &flags);
- nr += zap_process(p, exit_code);
- unlock_task_sighand(p, &flags);
- }
- break;
- }
- } while_each_thread(g, p);
- }
- rcu_read_unlock();
-done:
- atomic_set(&core_state->nr_threads, nr);
- return nr;
-}
-
-static int coredump_wait(int exit_code, struct core_state *core_state)
-{
- struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->mm;
- int core_waiters = -EBUSY;
-
- init_completion(&core_state->startup);
- core_state->dumper.task = tsk;
- core_state->dumper.next = NULL;
-
- down_write(&mm->mmap_sem);
- if (!mm->core_state)
- core_waiters = zap_threads(tsk, mm, core_state, exit_code);
- up_write(&mm->mmap_sem);
-
- if (core_waiters > 0) {
- struct core_thread *ptr;
-
- wait_for_completion(&core_state->startup);
- /*
- * Wait for all the threads to become inactive, so that
- * all the thread context (extended register state, like
- * fpu etc) gets copied to the memory.
- */
- ptr = core_state->dumper.next;
- while (ptr != NULL) {
- wait_task_inactive(ptr->task, 0);
- ptr = ptr->next;
- }
- }
-
- return core_waiters;
-}
-
-static void coredump_finish(struct mm_struct *mm)
-{
- struct core_thread *curr, *next;
- struct task_struct *task;
-
- next = mm->core_state->dumper.next;
- while ((curr = next) != NULL) {
- next = curr->next;
- task = curr->task;
- /*
- * see exit_mm(), curr->task must not see
- * ->task == NULL before we read ->next.
- */
- smp_mb();
- curr->task = NULL;
- wake_up_process(task);
- }
-
- mm->core_state = NULL;
-}
-
/*
* set_dumpable converts traditional three-value dumpable to two flags and
* stores them into mm->flags. It modifies lower two bits of mm->flags, but
@@ -2020,7 +1662,7 @@ void set_dumpable(struct mm_struct *mm, int value)
}
}

-static int __get_dumpable(unsigned long mm_flags)
+int __get_dumpable(unsigned long mm_flags)
{
int ret;

@@ -2032,290 +1674,3 @@ int get_dumpable(struct mm_struct *mm)
{
return __get_dumpable(mm->flags);
}
-
-static void wait_for_dump_helpers(struct file *file)
-{
- struct pipe_inode_info *pipe;
-
- pipe = file->f_path.dentry->d_inode->i_pipe;
-
- pipe_lock(pipe);
- pipe->readers++;
- pipe->writers--;
-
- while ((pipe->readers > 1) && (!signal_pending(current))) {
- wake_up_interruptible_sync(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- pipe_wait(pipe);
- }
-
- pipe->readers--;
- pipe->writers++;
- pipe_unlock(pipe);
-
-}
-
-
-/*
- * umh_pipe_setup
- * helper function to customize the process used
- * to collect the core in userspace. Specifically
- * it sets up a pipe and installs it as fd 0 (stdin)
- * for the process. Returns 0 on success, or
- * PTR_ERR on failure.
- * Note that it also sets the core limit to 1. This
- * is a special value that we use to trap recursive
- * core dumps
- */
-static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
-{
- struct file *files[2];
- struct fdtable *fdt;
- struct coredump_params *cp = (struct coredump_params *)info->data;
- struct files_struct *cf = current->files;
- int err = create_pipe_files(files, 0);
- if (err)
- return err;
-
- cp->file = files[1];
-
- sys_close(0);
- fd_install(0, files[0]);
- spin_lock(&cf->file_lock);
- fdt = files_fdtable(cf);
- __set_open_fd(0, fdt);
- __clear_close_on_exec(0, fdt);
- spin_unlock(&cf->file_lock);
-
- /* and disallow core files too */
- current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
-
- return 0;
-}
-
-void do_coredump(long signr, int exit_code, struct pt_regs *regs)
-{
- struct core_state core_state;
- struct core_name cn;
- struct mm_struct *mm = current->mm;
- struct linux_binfmt * binfmt;
- const struct cred *old_cred;
- struct cred *cred;
- int retval = 0;
- int flag = 0;
- int ispipe;
- bool need_nonrelative = false;
- static atomic_t core_dump_count = ATOMIC_INIT(0);
- struct coredump_params cprm = {
- .signr = signr,
- .regs = regs,
- .limit = rlimit(RLIMIT_CORE),
- /*
- * We must use the same mm->flags while dumping core to avoid
- * inconsistency of bit flags, since this flag is not protected
- * by any locks.
- */
- .mm_flags = mm->flags,
- };
-
- audit_core_dumps(signr);
-
- binfmt = mm->binfmt;
- if (!binfmt || !binfmt->core_dump)
- goto fail;
- if (!__get_dumpable(cprm.mm_flags))
- goto fail;
-
- cred = prepare_creds();
- if (!cred)
- goto fail;
- /*
- * We cannot trust fsuid as being the "true" uid of the process
- * nor do we know its entire history. We only know it was tainted
- * so we dump it as root in mode 2, and only into a controlled
- * environment (pipe handler or fully qualified path).
- */
- if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
- /* Setuid core dump mode */
- flag = O_EXCL; /* Stop rewrite attacks */
- cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
- need_nonrelative = true;
- }
-
- retval = coredump_wait(exit_code, &core_state);
- if (retval < 0)
- goto fail_creds;
-
- old_cred = override_creds(cred);
-
- /*
- * Clear any false indication of pending signals that might
- * be seen by the filesystem code called to write the core file.
- */
- clear_thread_flag(TIF_SIGPENDING);
-
- ispipe = format_corename(&cn, signr);
-
- if (ispipe) {
- int dump_count;
- char **helper_argv;
-
- if (ispipe < 0) {
- printk(KERN_WARNING "format_corename failed\n");
- printk(KERN_WARNING "Aborting core\n");
- goto fail_corename;
- }
-
- if (cprm.limit == 1) {
- /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
- *
- * Normally core limits are irrelevant to pipes, since
- * we're not writing to the file system, but we use
- * cprm.limit of 1 here as a speacial value, this is a
- * consistent way to catch recursive crashes.
- * We can still crash if the core_pattern binary sets
- * RLIM_CORE = !1, but it runs as root, and can do
- * lots of stupid things.
- *
- * Note that we use task_tgid_vnr here to grab the pid
- * of the process group leader. That way we get the
- * right pid if a thread in a multi-threaded
- * core_pattern process dies.
- */
- printk(KERN_WARNING
- "Process %d(%s) has RLIMIT_CORE set to 1\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Aborting core\n");
- goto fail_unlock;
- }
- cprm.limit = RLIM_INFINITY;
-
- dump_count = atomic_inc_return(&core_dump_count);
- if (core_pipe_limit && (core_pipe_limit < dump_count)) {
- printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Skipping core dump\n");
- goto fail_dropcount;
- }
-
- helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
- if (!helper_argv) {
- printk(KERN_WARNING "%s failed to allocate memory\n",
- __func__);
- goto fail_dropcount;
- }
-
- retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
- NULL, UMH_WAIT_EXEC, umh_pipe_setup,
- NULL, &cprm);
- argv_free(helper_argv);
- if (retval) {
- printk(KERN_INFO "Core dump to %s pipe failed\n",
- cn.corename);
- goto close_fail;
- }
- } else {
- struct inode *inode;
-
- if (cprm.limit < binfmt->min_coredump)
- goto fail_unlock;
-
- if (need_nonrelative && cn.corename[0] != '/') {
- printk(KERN_WARNING "Pid %d(%s) can only dump core "\
- "to fully qualified path!\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Skipping core dump\n");
- goto fail_unlock;
- }
-
- cprm.file = filp_open(cn.corename,
- O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
- 0600);
- if (IS_ERR(cprm.file))
- goto fail_unlock;
-
- inode = cprm.file->f_path.dentry->d_inode;
- if (inode->i_nlink > 1)
- goto close_fail;
- if (d_unhashed(cprm.file->f_path.dentry))
- goto close_fail;
- /*
- * AK: actually i see no reason to not allow this for named
- * pipes etc, but keep the previous behaviour for now.
- */
- if (!S_ISREG(inode->i_mode))
- goto close_fail;
- /*
- * Dont allow local users get cute and trick others to coredump
- * into their pre-created files.
- */
- if (!uid_eq(inode->i_uid, current_fsuid()))
- goto close_fail;
- if (!cprm.file->f_op || !cprm.file->f_op->write)
- goto close_fail;
- if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
- goto close_fail;
- }
-
- retval = binfmt->core_dump(&cprm);
- if (retval)
- current->signal->group_exit_code |= 0x80;
-
- if (ispipe && core_pipe_limit)
- wait_for_dump_helpers(cprm.file);
-close_fail:
- if (cprm.file)
- filp_close(cprm.file, NULL);
-fail_dropcount:
- if (ispipe)
- atomic_dec(&core_dump_count);
-fail_unlock:
- kfree(cn.corename);
-fail_corename:
- coredump_finish(mm);
- revert_creds(old_cred);
-fail_creds:
- put_cred(cred);
-fail:
- return;
-}
-
-/*
- * Core dumping helper functions. These are the only things you should
- * do on a core-file: use only these functions to write out all the
- * necessary info.
- */
-int dump_write(struct file *file, const void *addr, int nr)
-{
- return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
-EXPORT_SYMBOL(dump_write);
-
-int dump_seek(struct file *file, loff_t off)
-{
- int ret = 1;
-
- if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
- if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
- return 0;
- } else {
- char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-
- if (!buf)
- return 0;
- while (off > 0) {
- unsigned long n = off;
-
- if (n > PAGE_SIZE)
- n = PAGE_SIZE;
- if (!dump_write(file, buf, n)) {
- ret = 0;
- break;
- }
- off -= n;
- }
- free_page((unsigned long)buf);
- }
- return ret;
-}
-EXPORT_SYMBOL(dump_seek);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c147e70..7bb5047 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -413,6 +413,7 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}

extern void set_dumpable(struct mm_struct *mm, int value);
extern int get_dumpable(struct mm_struct *mm);
+extern int __get_dumpable(unsigned long mm_flags);

/* get/set_dumpable() values */
#define SUID_DUMPABLE_DISABLED 0
--
1.7.11.2


2012-08-03 21:06:40

by Alex Kelly

[permalink] [raw]
Subject: [PATCH 2/2] Made core dump functionality optional

From: Alex <[email protected]>

Adds an expert Kconfig option, CONFIG_COREDUMP, which allows disabling of core dump.
This saves approximately 2.6k in the compiled kernel, and complements CONFIG_ELF_CORE,
which now depends on it.

CONFIG_COREDUMP also disables coredump-related sysctls, except for suid_dumpable and
related functions, which are necessary for ptrace.

Signed-off-by: Alex Kelly <[email protected]>
Reviewed-by: Josh Triplett <[email protected]>
---
fs/Kconfig.binfmt | 8 ++++++++
fs/Makefile | 3 ++-
include/linux/binfmts.h | 4 ++++
init/Kconfig | 1 +
kernel/sysctl.c | 6 +++++-
5 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 0225742..0efd152 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -164,3 +164,11 @@ config BINFMT_MISC
You may say M here for module support and later load the module when
you have use for it; the module is called binfmt_misc. If you
don't know what to answer at this point, say Y.
+
+config COREDUMP
+ bool "Enable core dump support" if EXPERT
+ default y
+ help
+ This option enables support for performing core dumps. You almost
+ certainly want to say Y here. Not necessary on systems that never
+ need debugging or only ever run flawless code.
diff --git a/fs/Makefile b/fs/Makefile
index 8938f82..1d7af79 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o drop_caches.o splice.o sync.o utimes.o \
- stack.o fs_struct.o statfs.o coredump.o
+ stack.o fs_struct.o statfs.o

ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
@@ -48,6 +48,7 @@ obj-$(CONFIG_FS_MBCACHE) += mbcache.o
obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
+obj-$(CONFIG_COREDUMP) += coredump.o

obj-$(CONFIG_FHANDLE) += fhandle.o

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 366422b..00e2e89 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -132,7 +132,11 @@ extern int copy_strings_kernel(int argc, const char *const *argv,
struct linux_binprm *bprm);
extern int prepare_bprm_creds(struct linux_binprm *bprm);
extern void install_exec_creds(struct linux_binprm *bprm);
+#ifdef CONFIG_COREDUMP
extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
+#else
+static inline void do_coredump(long signr, int exit_code, struct pt_regs *regs) {}
+#endif
extern void set_binfmt(struct linux_binfmt *new);
extern void free_bprm(struct linux_binprm *);

diff --git a/init/Kconfig b/init/Kconfig
index af6c7f8..0e75056 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1230,6 +1230,7 @@ config BUG
Just say Y.

config ELF_CORE
+ depends on COREDUMP
default y
bool "Enable ELF core dumps" if EXPERT
help
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 87174ef..af57e84 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -97,10 +97,12 @@
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
extern int max_threads;
-extern int core_uses_pid;
extern int suid_dumpable;
+#ifdef CONFIG_COREDUMP
+extern int core_uses_pid;
extern char core_pattern[];
extern unsigned int core_pipe_limit;
+#endif
extern int pid_max;
extern int min_free_kbytes;
extern int pid_max_min, pid_max_max;
@@ -404,6 +406,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_COREDUMP
{
.procname = "core_uses_pid",
.data = &core_uses_pid,
@@ -425,6 +428,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#endif
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
--
1.7.11.2

2012-08-04 06:43:12

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 1/2] Moved core dump functionality into its own file


* Alex Kelly <[email protected]> wrote:

> From: Alex Kelly <[email protected]>
>
> This was done in preparation for making core dump functionality optional.

Please use present tense and a sane title, something like:

fs: Move core dump functionality into its own file
fs: Make core dump functionality optional

While looking at that code, there's also a few uglies in it,
like:

> + return nr;
> +}
> +
> +
> +/*

> +
> + /* Repeat as long as we have more pattern to process and more output
> + space */

> +}
> +
> +
> +void do_coredump(long signr, int exit_code, struct pt_regs *regs)

> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -413,6 +413,7 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
>
> extern void set_dumpable(struct mm_struct *mm, int value);
> extern int get_dumpable(struct mm_struct *mm);
> +extern int __get_dumpable(unsigned long mm_flags);

These prototypes should move out of sched.h and into a
coredump.h header or so.

If we are touching this code lets use the opportunity and do
this right.

Note, I'd suggest to put any such further changes into separate,
additional patches at the end: a cleanup patch, a header file
introduction patch, etc. - and keep the "dumb code movement"
chnage in the initial patch. This will make it all much easier
to review.

Please also review the code for more uglies, the above was just
a very quick stylistic scan.

Thanks,

Ingo

2012-08-05 08:31:42

by Alex Kelly

[permalink] [raw]
Subject: [PATCH 1/4] fs: Move core dump functionality into its own file

From: Alex Kelly <[email protected]>

This prepares for making core dump functionality optional.

The variable "suid_dumpable" and associated functions are left in fs/exec.c
because they're used elsewhere, such as in ptrace.

Signed-off-by: Alex Kelly <[email protected]>
Reviewed-by: Josh Triplett <[email protected]>
---
fs/Makefile | 2 +-
fs/coredump.c | 689 ++++++++++++++++++++++++++++++++++++++++++++++++++
fs/exec.c | 647 +----------------------------------------------
include/linux/sched.h | 1 +
4 files changed, 692 insertions(+), 647 deletions(-)
create mode 100644 fs/coredump.c

diff --git a/fs/Makefile b/fs/Makefile
index 2fb9779..8938f82 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o drop_caches.o splice.o sync.o utimes.o \
- stack.o fs_struct.o statfs.o
+ stack.o fs_struct.o statfs.o coredump.o

ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/coredump.c b/fs/coredump.c
new file mode 100644
index 0000000..34c9236
--- /dev/null
+++ b/fs/coredump.c
@@ -0,0 +1,689 @@
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/mm.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/swap.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/perf_event.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/key.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/utsname.h>
+#include <linux/pid_namespace.h>
+#include <linux/module.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+#include <linux/tsacct_kern.h>
+#include <linux/cn_proc.h>
+#include <linux/audit.h>
+#include <linux/tracehook.h>
+#include <linux/kmod.h>
+#include <linux/fsnotify.h>
+#include <linux/fs_struct.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/oom.h>
+#include <linux/compat.h>
+
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/tlb.h>
+#include <asm/exec.h>
+
+#include <trace/events/task.h>
+#include "internal.h"
+
+#include <trace/events/sched.h>
+
+int core_uses_pid;
+char core_pattern[CORENAME_MAX_SIZE] = "core";
+unsigned int core_pipe_limit;
+static atomic_t call_count = ATOMIC_INIT(1);
+
+/* The maximal length of core_pattern is also specified in sysctl.c */
+static int zap_process(struct task_struct *start, int exit_code)
+{
+ struct task_struct *t;
+ int nr = 0;
+
+ start->signal->flags = SIGNAL_GROUP_EXIT;
+ start->signal->group_exit_code = exit_code;
+ start->signal->group_stop_count = 0;
+
+ t = start;
+ do {
+ task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
+ if (t != current && t->mm) {
+ sigaddset(&t->pending.signal, SIGKILL);
+ signal_wake_up(t, 1);
+ nr++;
+ }
+ } while_each_thread(start, t);
+
+ return nr;
+}
+
+
+/*
+ * Core dumping helper functions. These are the only things you should
+ * do on a core-file: use only these functions to write out all the
+ * necessary info.
+ */
+int dump_write(struct file *file, const void *addr, int nr)
+{
+ return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+}
+EXPORT_SYMBOL(dump_write);
+
+struct core_name {
+ char *corename;
+ int used, size;
+};
+
+static int expand_corename(struct core_name *cn)
+{
+ char *old_corename = cn->corename;
+
+ cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
+ cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
+
+ if (!cn->corename) {
+ kfree(old_corename);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int cn_printf(struct core_name *cn, const char *fmt, ...)
+{
+ char *cur;
+ int need;
+ int ret;
+ va_list arg;
+
+ va_start(arg, fmt);
+ need = vsnprintf(NULL, 0, fmt, arg);
+ va_end(arg);
+
+ if (likely(need < cn->size - cn->used - 1))
+ goto out_printf;
+
+ ret = expand_corename(cn);
+ if (ret)
+ goto expand_fail;
+
+out_printf:
+ cur = cn->corename + cn->used;
+ va_start(arg, fmt);
+ vsnprintf(cur, need + 1, fmt, arg);
+ va_end(arg);
+ cn->used += need;
+ return 0;
+
+expand_fail:
+ return ret;
+}
+
+static void cn_escape(char *str)
+{
+ for (; *str; str++)
+ if (*str == '/')
+ *str = '!';
+}
+
+static int cn_print_exe_file(struct core_name *cn)
+{
+ struct file *exe_file;
+ char *pathbuf, *path;
+ int ret;
+
+ exe_file = get_mm_exe_file(current->mm);
+ if (!exe_file) {
+ char *commstart = cn->corename + cn->used;
+ ret = cn_printf(cn, "%s (path unknown)", current->comm);
+ cn_escape(commstart);
+ return ret;
+ }
+
+ pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
+ if (!pathbuf) {
+ ret = -ENOMEM;
+ goto put_exe_file;
+ }
+
+ path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
+ if (IS_ERR(path)) {
+ ret = PTR_ERR(path);
+ goto free_buf;
+ }
+
+ cn_escape(path);
+
+ ret = cn_printf(cn, "%s", path);
+
+free_buf:
+ kfree(pathbuf);
+put_exe_file:
+ fput(exe_file);
+ return ret;
+}
+
+/* format_corename will inspect the pattern parameter, and output a
+ * name into corename, which must have space for at least
+ * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
+ */
+static int format_corename(struct core_name *cn, long signr)
+{
+ const struct cred *cred = current_cred();
+ const char *pat_ptr = core_pattern;
+ int ispipe = (*pat_ptr == '|');
+ int pid_in_pattern = 0;
+ int err = 0;
+
+ cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
+ cn->corename = kmalloc(cn->size, GFP_KERNEL);
+ cn->used = 0;
+
+ if (!cn->corename)
+ return -ENOMEM;
+
+ /* Repeat as long as we have more pattern to process and more output
+ space */
+ while (*pat_ptr) {
+ if (*pat_ptr != '%') {
+ if (*pat_ptr == 0)
+ goto out;
+ err = cn_printf(cn, "%c", *pat_ptr++);
+ } else {
+ switch (*++pat_ptr) {
+ /* single % at the end, drop that */
+ case 0:
+ goto out;
+ /* Double percent, output one percent */
+ case '%':
+ err = cn_printf(cn, "%c", '%');
+ break;
+ /* pid */
+ case 'p':
+ pid_in_pattern = 1;
+ err = cn_printf(cn, "%d",
+ task_tgid_vnr(current));
+ break;
+ /* uid */
+ case 'u':
+ err = cn_printf(cn, "%d", cred->uid);
+ break;
+ /* gid */
+ case 'g':
+ err = cn_printf(cn, "%d", cred->gid);
+ break;
+ /* signal that caused the coredump */
+ case 's':
+ err = cn_printf(cn, "%ld", signr);
+ break;
+ /* UNIX time of coredump */
+ case 't': {
+ struct timeval tv;
+ do_gettimeofday(&tv);
+ err = cn_printf(cn, "%lu", tv.tv_sec);
+ break;
+ }
+ /* hostname */
+ case 'h': {
+ char *namestart = cn->corename + cn->used;
+ down_read(&uts_sem);
+ err = cn_printf(cn, "%s",
+ utsname()->nodename);
+ up_read(&uts_sem);
+ cn_escape(namestart);
+ break;
+ }
+ /* executable */
+ case 'e': {
+ char *commstart = cn->corename + cn->used;
+ err = cn_printf(cn, "%s", current->comm);
+ cn_escape(commstart);
+ break;
+ }
+ case 'E':
+ err = cn_print_exe_file(cn);
+ break;
+ /* core limit size */
+ case 'c':
+ err = cn_printf(cn, "%lu",
+ rlimit(RLIMIT_CORE));
+ break;
+ default:
+ break;
+ }
+ ++pat_ptr;
+ }
+
+ if (err)
+ return err;
+ }
+
+ /* Backward compatibility with core_uses_pid:
+ *
+ * If core_pattern does not include a %p (as is the default)
+ * and core_uses_pid is set, then .%pid will be appended to
+ * the filename. Do not do this for piped commands. */
+ if (!ispipe && !pid_in_pattern && core_uses_pid) {
+ err = cn_printf(cn, ".%d", task_tgid_vnr(current));
+ if (err)
+ return err;
+ }
+out:
+ return ispipe;
+}
+
+static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
+ struct core_state *core_state, int exit_code)
+{
+ struct task_struct *g, *p;
+ unsigned long flags;
+ int nr = -EAGAIN;
+
+ spin_lock_irq(&tsk->sighand->siglock);
+ if (!signal_group_exit(tsk->signal)) {
+ mm->core_state = core_state;
+ nr = zap_process(tsk, exit_code);
+ }
+ spin_unlock_irq(&tsk->sighand->siglock);
+ if (unlikely(nr < 0))
+ return nr;
+
+ if (atomic_read(&mm->mm_users) == nr + 1)
+ goto done;
+ /*
+ * We should find and kill all tasks which use this mm, and we should
+ * count them correctly into ->nr_threads. We don't take tasklist
+ * lock, but this is safe wrt:
+ *
+ * fork:
+ * None of sub-threads can fork after zap_process(leader). All
+ * processes which were created before this point should be
+ * visible to zap_threads() because copy_process() adds the new
+ * process to the tail of init_task.tasks list, and lock/unlock
+ * of ->siglock provides a memory barrier.
+ *
+ * do_exit:
+ * The caller holds mm->mmap_sem. This means that the task which
+ * uses this mm can't pass exit_mm(), so it can't exit or clear
+ * its ->mm.
+ *
+ * de_thread:
+ * It does list_replace_rcu(&leader->tasks, &current->tasks),
+ * we must see either old or new leader, this does not matter.
+ * However, it can change p->sighand, so lock_task_sighand(p)
+ * must be used. Since p->mm != NULL and we hold ->mmap_sem
+ * it can't fail.
+ *
+ * Note also that "g" can be the old leader with ->mm == NULL
+ * and already unhashed and thus removed from ->thread_group.
+ * This is OK, __unhash_process()->list_del_rcu() does not
+ * clear the ->next pointer, we will find the new leader via
+ * next_thread().
+ */
+ rcu_read_lock();
+ for_each_process(g) {
+ if (g == tsk->group_leader)
+ continue;
+ if (g->flags & PF_KTHREAD)
+ continue;
+ p = g;
+ do {
+ if (p->mm) {
+ if (unlikely(p->mm == mm)) {
+ lock_task_sighand(p, &flags);
+ nr += zap_process(p, exit_code);
+ unlock_task_sighand(p, &flags);
+ }
+ break;
+ }
+ } while_each_thread(g, p);
+ }
+ rcu_read_unlock();
+done:
+ atomic_set(&core_state->nr_threads, nr);
+ return nr;
+}
+
+static int coredump_wait(int exit_code, struct core_state *core_state)
+{
+ struct task_struct *tsk = current;
+ struct mm_struct *mm = tsk->mm;
+ int core_waiters = -EBUSY;
+
+ init_completion(&core_state->startup);
+ core_state->dumper.task = tsk;
+ core_state->dumper.next = NULL;
+
+ down_write(&mm->mmap_sem);
+ if (!mm->core_state)
+ core_waiters = zap_threads(tsk, mm, core_state, exit_code);
+ up_write(&mm->mmap_sem);
+
+ if (core_waiters > 0){
+ struct core_thread *ptr;
+
+ wait_for_completion(&core_state->startup);
+ /*
+ * Wait for all the threads to become inactive, so that
+ * all the thread context (extended register state, like
+ * fpu etc) gets copied to the memory.
+ */
+ ptr = core_state->dumper.next;
+ while (ptr != NULL) {
+ wait_task_inactive(ptr->task, 0);
+ ptr = ptr->next;
+ }
+ }
+
+ return core_waiters;
+}
+
+static void coredump_finish(struct mm_struct *mm)
+{
+ struct core_thread *curr, *next;
+ struct task_struct *task;
+
+ next = mm->core_state->dumper.next;
+ while ((curr = next) != NULL) {
+ next = curr->next;
+ task = curr->task;
+ /*
+ * see exit_mm(), curr->task must not see
+ * ->task == NULL before we read ->next.
+ */
+ smp_mb();
+ curr->task = NULL;
+ wake_up_process(task);
+ }
+
+ mm->core_state = NULL;
+}
+
+static void wait_for_dump_helpers(struct file *file)
+{
+ struct pipe_inode_info *pipe;
+
+ pipe = file->f_path.dentry->d_inode->i_pipe;
+
+ pipe_lock(pipe);
+ pipe->readers++;
+ pipe->writers--;
+
+ while ((pipe->readers > 1) && (!signal_pending(current))) {
+ wake_up_interruptible_sync(&pipe->wait);
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+ pipe_wait(pipe);
+ }
+
+ pipe->readers--;
+ pipe->writers++;
+ pipe_unlock(pipe);
+
+}
+
+/*
+ * umh_pipe_setup
+ * helper function to customize the process used
+ * to collect the core in userspace. Specifically
+ * it sets up a pipe and installs it as fd 0 (stdin)
+ * for the process. Returns 0 on success, or
+ * PTR_ERR on failure.
+ * Note that it also sets the core limit to 1. This
+ * is a special value that we use to trap recursive
+ * core dumps
+ */
+static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
+{
+ struct file *files[2];
+ struct fdtable *fdt;
+ struct coredump_params *cp = (struct coredump_params *)info->data;
+ struct files_struct *cf = current->files;
+ int err = create_pipe_files(files, 0);
+ if (err)
+ return err;
+
+ cp->file = files[1];
+
+ sys_close(0);
+ fd_install(0, files[0]);
+ spin_lock(&cf->file_lock);
+ fdt = files_fdtable(cf);
+ __set_open_fd(0, fdt);
+ __clear_close_on_exec(0, fdt);
+ spin_unlock(&cf->file_lock);
+
+ /* and disallow core files too */
+ current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
+
+ return 0;
+}
+
+
+void do_coredump(long signr, int exit_code, struct pt_regs *regs)
+{
+ struct core_state core_state;
+ struct core_name cn;
+ struct mm_struct *mm = current->mm;
+ struct linux_binfmt * binfmt;
+ const struct cred *old_cred;
+ struct cred *cred;
+ int retval = 0;
+ int flag = 0;
+ int ispipe;
+ bool need_nonrelative = false;
+ static atomic_t core_dump_count = ATOMIC_INIT(0);
+ struct coredump_params cprm = {
+ .signr = signr,
+ .regs = regs,
+ .limit = rlimit(RLIMIT_CORE),
+ /*
+ * We must use the same mm->flags while dumping core to avoid
+ * inconsistency of bit flags, since this flag is not protected
+ * by any locks.
+ */
+ .mm_flags = mm->flags,
+ };
+
+ audit_core_dumps(signr);
+
+ binfmt = mm->binfmt;
+ if (!binfmt || !binfmt->core_dump)
+ goto fail;
+ if (!__get_dumpable(cprm.mm_flags))
+ goto fail;
+
+ cred = prepare_creds();
+ if (!cred)
+ goto fail;
+ /*
+ * We cannot trust fsuid as being the "true" uid of the process
+ * nor do we know its entire history. We only know it was tainted
+ * so we dump it as root in mode 2, and onlt into a controlled
+ * environment (pipe handler or fully qualified path)
+ */
+ if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
+ /* Setuid core dump mode */
+ flag = O_EXCL; /* Stop rewrite attacks */
+ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
+ need_nonrelative = true;
+ }
+
+ retval = coredump_wait(exit_code, &core_state);
+ if (retval < 0)
+ goto fail_creds;
+
+ old_cred = override_creds(cred);
+
+ /*
+ * Clear any false indication of pending signals that might
+ * be seen by the filesystem code called to write the core file.
+ */
+ clear_thread_flag(TIF_SIGPENDING);
+
+ ispipe = format_corename(&cn, signr);
+
+ if (ispipe) {
+ int dump_count;
+ char **helper_argv;
+
+ if (ispipe < 0) {
+ printk(KERN_WARNING "format_corename failed\n");
+ printk(KERN_WARNING "Aborting core\n");
+ goto fail_corename;
+ }
+
+ if (cprm.limit == 1) {
+ /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
+ *
+ * Normally core limits are irrelevant to pipes, since
+ * we're not writing to the file system, but we use
+ * cprm.limit of 1 here as a speacial value, this is a
+ * consistent way to catch recursive crashes.
+ * We can still crash if the core_pattern binary sets
+ * RLIM_CORE = !1, but it runs as root, and can do
+ * lots of stupid things
+ *
+ * Note that we use task_tgid_vnr here to grab the pid
+ * of the process group leader. That way we get the
+ * right pid if a thread in a multi-threaded
+ * core_pattern process dies.
+ */
+ printk(KERN_WARNING
+ "Process %d(%s) has RLIMIT_CORE set to 1\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Aborting core\n");
+ goto fail_unlock;
+ }
+ cprm.limit = RLIM_INFINITY;
+
+ dump_count = atomic_inc_return(&core_dump_count);
+ if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+ printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Skipping core dump\n");
+ goto fail_dropcount;
+ }
+
+ helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
+ if (!helper_argv) {
+ printk(KERN_WARNING "%s failed to allocate memory\n",
+ __func__);
+ goto fail_dropcount;
+ }
+
+ retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
+ NULL, UMH_WAIT_EXEC, umh_pipe_setup,
+ NULL, &cprm);
+ argv_free(helper_argv);
+ if (retval) {
+ printk(KERN_INFO "Core dump to %s pipe failed\n",
+ cn.corename);
+ goto close_fail;
+ }
+ } else {
+ struct inode *inode;
+
+ if (cprm.limit < binfmt->min_coredump)
+ goto fail_unlock;
+
+ if (need_nonrelative && cn.corename[0] != '/') {
+ printk(KERN_WARNING "Pid %d(%s) can only dump core "\
+ "To fully qualified path!\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Skipping core dump\n");
+ goto fail_unlock;
+ }
+
+ cprm.file = filp_open(cn.corename,
+ O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
+ 0600);
+ if (IS_ERR(cprm.file))
+ goto fail_unlock;
+
+ inode = cprm.file->f_path.dentry->d_inode;
+ if (inode->i_nlink > 1)
+ goto close_fail;
+ if (d_unhashed(cprm.file->f_path.dentry))
+ goto close_fail;
+ /*
+ * AK: actually i see no reason to not allow this for named
+ * pipes etc, but keep the previous behaviour for now.
+ */
+ if (!S_ISREG(inode->i_mode))
+ goto close_fail;
+ /*
+ * Dont allow local users get cute and trick others to coredump
+ * into their pre-created files.
+ */
+ if (!uid_eq(inode->i_uid, current_fsuid()))
+ goto close_fail;
+ if (!cprm.file->f_op || !cprm.file->f_op->write)
+ goto close_fail;
+ if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
+ goto close_fail;
+ }
+
+ retval = binfmt->core_dump(&cprm);
+ if (retval)
+ current->signal->group_exit_code |= 0x80;
+
+ if (ispipe && core_pipe_limit)
+ wait_for_dump_helpers(cprm.file);
+close_fail:
+ if (cprm.file)
+ filp_close(cprm.file, NULL);
+fail_dropcount:
+ if (ispipe)
+ atomic_dec(&core_dump_count);
+fail_unlock:
+ kfree(cn.corename);
+fail_corename:
+ coredump_finish(mm);
+ revert_creds(old_cred);
+fail_creds:
+ put_cred(cred);
+fail:
+ return;
+}
+
+int dump_seek(struct file *file, loff_t off)
+{
+ int ret = 1;
+
+ if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+ if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
+ return 0;
+ } else {
+ char *buf = (char *)get_zeroed_page(GFP_KERNEL);
+
+ if (!buf)
+ return 0;
+ while (off > 0) {
+ unsigned long n = off;
+
+ if (n > PAGE_SIZE)
+ n = PAGE_SIZE;
+ if (!dump_write(file, buf, n)) {
+ ret = 0;
+ break;
+ }
+ off -= n;
+ }
+ free_page((unsigned long)buf);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(dump_seek);
diff --git a/fs/exec.c b/fs/exec.c
index 574cf4d..b604050 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -66,19 +66,8 @@

#include <trace/events/sched.h>

-int core_uses_pid;
-char core_pattern[CORENAME_MAX_SIZE] = "core";
-unsigned int core_pipe_limit;
int suid_dumpable = 0;

-struct core_name {
- char *corename;
- int used, size;
-};
-static atomic_t call_count = ATOMIC_INIT(1);
-
-/* The maximal length of core_pattern is also specified in sysctl.c */
-
static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);

@@ -1632,353 +1621,6 @@ void set_binfmt(struct linux_binfmt *new)

EXPORT_SYMBOL(set_binfmt);

-static int expand_corename(struct core_name *cn)
-{
- char *old_corename = cn->corename;
-
- cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
- cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
-
- if (!cn->corename) {
- kfree(old_corename);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static int cn_printf(struct core_name *cn, const char *fmt, ...)
-{
- char *cur;
- int need;
- int ret;
- va_list arg;
-
- va_start(arg, fmt);
- need = vsnprintf(NULL, 0, fmt, arg);
- va_end(arg);
-
- if (likely(need < cn->size - cn->used - 1))
- goto out_printf;
-
- ret = expand_corename(cn);
- if (ret)
- goto expand_fail;
-
-out_printf:
- cur = cn->corename + cn->used;
- va_start(arg, fmt);
- vsnprintf(cur, need + 1, fmt, arg);
- va_end(arg);
- cn->used += need;
- return 0;
-
-expand_fail:
- return ret;
-}
-
-static void cn_escape(char *str)
-{
- for (; *str; str++)
- if (*str == '/')
- *str = '!';
-}
-
-static int cn_print_exe_file(struct core_name *cn)
-{
- struct file *exe_file;
- char *pathbuf, *path;
- int ret;
-
- exe_file = get_mm_exe_file(current->mm);
- if (!exe_file) {
- char *commstart = cn->corename + cn->used;
- ret = cn_printf(cn, "%s (path unknown)", current->comm);
- cn_escape(commstart);
- return ret;
- }
-
- pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
- if (!pathbuf) {
- ret = -ENOMEM;
- goto put_exe_file;
- }
-
- path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
- if (IS_ERR(path)) {
- ret = PTR_ERR(path);
- goto free_buf;
- }
-
- cn_escape(path);
-
- ret = cn_printf(cn, "%s", path);
-
-free_buf:
- kfree(pathbuf);
-put_exe_file:
- fput(exe_file);
- return ret;
-}
-
-/* format_corename will inspect the pattern parameter, and output a
- * name into corename, which must have space for at least
- * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
- */
-static int format_corename(struct core_name *cn, long signr)
-{
- const struct cred *cred = current_cred();
- const char *pat_ptr = core_pattern;
- int ispipe = (*pat_ptr == '|');
- int pid_in_pattern = 0;
- int err = 0;
-
- cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
- cn->corename = kmalloc(cn->size, GFP_KERNEL);
- cn->used = 0;
-
- if (!cn->corename)
- return -ENOMEM;
-
- /* Repeat as long as we have more pattern to process and more output
- space */
- while (*pat_ptr) {
- if (*pat_ptr != '%') {
- if (*pat_ptr == 0)
- goto out;
- err = cn_printf(cn, "%c", *pat_ptr++);
- } else {
- switch (*++pat_ptr) {
- /* single % at the end, drop that */
- case 0:
- goto out;
- /* Double percent, output one percent */
- case '%':
- err = cn_printf(cn, "%c", '%');
- break;
- /* pid */
- case 'p':
- pid_in_pattern = 1;
- err = cn_printf(cn, "%d",
- task_tgid_vnr(current));
- break;
- /* uid */
- case 'u':
- err = cn_printf(cn, "%d", cred->uid);
- break;
- /* gid */
- case 'g':
- err = cn_printf(cn, "%d", cred->gid);
- break;
- /* signal that caused the coredump */
- case 's':
- err = cn_printf(cn, "%ld", signr);
- break;
- /* UNIX time of coredump */
- case 't': {
- struct timeval tv;
- do_gettimeofday(&tv);
- err = cn_printf(cn, "%lu", tv.tv_sec);
- break;
- }
- /* hostname */
- case 'h': {
- char *namestart = cn->corename + cn->used;
- down_read(&uts_sem);
- err = cn_printf(cn, "%s",
- utsname()->nodename);
- up_read(&uts_sem);
- cn_escape(namestart);
- break;
- }
- /* executable */
- case 'e': {
- char *commstart = cn->corename + cn->used;
- err = cn_printf(cn, "%s", current->comm);
- cn_escape(commstart);
- break;
- }
- case 'E':
- err = cn_print_exe_file(cn);
- break;
- /* core limit size */
- case 'c':
- err = cn_printf(cn, "%lu",
- rlimit(RLIMIT_CORE));
- break;
- default:
- break;
- }
- ++pat_ptr;
- }
-
- if (err)
- return err;
- }
-
- /* Backward compatibility with core_uses_pid:
- *
- * If core_pattern does not include a %p (as is the default)
- * and core_uses_pid is set, then .%pid will be appended to
- * the filename. Do not do this for piped commands. */
- if (!ispipe && !pid_in_pattern && core_uses_pid) {
- err = cn_printf(cn, ".%d", task_tgid_vnr(current));
- if (err)
- return err;
- }
-out:
- return ispipe;
-}
-
-static int zap_process(struct task_struct *start, int exit_code)
-{
- struct task_struct *t;
- int nr = 0;
-
- start->signal->flags = SIGNAL_GROUP_EXIT;
- start->signal->group_exit_code = exit_code;
- start->signal->group_stop_count = 0;
-
- t = start;
- do {
- task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
- if (t != current && t->mm) {
- sigaddset(&t->pending.signal, SIGKILL);
- signal_wake_up(t, 1);
- nr++;
- }
- } while_each_thread(start, t);
-
- return nr;
-}
-
-static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
- struct core_state *core_state, int exit_code)
-{
- struct task_struct *g, *p;
- unsigned long flags;
- int nr = -EAGAIN;
-
- spin_lock_irq(&tsk->sighand->siglock);
- if (!signal_group_exit(tsk->signal)) {
- mm->core_state = core_state;
- nr = zap_process(tsk, exit_code);
- }
- spin_unlock_irq(&tsk->sighand->siglock);
- if (unlikely(nr < 0))
- return nr;
-
- if (atomic_read(&mm->mm_users) == nr + 1)
- goto done;
- /*
- * We should find and kill all tasks which use this mm, and we should
- * count them correctly into ->nr_threads. We don't take tasklist
- * lock, but this is safe wrt:
- *
- * fork:
- * None of sub-threads can fork after zap_process(leader). All
- * processes which were created before this point should be
- * visible to zap_threads() because copy_process() adds the new
- * process to the tail of init_task.tasks list, and lock/unlock
- * of ->siglock provides a memory barrier.
- *
- * do_exit:
- * The caller holds mm->mmap_sem. This means that the task which
- * uses this mm can't pass exit_mm(), so it can't exit or clear
- * its ->mm.
- *
- * de_thread:
- * It does list_replace_rcu(&leader->tasks, &current->tasks),
- * we must see either old or new leader, this does not matter.
- * However, it can change p->sighand, so lock_task_sighand(p)
- * must be used. Since p->mm != NULL and we hold ->mmap_sem
- * it can't fail.
- *
- * Note also that "g" can be the old leader with ->mm == NULL
- * and already unhashed and thus removed from ->thread_group.
- * This is OK, __unhash_process()->list_del_rcu() does not
- * clear the ->next pointer, we will find the new leader via
- * next_thread().
- */
- rcu_read_lock();
- for_each_process(g) {
- if (g == tsk->group_leader)
- continue;
- if (g->flags & PF_KTHREAD)
- continue;
- p = g;
- do {
- if (p->mm) {
- if (unlikely(p->mm == mm)) {
- lock_task_sighand(p, &flags);
- nr += zap_process(p, exit_code);
- unlock_task_sighand(p, &flags);
- }
- break;
- }
- } while_each_thread(g, p);
- }
- rcu_read_unlock();
-done:
- atomic_set(&core_state->nr_threads, nr);
- return nr;
-}
-
-static int coredump_wait(int exit_code, struct core_state *core_state)
-{
- struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->mm;
- int core_waiters = -EBUSY;
-
- init_completion(&core_state->startup);
- core_state->dumper.task = tsk;
- core_state->dumper.next = NULL;
-
- down_write(&mm->mmap_sem);
- if (!mm->core_state)
- core_waiters = zap_threads(tsk, mm, core_state, exit_code);
- up_write(&mm->mmap_sem);
-
- if (core_waiters > 0) {
- struct core_thread *ptr;
-
- wait_for_completion(&core_state->startup);
- /*
- * Wait for all the threads to become inactive, so that
- * all the thread context (extended register state, like
- * fpu etc) gets copied to the memory.
- */
- ptr = core_state->dumper.next;
- while (ptr != NULL) {
- wait_task_inactive(ptr->task, 0);
- ptr = ptr->next;
- }
- }
-
- return core_waiters;
-}
-
-static void coredump_finish(struct mm_struct *mm)
-{
- struct core_thread *curr, *next;
- struct task_struct *task;
-
- next = mm->core_state->dumper.next;
- while ((curr = next) != NULL) {
- next = curr->next;
- task = curr->task;
- /*
- * see exit_mm(), curr->task must not see
- * ->task == NULL before we read ->next.
- */
- smp_mb();
- curr->task = NULL;
- wake_up_process(task);
- }
-
- mm->core_state = NULL;
-}
-
/*
* set_dumpable converts traditional three-value dumpable to two flags and
* stores them into mm->flags. It modifies lower two bits of mm->flags, but
@@ -2020,7 +1662,7 @@ void set_dumpable(struct mm_struct *mm, int value)
}
}

-static int __get_dumpable(unsigned long mm_flags)
+int __get_dumpable(unsigned long mm_flags)
{
int ret;

@@ -2032,290 +1674,3 @@ int get_dumpable(struct mm_struct *mm)
{
return __get_dumpable(mm->flags);
}
-
-static void wait_for_dump_helpers(struct file *file)
-{
- struct pipe_inode_info *pipe;
-
- pipe = file->f_path.dentry->d_inode->i_pipe;
-
- pipe_lock(pipe);
- pipe->readers++;
- pipe->writers--;
-
- while ((pipe->readers > 1) && (!signal_pending(current))) {
- wake_up_interruptible_sync(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- pipe_wait(pipe);
- }
-
- pipe->readers--;
- pipe->writers++;
- pipe_unlock(pipe);
-
-}
-
-
-/*
- * umh_pipe_setup
- * helper function to customize the process used
- * to collect the core in userspace. Specifically
- * it sets up a pipe and installs it as fd 0 (stdin)
- * for the process. Returns 0 on success, or
- * PTR_ERR on failure.
- * Note that it also sets the core limit to 1. This
- * is a special value that we use to trap recursive
- * core dumps
- */
-static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
-{
- struct file *files[2];
- struct fdtable *fdt;
- struct coredump_params *cp = (struct coredump_params *)info->data;
- struct files_struct *cf = current->files;
- int err = create_pipe_files(files, 0);
- if (err)
- return err;
-
- cp->file = files[1];
-
- sys_close(0);
- fd_install(0, files[0]);
- spin_lock(&cf->file_lock);
- fdt = files_fdtable(cf);
- __set_open_fd(0, fdt);
- __clear_close_on_exec(0, fdt);
- spin_unlock(&cf->file_lock);
-
- /* and disallow core files too */
- current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
-
- return 0;
-}
-
-void do_coredump(long signr, int exit_code, struct pt_regs *regs)
-{
- struct core_state core_state;
- struct core_name cn;
- struct mm_struct *mm = current->mm;
- struct linux_binfmt * binfmt;
- const struct cred *old_cred;
- struct cred *cred;
- int retval = 0;
- int flag = 0;
- int ispipe;
- bool need_nonrelative = false;
- static atomic_t core_dump_count = ATOMIC_INIT(0);
- struct coredump_params cprm = {
- .signr = signr,
- .regs = regs,
- .limit = rlimit(RLIMIT_CORE),
- /*
- * We must use the same mm->flags while dumping core to avoid
- * inconsistency of bit flags, since this flag is not protected
- * by any locks.
- */
- .mm_flags = mm->flags,
- };
-
- audit_core_dumps(signr);
-
- binfmt = mm->binfmt;
- if (!binfmt || !binfmt->core_dump)
- goto fail;
- if (!__get_dumpable(cprm.mm_flags))
- goto fail;
-
- cred = prepare_creds();
- if (!cred)
- goto fail;
- /*
- * We cannot trust fsuid as being the "true" uid of the process
- * nor do we know its entire history. We only know it was tainted
- * so we dump it as root in mode 2, and only into a controlled
- * environment (pipe handler or fully qualified path).
- */
- if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
- /* Setuid core dump mode */
- flag = O_EXCL; /* Stop rewrite attacks */
- cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
- need_nonrelative = true;
- }
-
- retval = coredump_wait(exit_code, &core_state);
- if (retval < 0)
- goto fail_creds;
-
- old_cred = override_creds(cred);
-
- /*
- * Clear any false indication of pending signals that might
- * be seen by the filesystem code called to write the core file.
- */
- clear_thread_flag(TIF_SIGPENDING);
-
- ispipe = format_corename(&cn, signr);
-
- if (ispipe) {
- int dump_count;
- char **helper_argv;
-
- if (ispipe < 0) {
- printk(KERN_WARNING "format_corename failed\n");
- printk(KERN_WARNING "Aborting core\n");
- goto fail_corename;
- }
-
- if (cprm.limit == 1) {
- /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
- *
- * Normally core limits are irrelevant to pipes, since
- * we're not writing to the file system, but we use
- * cprm.limit of 1 here as a speacial value, this is a
- * consistent way to catch recursive crashes.
- * We can still crash if the core_pattern binary sets
- * RLIM_CORE = !1, but it runs as root, and can do
- * lots of stupid things.
- *
- * Note that we use task_tgid_vnr here to grab the pid
- * of the process group leader. That way we get the
- * right pid if a thread in a multi-threaded
- * core_pattern process dies.
- */
- printk(KERN_WARNING
- "Process %d(%s) has RLIMIT_CORE set to 1\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Aborting core\n");
- goto fail_unlock;
- }
- cprm.limit = RLIM_INFINITY;
-
- dump_count = atomic_inc_return(&core_dump_count);
- if (core_pipe_limit && (core_pipe_limit < dump_count)) {
- printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Skipping core dump\n");
- goto fail_dropcount;
- }
-
- helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
- if (!helper_argv) {
- printk(KERN_WARNING "%s failed to allocate memory\n",
- __func__);
- goto fail_dropcount;
- }
-
- retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
- NULL, UMH_WAIT_EXEC, umh_pipe_setup,
- NULL, &cprm);
- argv_free(helper_argv);
- if (retval) {
- printk(KERN_INFO "Core dump to %s pipe failed\n",
- cn.corename);
- goto close_fail;
- }
- } else {
- struct inode *inode;
-
- if (cprm.limit < binfmt->min_coredump)
- goto fail_unlock;
-
- if (need_nonrelative && cn.corename[0] != '/') {
- printk(KERN_WARNING "Pid %d(%s) can only dump core "\
- "to fully qualified path!\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Skipping core dump\n");
- goto fail_unlock;
- }
-
- cprm.file = filp_open(cn.corename,
- O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
- 0600);
- if (IS_ERR(cprm.file))
- goto fail_unlock;
-
- inode = cprm.file->f_path.dentry->d_inode;
- if (inode->i_nlink > 1)
- goto close_fail;
- if (d_unhashed(cprm.file->f_path.dentry))
- goto close_fail;
- /*
- * AK: actually i see no reason to not allow this for named
- * pipes etc, but keep the previous behaviour for now.
- */
- if (!S_ISREG(inode->i_mode))
- goto close_fail;
- /*
- * Dont allow local users get cute and trick others to coredump
- * into their pre-created files.
- */
- if (!uid_eq(inode->i_uid, current_fsuid()))
- goto close_fail;
- if (!cprm.file->f_op || !cprm.file->f_op->write)
- goto close_fail;
- if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
- goto close_fail;
- }
-
- retval = binfmt->core_dump(&cprm);
- if (retval)
- current->signal->group_exit_code |= 0x80;
-
- if (ispipe && core_pipe_limit)
- wait_for_dump_helpers(cprm.file);
-close_fail:
- if (cprm.file)
- filp_close(cprm.file, NULL);
-fail_dropcount:
- if (ispipe)
- atomic_dec(&core_dump_count);
-fail_unlock:
- kfree(cn.corename);
-fail_corename:
- coredump_finish(mm);
- revert_creds(old_cred);
-fail_creds:
- put_cred(cred);
-fail:
- return;
-}
-
-/*
- * Core dumping helper functions. These are the only things you should
- * do on a core-file: use only these functions to write out all the
- * necessary info.
- */
-int dump_write(struct file *file, const void *addr, int nr)
-{
- return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
-EXPORT_SYMBOL(dump_write);
-
-int dump_seek(struct file *file, loff_t off)
-{
- int ret = 1;
-
- if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
- if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
- return 0;
- } else {
- char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-
- if (!buf)
- return 0;
- while (off > 0) {
- unsigned long n = off;
-
- if (n > PAGE_SIZE)
- n = PAGE_SIZE;
- if (!dump_write(file, buf, n)) {
- ret = 0;
- break;
- }
- off -= n;
- }
- free_page((unsigned long)buf);
- }
- return ret;
-}
-EXPORT_SYMBOL(dump_seek);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c147e70..7bb5047 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -413,6 +413,7 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}

extern void set_dumpable(struct mm_struct *mm, int value);
extern int get_dumpable(struct mm_struct *mm);
+extern int __get_dumpable(unsigned long mm_flags);

/* get/set_dumpable() values */
#define SUID_DUMPABLE_DISABLED 0
--
1.7.11.2

2012-08-05 08:31:48

by Alex Kelly

[permalink] [raw]
Subject: [PATCH 2/4] fs: Make core dump functionality optional

From: Alex <[email protected]>

Adds an expert Kconfig option, CONFIG_COREDUMP, which allows disabling of core dump.
This saves approximately 2.6k in the compiled kernel, and complements CONFIG_ELF_CORE,
which now depends on it.

CONFIG_COREDUMP also disables coredump-related sysctls, except for suid_dumpable and
related functions, which are necessary for ptrace.

Signed-off-by: Alex Kelly <[email protected]>
Reviewed-by: Josh Triplett <[email protected]>
---
fs/Kconfig.binfmt | 8 ++++++++
fs/Makefile | 3 ++-
include/linux/binfmts.h | 4 ++++
init/Kconfig | 1 +
kernel/sysctl.c | 6 +++++-
5 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 0225742..0efd152 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -164,3 +164,11 @@ config BINFMT_MISC
You may say M here for module support and later load the module when
you have use for it; the module is called binfmt_misc. If you
don't know what to answer at this point, say Y.
+
+config COREDUMP
+ bool "Enable core dump support" if EXPERT
+ default y
+ help
+ This option enables support for performing core dumps. You almost
+ certainly want to say Y here. Not necessary on systems that never
+ need debugging or only ever run flawless code.
diff --git a/fs/Makefile b/fs/Makefile
index 8938f82..1d7af79 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o drop_caches.o splice.o sync.o utimes.o \
- stack.o fs_struct.o statfs.o coredump.o
+ stack.o fs_struct.o statfs.o

ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
@@ -48,6 +48,7 @@ obj-$(CONFIG_FS_MBCACHE) += mbcache.o
obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
+obj-$(CONFIG_COREDUMP) += coredump.o

obj-$(CONFIG_FHANDLE) += fhandle.o

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 366422b..00e2e89 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -132,7 +132,11 @@ extern int copy_strings_kernel(int argc, const char *const *argv,
struct linux_binprm *bprm);
extern int prepare_bprm_creds(struct linux_binprm *bprm);
extern void install_exec_creds(struct linux_binprm *bprm);
+#ifdef CONFIG_COREDUMP
extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
+#else
+static inline void do_coredump(long signr, int exit_code, struct pt_regs *regs) {}
+#endif
extern void set_binfmt(struct linux_binfmt *new);
extern void free_bprm(struct linux_binprm *);

diff --git a/init/Kconfig b/init/Kconfig
index af6c7f8..0e75056 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1230,6 +1230,7 @@ config BUG
Just say Y.

config ELF_CORE
+ depends on COREDUMP
default y
bool "Enable ELF core dumps" if EXPERT
help
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 87174ef..af57e84 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -97,10 +97,12 @@
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
extern int max_threads;
-extern int core_uses_pid;
extern int suid_dumpable;
+#ifdef CONFIG_COREDUMP
+extern int core_uses_pid;
extern char core_pattern[];
extern unsigned int core_pipe_limit;
+#endif
extern int pid_max;
extern int min_free_kbytes;
extern int pid_max_min, pid_max_max;
@@ -404,6 +406,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_COREDUMP
{
.procname = "core_uses_pid",
.data = &core_uses_pid,
@@ -425,6 +428,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#endif
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
--
1.7.11.2

2012-08-05 08:31:58

by Alex Kelly

[permalink] [raw]
Subject: [PATCH 4/4] fs: Update coredump-related headers

From: Alex Kelly <[email protected]>

This patch creates a new header file, fs/coredump.h, which contains
functions only used by the new coredump.c. It also moves do_coredump
to the /include/coredump.h header file, for consistency.

Signed-off-by: Alex Kelly <[email protected]>
Reviewed-by: Josh Triplett <[email protected]>
---
fs/coredump.c | 2 ++
fs/coredump.h | 6 ++++++
fs/exec.c | 1 +
include/linux/binfmts.h | 5 -----
include/linux/coredump.h | 5 +++++
include/linux/sched.h | 2 --
kernel/signal.c | 1 +
7 files changed, 15 insertions(+), 7 deletions(-)
create mode 100644 fs/coredump.h

diff --git a/fs/coredump.c b/fs/coredump.c
index 7f75060..f5ac9d7 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -14,6 +14,7 @@
#include <linux/key.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
+#include <linux/coredump.h>
#include <linux/utsname.h>
#include <linux/pid_namespace.h>
#include <linux/module.h>
@@ -39,6 +40,7 @@

#include <trace/events/task.h>
#include "internal.h"
+#include "coredump.h"

#include <trace/events/sched.h>

diff --git a/fs/coredump.h b/fs/coredump.h
new file mode 100644
index 0000000..e39ff07
--- /dev/null
+++ b/fs/coredump.h
@@ -0,0 +1,6 @@
+#ifndef _FS_COREDUMP_H
+#define _FS_COREDUMP_H
+
+extern int __get_dumpable(unsigned long mm_flags);
+
+#endif
diff --git a/fs/exec.c b/fs/exec.c
index b604050..a0ad3a2 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -63,6 +63,7 @@

#include <trace/events/task.h>
#include "internal.h"
+#include "coredump.h"

#include <trace/events/sched.h>

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 00e2e89..c7b16ee 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -132,11 +132,6 @@ extern int copy_strings_kernel(int argc, const char *const *argv,
struct linux_binprm *bprm);
extern int prepare_bprm_creds(struct linux_binprm *bprm);
extern void install_exec_creds(struct linux_binprm *bprm);
-#ifdef CONFIG_COREDUMP
-extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
-#else
-static inline void do_coredump(long signr, int exit_code, struct pt_regs *regs) {}
-#endif
extern void set_binfmt(struct linux_binfmt *new);
extern void free_bprm(struct linux_binprm *);

diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index ba4b85a..42f9752 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -11,5 +11,10 @@
*/
extern int dump_write(struct file *file, const void *addr, int nr);
extern int dump_seek(struct file *file, loff_t off);
+#ifdef CONFIG_COREDUMP
+extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
+#else
+static inline void do_coredump(long signr, int exit_code, struct pt_regs *regs) {}
+#endif

#endif /* _LINUX_COREDUMP_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7bb5047..13ff447 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -410,10 +410,8 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
#endif

-
extern void set_dumpable(struct mm_struct *mm, int value);
extern int get_dumpable(struct mm_struct *mm);
-extern int __get_dumpable(unsigned long mm_flags);

/* get/set_dumpable() values */
#define SUID_DUMPABLE_DISABLED 0
diff --git a/kernel/signal.c b/kernel/signal.c
index be4f856..fb4fd72 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -17,6 +17,7 @@
#include <linux/fs.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
+#include <linux/coredump.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/ptrace.h>
--
1.7.11.2

2012-08-05 08:31:56

by Alex Kelly

[permalink] [raw]
Subject: [PATCH 3/4] fs: Clean up some artifacts in coredump.c

From: Alex Kelly <[email protected]>

Specifically, some whitespace got carried over from the move that
shouldn't have, and there were some comment style issues in the original
code that are now fixed

Signed-off-by: Alex Kelly <[email protected]>
Reviewed-by: Josh Triplett <[email protected]>
---
fs/coredump.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 34c9236..7f75060 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -70,7 +70,6 @@ static int zap_process(struct task_struct *start, int exit_code)
return nr;
}

-
/*
* Core dumping helper functions. These are the only things you should
* do on a core-file: use only these functions to write out all the
@@ -195,8 +194,10 @@ static int format_corename(struct core_name *cn, long signr)
if (!cn->corename)
return -ENOMEM;

- /* Repeat as long as we have more pattern to process and more output
- space */
+ /*
+ * Repeat as long as we have more pattern to process and more output
+ * space.
+ */
while (*pat_ptr) {
if (*pat_ptr != '%') {
if (*pat_ptr == 0)
@@ -471,7 +472,6 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
return 0;
}

-
void do_coredump(long signr, int exit_code, struct pt_regs *regs)
{
struct core_state core_state;
--
1.7.11.2

2012-08-05 11:21:58

by Alex Kelly

[permalink] [raw]
Subject: [PATCHv3 1/4] fs: Move core dump functionality into its own file

This prepares for making core dump functionality optional.

The variable "suid_dumpable" and associated functions are left in fs/exec.c
because they're used elsewhere, such as in ptrace.

Signed-off-by: Alex Kelly <[email protected]>
Reviewed-by: Josh Triplett <[email protected]>
---
v2: This patch set is a second revision that follows some suggestions from
Ingo Molnar and Josh Triplett. Specifically, authorship of commits is
revised for consistency, and an additional two patches cleaning up artifacts
and making headers more sane are added.

v3: This version fixes a few more authorship issues and some problems caused
by a bad git send-email config. Sorry about the extra mails

fs/Makefile | 2 +-
fs/coredump.c | 689 ++++++++++++++++++++++++++++++++++++++++++++++++++
fs/exec.c | 647 +----------------------------------------------
include/linux/sched.h | 1 +
4 files changed, 692 insertions(+), 647 deletions(-)
create mode 100644 fs/coredump.c

diff --git a/fs/Makefile b/fs/Makefile
index 2fb9779..8938f82 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o drop_caches.o splice.o sync.o utimes.o \
- stack.o fs_struct.o statfs.o
+ stack.o fs_struct.o statfs.o coredump.o

ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/coredump.c b/fs/coredump.c
new file mode 100644
index 0000000..34c9236
--- /dev/null
+++ b/fs/coredump.c
@@ -0,0 +1,689 @@
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/mm.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/swap.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/perf_event.h>
+#include <linux/highmem.h>
+#include <linux/spinlock.h>
+#include <linux/key.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/utsname.h>
+#include <linux/pid_namespace.h>
+#include <linux/module.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/security.h>
+#include <linux/syscalls.h>
+#include <linux/tsacct_kern.h>
+#include <linux/cn_proc.h>
+#include <linux/audit.h>
+#include <linux/tracehook.h>
+#include <linux/kmod.h>
+#include <linux/fsnotify.h>
+#include <linux/fs_struct.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/oom.h>
+#include <linux/compat.h>
+
+#include <asm/uaccess.h>
+#include <asm/mmu_context.h>
+#include <asm/tlb.h>
+#include <asm/exec.h>
+
+#include <trace/events/task.h>
+#include "internal.h"
+
+#include <trace/events/sched.h>
+
+int core_uses_pid;
+char core_pattern[CORENAME_MAX_SIZE] = "core";
+unsigned int core_pipe_limit;
+static atomic_t call_count = ATOMIC_INIT(1);
+
+/* The maximal length of core_pattern is also specified in sysctl.c */
+static int zap_process(struct task_struct *start, int exit_code)
+{
+ struct task_struct *t;
+ int nr = 0;
+
+ start->signal->flags = SIGNAL_GROUP_EXIT;
+ start->signal->group_exit_code = exit_code;
+ start->signal->group_stop_count = 0;
+
+ t = start;
+ do {
+ task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
+ if (t != current && t->mm) {
+ sigaddset(&t->pending.signal, SIGKILL);
+ signal_wake_up(t, 1);
+ nr++;
+ }
+ } while_each_thread(start, t);
+
+ return nr;
+}
+
+
+/*
+ * Core dumping helper functions. These are the only things you should
+ * do on a core-file: use only these functions to write out all the
+ * necessary info.
+ */
+int dump_write(struct file *file, const void *addr, int nr)
+{
+ return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+}
+EXPORT_SYMBOL(dump_write);
+
+struct core_name {
+ char *corename;
+ int used, size;
+};
+
+static int expand_corename(struct core_name *cn)
+{
+ char *old_corename = cn->corename;
+
+ cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
+ cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
+
+ if (!cn->corename) {
+ kfree(old_corename);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int cn_printf(struct core_name *cn, const char *fmt, ...)
+{
+ char *cur;
+ int need;
+ int ret;
+ va_list arg;
+
+ va_start(arg, fmt);
+ need = vsnprintf(NULL, 0, fmt, arg);
+ va_end(arg);
+
+ if (likely(need < cn->size - cn->used - 1))
+ goto out_printf;
+
+ ret = expand_corename(cn);
+ if (ret)
+ goto expand_fail;
+
+out_printf:
+ cur = cn->corename + cn->used;
+ va_start(arg, fmt);
+ vsnprintf(cur, need + 1, fmt, arg);
+ va_end(arg);
+ cn->used += need;
+ return 0;
+
+expand_fail:
+ return ret;
+}
+
+static void cn_escape(char *str)
+{
+ for (; *str; str++)
+ if (*str == '/')
+ *str = '!';
+}
+
+static int cn_print_exe_file(struct core_name *cn)
+{
+ struct file *exe_file;
+ char *pathbuf, *path;
+ int ret;
+
+ exe_file = get_mm_exe_file(current->mm);
+ if (!exe_file) {
+ char *commstart = cn->corename + cn->used;
+ ret = cn_printf(cn, "%s (path unknown)", current->comm);
+ cn_escape(commstart);
+ return ret;
+ }
+
+ pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
+ if (!pathbuf) {
+ ret = -ENOMEM;
+ goto put_exe_file;
+ }
+
+ path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
+ if (IS_ERR(path)) {
+ ret = PTR_ERR(path);
+ goto free_buf;
+ }
+
+ cn_escape(path);
+
+ ret = cn_printf(cn, "%s", path);
+
+free_buf:
+ kfree(pathbuf);
+put_exe_file:
+ fput(exe_file);
+ return ret;
+}
+
+/* format_corename will inspect the pattern parameter, and output a
+ * name into corename, which must have space for at least
+ * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
+ */
+static int format_corename(struct core_name *cn, long signr)
+{
+ const struct cred *cred = current_cred();
+ const char *pat_ptr = core_pattern;
+ int ispipe = (*pat_ptr == '|');
+ int pid_in_pattern = 0;
+ int err = 0;
+
+ cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
+ cn->corename = kmalloc(cn->size, GFP_KERNEL);
+ cn->used = 0;
+
+ if (!cn->corename)
+ return -ENOMEM;
+
+ /* Repeat as long as we have more pattern to process and more output
+ space */
+ while (*pat_ptr) {
+ if (*pat_ptr != '%') {
+ if (*pat_ptr == 0)
+ goto out;
+ err = cn_printf(cn, "%c", *pat_ptr++);
+ } else {
+ switch (*++pat_ptr) {
+ /* single % at the end, drop that */
+ case 0:
+ goto out;
+ /* Double percent, output one percent */
+ case '%':
+ err = cn_printf(cn, "%c", '%');
+ break;
+ /* pid */
+ case 'p':
+ pid_in_pattern = 1;
+ err = cn_printf(cn, "%d",
+ task_tgid_vnr(current));
+ break;
+ /* uid */
+ case 'u':
+ err = cn_printf(cn, "%d", cred->uid);
+ break;
+ /* gid */
+ case 'g':
+ err = cn_printf(cn, "%d", cred->gid);
+ break;
+ /* signal that caused the coredump */
+ case 's':
+ err = cn_printf(cn, "%ld", signr);
+ break;
+ /* UNIX time of coredump */
+ case 't': {
+ struct timeval tv;
+ do_gettimeofday(&tv);
+ err = cn_printf(cn, "%lu", tv.tv_sec);
+ break;
+ }
+ /* hostname */
+ case 'h': {
+ char *namestart = cn->corename + cn->used;
+ down_read(&uts_sem);
+ err = cn_printf(cn, "%s",
+ utsname()->nodename);
+ up_read(&uts_sem);
+ cn_escape(namestart);
+ break;
+ }
+ /* executable */
+ case 'e': {
+ char *commstart = cn->corename + cn->used;
+ err = cn_printf(cn, "%s", current->comm);
+ cn_escape(commstart);
+ break;
+ }
+ case 'E':
+ err = cn_print_exe_file(cn);
+ break;
+ /* core limit size */
+ case 'c':
+ err = cn_printf(cn, "%lu",
+ rlimit(RLIMIT_CORE));
+ break;
+ default:
+ break;
+ }
+ ++pat_ptr;
+ }
+
+ if (err)
+ return err;
+ }
+
+ /* Backward compatibility with core_uses_pid:
+ *
+ * If core_pattern does not include a %p (as is the default)
+ * and core_uses_pid is set, then .%pid will be appended to
+ * the filename. Do not do this for piped commands. */
+ if (!ispipe && !pid_in_pattern && core_uses_pid) {
+ err = cn_printf(cn, ".%d", task_tgid_vnr(current));
+ if (err)
+ return err;
+ }
+out:
+ return ispipe;
+}
+
+static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
+ struct core_state *core_state, int exit_code)
+{
+ struct task_struct *g, *p;
+ unsigned long flags;
+ int nr = -EAGAIN;
+
+ spin_lock_irq(&tsk->sighand->siglock);
+ if (!signal_group_exit(tsk->signal)) {
+ mm->core_state = core_state;
+ nr = zap_process(tsk, exit_code);
+ }
+ spin_unlock_irq(&tsk->sighand->siglock);
+ if (unlikely(nr < 0))
+ return nr;
+
+ if (atomic_read(&mm->mm_users) == nr + 1)
+ goto done;
+ /*
+ * We should find and kill all tasks which use this mm, and we should
+ * count them correctly into ->nr_threads. We don't take tasklist
+ * lock, but this is safe wrt:
+ *
+ * fork:
+ * None of sub-threads can fork after zap_process(leader). All
+ * processes which were created before this point should be
+ * visible to zap_threads() because copy_process() adds the new
+ * process to the tail of init_task.tasks list, and lock/unlock
+ * of ->siglock provides a memory barrier.
+ *
+ * do_exit:
+ * The caller holds mm->mmap_sem. This means that the task which
+ * uses this mm can't pass exit_mm(), so it can't exit or clear
+ * its ->mm.
+ *
+ * de_thread:
+ * It does list_replace_rcu(&leader->tasks, &current->tasks),
+ * we must see either old or new leader, this does not matter.
+ * However, it can change p->sighand, so lock_task_sighand(p)
+ * must be used. Since p->mm != NULL and we hold ->mmap_sem
+ * it can't fail.
+ *
+ * Note also that "g" can be the old leader with ->mm == NULL
+ * and already unhashed and thus removed from ->thread_group.
+ * This is OK, __unhash_process()->list_del_rcu() does not
+ * clear the ->next pointer, we will find the new leader via
+ * next_thread().
+ */
+ rcu_read_lock();
+ for_each_process(g) {
+ if (g == tsk->group_leader)
+ continue;
+ if (g->flags & PF_KTHREAD)
+ continue;
+ p = g;
+ do {
+ if (p->mm) {
+ if (unlikely(p->mm == mm)) {
+ lock_task_sighand(p, &flags);
+ nr += zap_process(p, exit_code);
+ unlock_task_sighand(p, &flags);
+ }
+ break;
+ }
+ } while_each_thread(g, p);
+ }
+ rcu_read_unlock();
+done:
+ atomic_set(&core_state->nr_threads, nr);
+ return nr;
+}
+
+static int coredump_wait(int exit_code, struct core_state *core_state)
+{
+ struct task_struct *tsk = current;
+ struct mm_struct *mm = tsk->mm;
+ int core_waiters = -EBUSY;
+
+ init_completion(&core_state->startup);
+ core_state->dumper.task = tsk;
+ core_state->dumper.next = NULL;
+
+ down_write(&mm->mmap_sem);
+ if (!mm->core_state)
+ core_waiters = zap_threads(tsk, mm, core_state, exit_code);
+ up_write(&mm->mmap_sem);
+
+ if (core_waiters > 0){
+ struct core_thread *ptr;
+
+ wait_for_completion(&core_state->startup);
+ /*
+ * Wait for all the threads to become inactive, so that
+ * all the thread context (extended register state, like
+ * fpu etc) gets copied to the memory.
+ */
+ ptr = core_state->dumper.next;
+ while (ptr != NULL) {
+ wait_task_inactive(ptr->task, 0);
+ ptr = ptr->next;
+ }
+ }
+
+ return core_waiters;
+}
+
+static void coredump_finish(struct mm_struct *mm)
+{
+ struct core_thread *curr, *next;
+ struct task_struct *task;
+
+ next = mm->core_state->dumper.next;
+ while ((curr = next) != NULL) {
+ next = curr->next;
+ task = curr->task;
+ /*
+ * see exit_mm(), curr->task must not see
+ * ->task == NULL before we read ->next.
+ */
+ smp_mb();
+ curr->task = NULL;
+ wake_up_process(task);
+ }
+
+ mm->core_state = NULL;
+}
+
+static void wait_for_dump_helpers(struct file *file)
+{
+ struct pipe_inode_info *pipe;
+
+ pipe = file->f_path.dentry->d_inode->i_pipe;
+
+ pipe_lock(pipe);
+ pipe->readers++;
+ pipe->writers--;
+
+ while ((pipe->readers > 1) && (!signal_pending(current))) {
+ wake_up_interruptible_sync(&pipe->wait);
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+ pipe_wait(pipe);
+ }
+
+ pipe->readers--;
+ pipe->writers++;
+ pipe_unlock(pipe);
+
+}
+
+/*
+ * umh_pipe_setup
+ * helper function to customize the process used
+ * to collect the core in userspace. Specifically
+ * it sets up a pipe and installs it as fd 0 (stdin)
+ * for the process. Returns 0 on success, or
+ * PTR_ERR on failure.
+ * Note that it also sets the core limit to 1. This
+ * is a special value that we use to trap recursive
+ * core dumps
+ */
+static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
+{
+ struct file *files[2];
+ struct fdtable *fdt;
+ struct coredump_params *cp = (struct coredump_params *)info->data;
+ struct files_struct *cf = current->files;
+ int err = create_pipe_files(files, 0);
+ if (err)
+ return err;
+
+ cp->file = files[1];
+
+ sys_close(0);
+ fd_install(0, files[0]);
+ spin_lock(&cf->file_lock);
+ fdt = files_fdtable(cf);
+ __set_open_fd(0, fdt);
+ __clear_close_on_exec(0, fdt);
+ spin_unlock(&cf->file_lock);
+
+ /* and disallow core files too */
+ current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
+
+ return 0;
+}
+
+
+void do_coredump(long signr, int exit_code, struct pt_regs *regs)
+{
+ struct core_state core_state;
+ struct core_name cn;
+ struct mm_struct *mm = current->mm;
+ struct linux_binfmt * binfmt;
+ const struct cred *old_cred;
+ struct cred *cred;
+ int retval = 0;
+ int flag = 0;
+ int ispipe;
+ bool need_nonrelative = false;
+ static atomic_t core_dump_count = ATOMIC_INIT(0);
+ struct coredump_params cprm = {
+ .signr = signr,
+ .regs = regs,
+ .limit = rlimit(RLIMIT_CORE),
+ /*
+ * We must use the same mm->flags while dumping core to avoid
+ * inconsistency of bit flags, since this flag is not protected
+ * by any locks.
+ */
+ .mm_flags = mm->flags,
+ };
+
+ audit_core_dumps(signr);
+
+ binfmt = mm->binfmt;
+ if (!binfmt || !binfmt->core_dump)
+ goto fail;
+ if (!__get_dumpable(cprm.mm_flags))
+ goto fail;
+
+ cred = prepare_creds();
+ if (!cred)
+ goto fail;
+ /*
+ * We cannot trust fsuid as being the "true" uid of the process
+ * nor do we know its entire history. We only know it was tainted
+ * so we dump it as root in mode 2, and onlt into a controlled
+ * environment (pipe handler or fully qualified path)
+ */
+ if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
+ /* Setuid core dump mode */
+ flag = O_EXCL; /* Stop rewrite attacks */
+ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
+ need_nonrelative = true;
+ }
+
+ retval = coredump_wait(exit_code, &core_state);
+ if (retval < 0)
+ goto fail_creds;
+
+ old_cred = override_creds(cred);
+
+ /*
+ * Clear any false indication of pending signals that might
+ * be seen by the filesystem code called to write the core file.
+ */
+ clear_thread_flag(TIF_SIGPENDING);
+
+ ispipe = format_corename(&cn, signr);
+
+ if (ispipe) {
+ int dump_count;
+ char **helper_argv;
+
+ if (ispipe < 0) {
+ printk(KERN_WARNING "format_corename failed\n");
+ printk(KERN_WARNING "Aborting core\n");
+ goto fail_corename;
+ }
+
+ if (cprm.limit == 1) {
+ /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
+ *
+ * Normally core limits are irrelevant to pipes, since
+ * we're not writing to the file system, but we use
+ * cprm.limit of 1 here as a speacial value, this is a
+ * consistent way to catch recursive crashes.
+ * We can still crash if the core_pattern binary sets
+ * RLIM_CORE = !1, but it runs as root, and can do
+ * lots of stupid things
+ *
+ * Note that we use task_tgid_vnr here to grab the pid
+ * of the process group leader. That way we get the
+ * right pid if a thread in a multi-threaded
+ * core_pattern process dies.
+ */
+ printk(KERN_WARNING
+ "Process %d(%s) has RLIMIT_CORE set to 1\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Aborting core\n");
+ goto fail_unlock;
+ }
+ cprm.limit = RLIM_INFINITY;
+
+ dump_count = atomic_inc_return(&core_dump_count);
+ if (core_pipe_limit && (core_pipe_limit < dump_count)) {
+ printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Skipping core dump\n");
+ goto fail_dropcount;
+ }
+
+ helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
+ if (!helper_argv) {
+ printk(KERN_WARNING "%s failed to allocate memory\n",
+ __func__);
+ goto fail_dropcount;
+ }
+
+ retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
+ NULL, UMH_WAIT_EXEC, umh_pipe_setup,
+ NULL, &cprm);
+ argv_free(helper_argv);
+ if (retval) {
+ printk(KERN_INFO "Core dump to %s pipe failed\n",
+ cn.corename);
+ goto close_fail;
+ }
+ } else {
+ struct inode *inode;
+
+ if (cprm.limit < binfmt->min_coredump)
+ goto fail_unlock;
+
+ if (need_nonrelative && cn.corename[0] != '/') {
+ printk(KERN_WARNING "Pid %d(%s) can only dump core "\
+ "To fully qualified path!\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Skipping core dump\n");
+ goto fail_unlock;
+ }
+
+ cprm.file = filp_open(cn.corename,
+ O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
+ 0600);
+ if (IS_ERR(cprm.file))
+ goto fail_unlock;
+
+ inode = cprm.file->f_path.dentry->d_inode;
+ if (inode->i_nlink > 1)
+ goto close_fail;
+ if (d_unhashed(cprm.file->f_path.dentry))
+ goto close_fail;
+ /*
+ * AK: actually i see no reason to not allow this for named
+ * pipes etc, but keep the previous behaviour for now.
+ */
+ if (!S_ISREG(inode->i_mode))
+ goto close_fail;
+ /*
+ * Dont allow local users get cute and trick others to coredump
+ * into their pre-created files.
+ */
+ if (!uid_eq(inode->i_uid, current_fsuid()))
+ goto close_fail;
+ if (!cprm.file->f_op || !cprm.file->f_op->write)
+ goto close_fail;
+ if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
+ goto close_fail;
+ }
+
+ retval = binfmt->core_dump(&cprm);
+ if (retval)
+ current->signal->group_exit_code |= 0x80;
+
+ if (ispipe && core_pipe_limit)
+ wait_for_dump_helpers(cprm.file);
+close_fail:
+ if (cprm.file)
+ filp_close(cprm.file, NULL);
+fail_dropcount:
+ if (ispipe)
+ atomic_dec(&core_dump_count);
+fail_unlock:
+ kfree(cn.corename);
+fail_corename:
+ coredump_finish(mm);
+ revert_creds(old_cred);
+fail_creds:
+ put_cred(cred);
+fail:
+ return;
+}
+
+int dump_seek(struct file *file, loff_t off)
+{
+ int ret = 1;
+
+ if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+ if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
+ return 0;
+ } else {
+ char *buf = (char *)get_zeroed_page(GFP_KERNEL);
+
+ if (!buf)
+ return 0;
+ while (off > 0) {
+ unsigned long n = off;
+
+ if (n > PAGE_SIZE)
+ n = PAGE_SIZE;
+ if (!dump_write(file, buf, n)) {
+ ret = 0;
+ break;
+ }
+ off -= n;
+ }
+ free_page((unsigned long)buf);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(dump_seek);
diff --git a/fs/exec.c b/fs/exec.c
index 574cf4d..b604050 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -66,19 +66,8 @@

#include <trace/events/sched.h>

-int core_uses_pid;
-char core_pattern[CORENAME_MAX_SIZE] = "core";
-unsigned int core_pipe_limit;
int suid_dumpable = 0;

-struct core_name {
- char *corename;
- int used, size;
-};
-static atomic_t call_count = ATOMIC_INIT(1);
-
-/* The maximal length of core_pattern is also specified in sysctl.c */
-
static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);

@@ -1632,353 +1621,6 @@ void set_binfmt(struct linux_binfmt *new)

EXPORT_SYMBOL(set_binfmt);

-static int expand_corename(struct core_name *cn)
-{
- char *old_corename = cn->corename;
-
- cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
- cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
-
- if (!cn->corename) {
- kfree(old_corename);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static int cn_printf(struct core_name *cn, const char *fmt, ...)
-{
- char *cur;
- int need;
- int ret;
- va_list arg;
-
- va_start(arg, fmt);
- need = vsnprintf(NULL, 0, fmt, arg);
- va_end(arg);
-
- if (likely(need < cn->size - cn->used - 1))
- goto out_printf;
-
- ret = expand_corename(cn);
- if (ret)
- goto expand_fail;
-
-out_printf:
- cur = cn->corename + cn->used;
- va_start(arg, fmt);
- vsnprintf(cur, need + 1, fmt, arg);
- va_end(arg);
- cn->used += need;
- return 0;
-
-expand_fail:
- return ret;
-}
-
-static void cn_escape(char *str)
-{
- for (; *str; str++)
- if (*str == '/')
- *str = '!';
-}
-
-static int cn_print_exe_file(struct core_name *cn)
-{
- struct file *exe_file;
- char *pathbuf, *path;
- int ret;
-
- exe_file = get_mm_exe_file(current->mm);
- if (!exe_file) {
- char *commstart = cn->corename + cn->used;
- ret = cn_printf(cn, "%s (path unknown)", current->comm);
- cn_escape(commstart);
- return ret;
- }
-
- pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
- if (!pathbuf) {
- ret = -ENOMEM;
- goto put_exe_file;
- }
-
- path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
- if (IS_ERR(path)) {
- ret = PTR_ERR(path);
- goto free_buf;
- }
-
- cn_escape(path);
-
- ret = cn_printf(cn, "%s", path);
-
-free_buf:
- kfree(pathbuf);
-put_exe_file:
- fput(exe_file);
- return ret;
-}
-
-/* format_corename will inspect the pattern parameter, and output a
- * name into corename, which must have space for at least
- * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
- */
-static int format_corename(struct core_name *cn, long signr)
-{
- const struct cred *cred = current_cred();
- const char *pat_ptr = core_pattern;
- int ispipe = (*pat_ptr == '|');
- int pid_in_pattern = 0;
- int err = 0;
-
- cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
- cn->corename = kmalloc(cn->size, GFP_KERNEL);
- cn->used = 0;
-
- if (!cn->corename)
- return -ENOMEM;
-
- /* Repeat as long as we have more pattern to process and more output
- space */
- while (*pat_ptr) {
- if (*pat_ptr != '%') {
- if (*pat_ptr == 0)
- goto out;
- err = cn_printf(cn, "%c", *pat_ptr++);
- } else {
- switch (*++pat_ptr) {
- /* single % at the end, drop that */
- case 0:
- goto out;
- /* Double percent, output one percent */
- case '%':
- err = cn_printf(cn, "%c", '%');
- break;
- /* pid */
- case 'p':
- pid_in_pattern = 1;
- err = cn_printf(cn, "%d",
- task_tgid_vnr(current));
- break;
- /* uid */
- case 'u':
- err = cn_printf(cn, "%d", cred->uid);
- break;
- /* gid */
- case 'g':
- err = cn_printf(cn, "%d", cred->gid);
- break;
- /* signal that caused the coredump */
- case 's':
- err = cn_printf(cn, "%ld", signr);
- break;
- /* UNIX time of coredump */
- case 't': {
- struct timeval tv;
- do_gettimeofday(&tv);
- err = cn_printf(cn, "%lu", tv.tv_sec);
- break;
- }
- /* hostname */
- case 'h': {
- char *namestart = cn->corename + cn->used;
- down_read(&uts_sem);
- err = cn_printf(cn, "%s",
- utsname()->nodename);
- up_read(&uts_sem);
- cn_escape(namestart);
- break;
- }
- /* executable */
- case 'e': {
- char *commstart = cn->corename + cn->used;
- err = cn_printf(cn, "%s", current->comm);
- cn_escape(commstart);
- break;
- }
- case 'E':
- err = cn_print_exe_file(cn);
- break;
- /* core limit size */
- case 'c':
- err = cn_printf(cn, "%lu",
- rlimit(RLIMIT_CORE));
- break;
- default:
- break;
- }
- ++pat_ptr;
- }
-
- if (err)
- return err;
- }
-
- /* Backward compatibility with core_uses_pid:
- *
- * If core_pattern does not include a %p (as is the default)
- * and core_uses_pid is set, then .%pid will be appended to
- * the filename. Do not do this for piped commands. */
- if (!ispipe && !pid_in_pattern && core_uses_pid) {
- err = cn_printf(cn, ".%d", task_tgid_vnr(current));
- if (err)
- return err;
- }
-out:
- return ispipe;
-}
-
-static int zap_process(struct task_struct *start, int exit_code)
-{
- struct task_struct *t;
- int nr = 0;
-
- start->signal->flags = SIGNAL_GROUP_EXIT;
- start->signal->group_exit_code = exit_code;
- start->signal->group_stop_count = 0;
-
- t = start;
- do {
- task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
- if (t != current && t->mm) {
- sigaddset(&t->pending.signal, SIGKILL);
- signal_wake_up(t, 1);
- nr++;
- }
- } while_each_thread(start, t);
-
- return nr;
-}
-
-static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
- struct core_state *core_state, int exit_code)
-{
- struct task_struct *g, *p;
- unsigned long flags;
- int nr = -EAGAIN;
-
- spin_lock_irq(&tsk->sighand->siglock);
- if (!signal_group_exit(tsk->signal)) {
- mm->core_state = core_state;
- nr = zap_process(tsk, exit_code);
- }
- spin_unlock_irq(&tsk->sighand->siglock);
- if (unlikely(nr < 0))
- return nr;
-
- if (atomic_read(&mm->mm_users) == nr + 1)
- goto done;
- /*
- * We should find and kill all tasks which use this mm, and we should
- * count them correctly into ->nr_threads. We don't take tasklist
- * lock, but this is safe wrt:
- *
- * fork:
- * None of sub-threads can fork after zap_process(leader). All
- * processes which were created before this point should be
- * visible to zap_threads() because copy_process() adds the new
- * process to the tail of init_task.tasks list, and lock/unlock
- * of ->siglock provides a memory barrier.
- *
- * do_exit:
- * The caller holds mm->mmap_sem. This means that the task which
- * uses this mm can't pass exit_mm(), so it can't exit or clear
- * its ->mm.
- *
- * de_thread:
- * It does list_replace_rcu(&leader->tasks, &current->tasks),
- * we must see either old or new leader, this does not matter.
- * However, it can change p->sighand, so lock_task_sighand(p)
- * must be used. Since p->mm != NULL and we hold ->mmap_sem
- * it can't fail.
- *
- * Note also that "g" can be the old leader with ->mm == NULL
- * and already unhashed and thus removed from ->thread_group.
- * This is OK, __unhash_process()->list_del_rcu() does not
- * clear the ->next pointer, we will find the new leader via
- * next_thread().
- */
- rcu_read_lock();
- for_each_process(g) {
- if (g == tsk->group_leader)
- continue;
- if (g->flags & PF_KTHREAD)
- continue;
- p = g;
- do {
- if (p->mm) {
- if (unlikely(p->mm == mm)) {
- lock_task_sighand(p, &flags);
- nr += zap_process(p, exit_code);
- unlock_task_sighand(p, &flags);
- }
- break;
- }
- } while_each_thread(g, p);
- }
- rcu_read_unlock();
-done:
- atomic_set(&core_state->nr_threads, nr);
- return nr;
-}
-
-static int coredump_wait(int exit_code, struct core_state *core_state)
-{
- struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->mm;
- int core_waiters = -EBUSY;
-
- init_completion(&core_state->startup);
- core_state->dumper.task = tsk;
- core_state->dumper.next = NULL;
-
- down_write(&mm->mmap_sem);
- if (!mm->core_state)
- core_waiters = zap_threads(tsk, mm, core_state, exit_code);
- up_write(&mm->mmap_sem);
-
- if (core_waiters > 0) {
- struct core_thread *ptr;
-
- wait_for_completion(&core_state->startup);
- /*
- * Wait for all the threads to become inactive, so that
- * all the thread context (extended register state, like
- * fpu etc) gets copied to the memory.
- */
- ptr = core_state->dumper.next;
- while (ptr != NULL) {
- wait_task_inactive(ptr->task, 0);
- ptr = ptr->next;
- }
- }
-
- return core_waiters;
-}
-
-static void coredump_finish(struct mm_struct *mm)
-{
- struct core_thread *curr, *next;
- struct task_struct *task;
-
- next = mm->core_state->dumper.next;
- while ((curr = next) != NULL) {
- next = curr->next;
- task = curr->task;
- /*
- * see exit_mm(), curr->task must not see
- * ->task == NULL before we read ->next.
- */
- smp_mb();
- curr->task = NULL;
- wake_up_process(task);
- }
-
- mm->core_state = NULL;
-}
-
/*
* set_dumpable converts traditional three-value dumpable to two flags and
* stores them into mm->flags. It modifies lower two bits of mm->flags, but
@@ -2020,7 +1662,7 @@ void set_dumpable(struct mm_struct *mm, int value)
}
}

-static int __get_dumpable(unsigned long mm_flags)
+int __get_dumpable(unsigned long mm_flags)
{
int ret;

@@ -2032,290 +1674,3 @@ int get_dumpable(struct mm_struct *mm)
{
return __get_dumpable(mm->flags);
}
-
-static void wait_for_dump_helpers(struct file *file)
-{
- struct pipe_inode_info *pipe;
-
- pipe = file->f_path.dentry->d_inode->i_pipe;
-
- pipe_lock(pipe);
- pipe->readers++;
- pipe->writers--;
-
- while ((pipe->readers > 1) && (!signal_pending(current))) {
- wake_up_interruptible_sync(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- pipe_wait(pipe);
- }
-
- pipe->readers--;
- pipe->writers++;
- pipe_unlock(pipe);
-
-}
-
-
-/*
- * umh_pipe_setup
- * helper function to customize the process used
- * to collect the core in userspace. Specifically
- * it sets up a pipe and installs it as fd 0 (stdin)
- * for the process. Returns 0 on success, or
- * PTR_ERR on failure.
- * Note that it also sets the core limit to 1. This
- * is a special value that we use to trap recursive
- * core dumps
- */
-static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
-{
- struct file *files[2];
- struct fdtable *fdt;
- struct coredump_params *cp = (struct coredump_params *)info->data;
- struct files_struct *cf = current->files;
- int err = create_pipe_files(files, 0);
- if (err)
- return err;
-
- cp->file = files[1];
-
- sys_close(0);
- fd_install(0, files[0]);
- spin_lock(&cf->file_lock);
- fdt = files_fdtable(cf);
- __set_open_fd(0, fdt);
- __clear_close_on_exec(0, fdt);
- spin_unlock(&cf->file_lock);
-
- /* and disallow core files too */
- current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
-
- return 0;
-}
-
-void do_coredump(long signr, int exit_code, struct pt_regs *regs)
-{
- struct core_state core_state;
- struct core_name cn;
- struct mm_struct *mm = current->mm;
- struct linux_binfmt * binfmt;
- const struct cred *old_cred;
- struct cred *cred;
- int retval = 0;
- int flag = 0;
- int ispipe;
- bool need_nonrelative = false;
- static atomic_t core_dump_count = ATOMIC_INIT(0);
- struct coredump_params cprm = {
- .signr = signr,
- .regs = regs,
- .limit = rlimit(RLIMIT_CORE),
- /*
- * We must use the same mm->flags while dumping core to avoid
- * inconsistency of bit flags, since this flag is not protected
- * by any locks.
- */
- .mm_flags = mm->flags,
- };
-
- audit_core_dumps(signr);
-
- binfmt = mm->binfmt;
- if (!binfmt || !binfmt->core_dump)
- goto fail;
- if (!__get_dumpable(cprm.mm_flags))
- goto fail;
-
- cred = prepare_creds();
- if (!cred)
- goto fail;
- /*
- * We cannot trust fsuid as being the "true" uid of the process
- * nor do we know its entire history. We only know it was tainted
- * so we dump it as root in mode 2, and only into a controlled
- * environment (pipe handler or fully qualified path).
- */
- if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
- /* Setuid core dump mode */
- flag = O_EXCL; /* Stop rewrite attacks */
- cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
- need_nonrelative = true;
- }
-
- retval = coredump_wait(exit_code, &core_state);
- if (retval < 0)
- goto fail_creds;
-
- old_cred = override_creds(cred);
-
- /*
- * Clear any false indication of pending signals that might
- * be seen by the filesystem code called to write the core file.
- */
- clear_thread_flag(TIF_SIGPENDING);
-
- ispipe = format_corename(&cn, signr);
-
- if (ispipe) {
- int dump_count;
- char **helper_argv;
-
- if (ispipe < 0) {
- printk(KERN_WARNING "format_corename failed\n");
- printk(KERN_WARNING "Aborting core\n");
- goto fail_corename;
- }
-
- if (cprm.limit == 1) {
- /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
- *
- * Normally core limits are irrelevant to pipes, since
- * we're not writing to the file system, but we use
- * cprm.limit of 1 here as a speacial value, this is a
- * consistent way to catch recursive crashes.
- * We can still crash if the core_pattern binary sets
- * RLIM_CORE = !1, but it runs as root, and can do
- * lots of stupid things.
- *
- * Note that we use task_tgid_vnr here to grab the pid
- * of the process group leader. That way we get the
- * right pid if a thread in a multi-threaded
- * core_pattern process dies.
- */
- printk(KERN_WARNING
- "Process %d(%s) has RLIMIT_CORE set to 1\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Aborting core\n");
- goto fail_unlock;
- }
- cprm.limit = RLIM_INFINITY;
-
- dump_count = atomic_inc_return(&core_dump_count);
- if (core_pipe_limit && (core_pipe_limit < dump_count)) {
- printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Skipping core dump\n");
- goto fail_dropcount;
- }
-
- helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
- if (!helper_argv) {
- printk(KERN_WARNING "%s failed to allocate memory\n",
- __func__);
- goto fail_dropcount;
- }
-
- retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
- NULL, UMH_WAIT_EXEC, umh_pipe_setup,
- NULL, &cprm);
- argv_free(helper_argv);
- if (retval) {
- printk(KERN_INFO "Core dump to %s pipe failed\n",
- cn.corename);
- goto close_fail;
- }
- } else {
- struct inode *inode;
-
- if (cprm.limit < binfmt->min_coredump)
- goto fail_unlock;
-
- if (need_nonrelative && cn.corename[0] != '/') {
- printk(KERN_WARNING "Pid %d(%s) can only dump core "\
- "to fully qualified path!\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Skipping core dump\n");
- goto fail_unlock;
- }
-
- cprm.file = filp_open(cn.corename,
- O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
- 0600);
- if (IS_ERR(cprm.file))
- goto fail_unlock;
-
- inode = cprm.file->f_path.dentry->d_inode;
- if (inode->i_nlink > 1)
- goto close_fail;
- if (d_unhashed(cprm.file->f_path.dentry))
- goto close_fail;
- /*
- * AK: actually i see no reason to not allow this for named
- * pipes etc, but keep the previous behaviour for now.
- */
- if (!S_ISREG(inode->i_mode))
- goto close_fail;
- /*
- * Dont allow local users get cute and trick others to coredump
- * into their pre-created files.
- */
- if (!uid_eq(inode->i_uid, current_fsuid()))
- goto close_fail;
- if (!cprm.file->f_op || !cprm.file->f_op->write)
- goto close_fail;
- if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
- goto close_fail;
- }
-
- retval = binfmt->core_dump(&cprm);
- if (retval)
- current->signal->group_exit_code |= 0x80;
-
- if (ispipe && core_pipe_limit)
- wait_for_dump_helpers(cprm.file);
-close_fail:
- if (cprm.file)
- filp_close(cprm.file, NULL);
-fail_dropcount:
- if (ispipe)
- atomic_dec(&core_dump_count);
-fail_unlock:
- kfree(cn.corename);
-fail_corename:
- coredump_finish(mm);
- revert_creds(old_cred);
-fail_creds:
- put_cred(cred);
-fail:
- return;
-}
-
-/*
- * Core dumping helper functions. These are the only things you should
- * do on a core-file: use only these functions to write out all the
- * necessary info.
- */
-int dump_write(struct file *file, const void *addr, int nr)
-{
- return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
-EXPORT_SYMBOL(dump_write);
-
-int dump_seek(struct file *file, loff_t off)
-{
- int ret = 1;
-
- if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
- if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
- return 0;
- } else {
- char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-
- if (!buf)
- return 0;
- while (off > 0) {
- unsigned long n = off;
-
- if (n > PAGE_SIZE)
- n = PAGE_SIZE;
- if (!dump_write(file, buf, n)) {
- ret = 0;
- break;
- }
- off -= n;
- }
- free_page((unsigned long)buf);
- }
- return ret;
-}
-EXPORT_SYMBOL(dump_seek);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c147e70..7bb5047 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -413,6 +413,7 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}

extern void set_dumpable(struct mm_struct *mm, int value);
extern int get_dumpable(struct mm_struct *mm);
+extern int __get_dumpable(unsigned long mm_flags);

/* get/set_dumpable() values */
#define SUID_DUMPABLE_DISABLED 0
--
1.7.11.2

2012-08-05 11:22:56

by Alex Kelly

[permalink] [raw]
Subject: [PATCHv3 3/4] fs: Clean up some artifacts in coredump.c

Specifically, some whitespace got carried over from the move that
shouldn't have, and there were some comment style issues in the original
code that are now fixed

Signed-off-by: Alex Kelly <[email protected]>
Reviewed-by: Josh Triplett <[email protected]>
---
fs/coredump.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 34c9236..7f75060 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -70,7 +70,6 @@ static int zap_process(struct task_struct *start, int exit_code)
return nr;
}

-
/*
* Core dumping helper functions. These are the only things you should
* do on a core-file: use only these functions to write out all the
@@ -195,8 +194,10 @@ static int format_corename(struct core_name *cn, long signr)
if (!cn->corename)
return -ENOMEM;

- /* Repeat as long as we have more pattern to process and more output
- space */
+ /*
+ * Repeat as long as we have more pattern to process and more output
+ * space.
+ */
while (*pat_ptr) {
if (*pat_ptr != '%') {
if (*pat_ptr == 0)
@@ -471,7 +472,6 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
return 0;
}

-
void do_coredump(long signr, int exit_code, struct pt_regs *regs)
{
struct core_state core_state;
--
1.7.11.2

2012-08-05 11:22:57

by Alex Kelly

[permalink] [raw]
Subject: [PATCHv3 4/4] fs: Update coredump-related headers

This patch creates a new header file, fs/coredump.h, which contains
functions only used by the new coredump.c. It also moves do_coredump
to the include/linux/coredump.h header file, for consistency.

Signed-off-by: Alex Kelly <[email protected]>
Reviewed-by: Josh Triplett <[email protected]>
---
fs/coredump.c | 2 ++
fs/coredump.h | 6 ++++++
fs/exec.c | 1 +
include/linux/binfmts.h | 5 -----
include/linux/coredump.h | 5 +++++
include/linux/sched.h | 2 --
kernel/signal.c | 1 +
7 files changed, 15 insertions(+), 7 deletions(-)
create mode 100644 fs/coredump.h

diff --git a/fs/coredump.c b/fs/coredump.c
index 7f75060..f5ac9d7 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -14,6 +14,7 @@
#include <linux/key.h>
#include <linux/personality.h>
#include <linux/binfmts.h>
+#include <linux/coredump.h>
#include <linux/utsname.h>
#include <linux/pid_namespace.h>
#include <linux/module.h>
@@ -39,6 +40,7 @@

#include <trace/events/task.h>
#include "internal.h"
+#include "coredump.h"

#include <trace/events/sched.h>

diff --git a/fs/coredump.h b/fs/coredump.h
new file mode 100644
index 0000000..e39ff07
--- /dev/null
+++ b/fs/coredump.h
@@ -0,0 +1,6 @@
+#ifndef _FS_COREDUMP_H
+#define _FS_COREDUMP_H
+
+extern int __get_dumpable(unsigned long mm_flags);
+
+#endif
diff --git a/fs/exec.c b/fs/exec.c
index b604050..a0ad3a2 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -63,6 +63,7 @@

#include <trace/events/task.h>
#include "internal.h"
+#include "coredump.h"

#include <trace/events/sched.h>

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 00e2e89..c7b16ee 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -132,11 +132,6 @@ extern int copy_strings_kernel(int argc, const char *const *argv,
struct linux_binprm *bprm);
extern int prepare_bprm_creds(struct linux_binprm *bprm);
extern void install_exec_creds(struct linux_binprm *bprm);
-#ifdef CONFIG_COREDUMP
-extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
-#else
-static inline void do_coredump(long signr, int exit_code, struct pt_regs *regs) {}
-#endif
extern void set_binfmt(struct linux_binfmt *new);
extern void free_bprm(struct linux_binprm *);

diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index ba4b85a..42f9752 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -11,5 +11,10 @@
*/
extern int dump_write(struct file *file, const void *addr, int nr);
extern int dump_seek(struct file *file, loff_t off);
+#ifdef CONFIG_COREDUMP
+extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
+#else
+static inline void do_coredump(long signr, int exit_code, struct pt_regs *regs) {}
+#endif

#endif /* _LINUX_COREDUMP_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7bb5047..13ff447 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -410,10 +410,8 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
#endif

-
extern void set_dumpable(struct mm_struct *mm, int value);
extern int get_dumpable(struct mm_struct *mm);
-extern int __get_dumpable(unsigned long mm_flags);

/* get/set_dumpable() values */
#define SUID_DUMPABLE_DISABLED 0
diff --git a/kernel/signal.c b/kernel/signal.c
index be4f856..fb4fd72 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -17,6 +17,7 @@
#include <linux/fs.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
+#include <linux/coredump.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/ptrace.h>
--
1.7.11.2

2012-08-05 11:22:53

by Alex Kelly

[permalink] [raw]
Subject: [PATCHv3 2/4] fs: Make core dump functionality optional

Adds an expert Kconfig option, CONFIG_COREDUMP, which allows disabling of core dump.
This saves approximately 2.6k in the compiled kernel, and complements CONFIG_ELF_CORE,
which now depends on it.

CONFIG_COREDUMP also disables coredump-related sysctls, except for suid_dumpable and
related functions, which are necessary for ptrace.

Signed-off-by: Alex Kelly <[email protected]>
Reviewed-by: Josh Triplett <[email protected]>
---
fs/Kconfig.binfmt | 8 ++++++++
fs/Makefile | 3 ++-
include/linux/binfmts.h | 4 ++++
init/Kconfig | 1 +
kernel/sysctl.c | 6 +++++-
5 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 0225742..0efd152 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -164,3 +164,11 @@ config BINFMT_MISC
You may say M here for module support and later load the module when
you have use for it; the module is called binfmt_misc. If you
don't know what to answer at this point, say Y.
+
+config COREDUMP
+ bool "Enable core dump support" if EXPERT
+ default y
+ help
+ This option enables support for performing core dumps. You almost
+ certainly want to say Y here. Not necessary on systems that never
+ need debugging or only ever run flawless code.
diff --git a/fs/Makefile b/fs/Makefile
index 8938f82..1d7af79 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o drop_caches.o splice.o sync.o utimes.o \
- stack.o fs_struct.o statfs.o coredump.o
+ stack.o fs_struct.o statfs.o

ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
@@ -48,6 +48,7 @@ obj-$(CONFIG_FS_MBCACHE) += mbcache.o
obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
+obj-$(CONFIG_COREDUMP) += coredump.o

obj-$(CONFIG_FHANDLE) += fhandle.o

diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 366422b..00e2e89 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -132,7 +132,11 @@ extern int copy_strings_kernel(int argc, const char *const *argv,
struct linux_binprm *bprm);
extern int prepare_bprm_creds(struct linux_binprm *bprm);
extern void install_exec_creds(struct linux_binprm *bprm);
+#ifdef CONFIG_COREDUMP
extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
+#else
+static inline void do_coredump(long signr, int exit_code, struct pt_regs *regs) {}
+#endif
extern void set_binfmt(struct linux_binfmt *new);
extern void free_bprm(struct linux_binprm *);

diff --git a/init/Kconfig b/init/Kconfig
index af6c7f8..0e75056 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1230,6 +1230,7 @@ config BUG
Just say Y.

config ELF_CORE
+ depends on COREDUMP
default y
bool "Enable ELF core dumps" if EXPERT
help
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 87174ef..af57e84 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -97,10 +97,12 @@
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
extern int max_threads;
-extern int core_uses_pid;
extern int suid_dumpable;
+#ifdef CONFIG_COREDUMP
+extern int core_uses_pid;
extern char core_pattern[];
extern unsigned int core_pipe_limit;
+#endif
extern int pid_max;
extern int min_free_kbytes;
extern int pid_max_min, pid_max_max;
@@ -404,6 +406,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_COREDUMP
{
.procname = "core_uses_pid",
.data = &core_uses_pid,
@@ -425,6 +428,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#endif
#ifdef CONFIG_PROC_SYSCTL
{
.procname = "tainted",
--
1.7.11.2

2012-08-05 18:42:41

by Kees Cook

[permalink] [raw]
Subject: [NAK] Re: [PATCHv3 1/4] fs: Move core dump functionality into its own file

Hi Alex,

Overall, this seems like a fine idea. However, I think the code move
has gone very badly. Comments are mismatched, there are typos added,
formatting changed, case changed, trailing white space added, and
punctuation dropped.

NAKed-by: Kees Cook <[email protected]>

On Sun, Aug 05, 2012 at 04:18:38AM -0700, Alex Kelly wrote:
> +/* The maximal length of core_pattern is also specified in sysctl.c */
> +static int zap_process(struct task_struct *start, int exit_code)

These two lines have nothing to do with each other. This comment belongs
with "char core_pattern[CORENAME_MAX_SIZE] = "core";", IIUC.

Why is zap_process not with the zap_threads helpers?

> +void do_coredump(long signr, int exit_code, struct pt_regs *regs)
> +[...]
> + * so we dump it as root in mode 2, and onlt into a controlled

"onlt"? The removed code didn't have this typo. Was the moved code
not cut/pasted? That got my attention. In fact, I went and compared
all the code that was moved, and this was not the only change -- what
happened here?

--- /tmp/del.code 2012-08-05 11:32:36.602881193 -0700
+++ /tmp/add.code 2012-08-05 11:33:06.615270285 -0700
@@ -327,7 +328,7 @@
core_waiters = zap_threads(tsk, mm, core_state, exit_code);
up_write(&mm->mmap_sem);

- if (core_waiters > 0) {
+ if (core_waiters > 0){
struct core_thread *ptr;

wait_for_completion(&core_state->startup);
@@ -466,8 +467,8 @@
/*
* We cannot trust fsuid as being the "true" uid of the process
* nor do we know its entire history. We only know it was tainted
- * so we dump it as root in mode 2, and only into a controlled
- * environment (pipe handler or fully qualified path).
+ * so we dump it as root in mode 2, and onlt into a controlled
+ * environment (pipe handler or fully qualified path)
*/
if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
/* Setuid core dump mode */
@@ -505,11 +506,11 @@
*
* Normally core limits are irrelevant to pipes, since
* we're not writing to the file system, but we use
- * cprm.limit of 1 here as a speacial value, this is a
+ * cprm.limit of 1 here as a speacial value, this is a
* consistent way to catch recursive crashes.
* We can still crash if the core_pattern binary sets
* RLIM_CORE = !1, but it runs as root, and can do
- * lots of stupid things.
+ * lots of stupid things
*
* Note that we use task_tgid_vnr here to grab the pid
* of the process group leader. That way we get the
@@ -556,7 +557,7 @@

if (need_nonrelative && cn.corename[0] != '/') {
printk(KERN_WARNING "Pid %d(%s) can only dump core "\
- "to fully qualified path!\n",
+ "To fully qualified path!\n",
task_tgid_vnr(current), current->comm);
printk(KERN_WARNING "Skipping core dump\n");
goto fail_unlock;


-Kees

--
Kees Cook @outflux.net