2006-10-12 07:44:21

by Akinobu Mita

[permalink] [raw]
Subject: [patch 6/7] process filtering for fault-injection capabilities

From: Akinobu Mita <[email protected]>

This patch provides process filtering feature.
The process filter allows failing only permitted processes
by /proc/<pid>/make-it-fail

Please see the example that demostrates how to inject slab allocation
failures into module init/cleanup code
in Documentation/fault-injection/fault-injection.txt

Signed-off-by: Akinobu Mita <[email protected]>

fs/proc/base.c | 65 +++++++++++++++++++++++++++++++++++++++++++
include/linux/fault-inject.h | 2 +
include/linux/sched.h | 3 +
lib/fault-inject.c | 19 ++++++++++++
4 files changed, 89 insertions(+)

Index: work-fault-inject/fs/proc/base.c
===================================================================
--- work-fault-inject.orig/fs/proc/base.c
+++ work-fault-inject/fs/proc/base.c
@@ -776,6 +776,65 @@ static struct file_operations proc_login
};
#endif

+#ifdef CONFIG_FAULT_INJECTION
+static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
+ char buffer[PROC_NUMBUF];
+ size_t len;
+ int make_it_fail;
+ loff_t __ppos = *ppos;
+
+ if (!task)
+ return -ESRCH;
+ make_it_fail = task->make_it_fail;
+ put_task_struct(task);
+
+ len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
+ if (__ppos >= len)
+ return 0;
+ if (count > len-__ppos)
+ count = len-__ppos;
+ if (copy_to_user(buf, buffer + __ppos, count))
+ return -EFAULT;
+ *ppos = __ppos + count;
+ return count;
+}
+
+static ssize_t proc_fault_inject_write(struct file * file,
+ const char __user * buf, size_t count, loff_t *ppos)
+{
+ struct task_struct *task;
+ char buffer[PROC_NUMBUF], *end;
+ int make_it_fail;
+
+ if (!capable(CAP_SYS_RESOURCE))
+ return -EPERM;
+ memset(buffer, 0, sizeof(buffer));
+ if (count > sizeof(buffer) - 1)
+ count = sizeof(buffer) - 1;
+ if (copy_from_user(buffer, buf, count))
+ return -EFAULT;
+ make_it_fail = simple_strtol(buffer, &end, 0);
+ if (*end == '\n')
+ end++;
+ task = get_proc_task(file->f_dentry->d_inode);
+ if (!task)
+ return -ESRCH;
+ task->make_it_fail = make_it_fail;
+ put_task_struct(task);
+ if (end - buffer == 0)
+ return -EIO;
+ return end - buffer;
+}
+
+static struct file_operations proc_fault_inject_operations = {
+ .read = proc_fault_inject_read,
+ .write = proc_fault_inject_write,
+};
+#endif
+
#ifdef CONFIG_SECCOMP
static ssize_t seccomp_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
@@ -1788,6 +1847,9 @@ static struct pid_entry tgid_base_stuff[
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, loginuid),
#endif
+#ifdef CONFIG_FAULT_INJECTION
+ REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
+#endif
};

static int proc_tgid_base_readdir(struct file * filp,
@@ -2062,6 +2124,9 @@ static struct pid_entry tid_base_stuff[]
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, loginuid),
#endif
+#ifdef CONFIG_FAULT_INJECTION
+ REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
+#endif
};

static int proc_tid_base_readdir(struct file * filp,
Index: work-fault-inject/include/linux/sched.h
===================================================================
--- work-fault-inject.orig/include/linux/sched.h
+++ work-fault-inject/include/linux/sched.h
@@ -1051,6 +1051,9 @@ struct task_struct {
#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info *delays;
#endif
+#ifdef CONFIG_FAULT_INJECTION
+ int make_it_fail;
+#endif
};

static inline pid_t process_group(struct task_struct *tsk)
Index: work-fault-inject/include/linux/fault-inject.h
===================================================================
--- work-fault-inject.orig/include/linux/fault-inject.h
+++ work-fault-inject/include/linux/fault-inject.h
@@ -17,6 +17,7 @@ struct fault_attr {
atomic_t times;
atomic_t space;
unsigned long verbose;
+ u32 task_filter;

unsigned long count;

@@ -30,6 +31,7 @@ struct fault_attr {
struct dentry *times_file;
struct dentry *space_file;
struct dentry *verbose_file;
+ struct dentry *task_filter_file;
} entries;

#endif
Index: work-fault-inject/lib/fault-inject.c
===================================================================
--- work-fault-inject.orig/lib/fault-inject.c
+++ work-fault-inject/lib/fault-inject.c
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/module.h>
+#include <linux/interrupt.h>
#include <linux/fault-inject.h>

int setup_fault_attr(struct fault_attr *attr, char *str)
@@ -58,6 +59,11 @@ static void fail_dump(struct fault_attr
dump_stack();
}

+static int fail_task(struct fault_attr *attr, struct task_struct *task)
+{
+ return !in_interrupt() && task->make_it_fail;
+}
+
/*
* This code is stolen from failmalloc-1.0
* http://www.nongnu.org/failmalloc/
@@ -65,6 +71,9 @@ static void fail_dump(struct fault_attr

int should_fail(struct fault_attr *attr, ssize_t size)
{
+ if (attr->task_filter && !fail_task(attr, current))
+ return 0;
+
if (atomic_read(&max_failures(attr)) == 0)
return 0;

@@ -155,6 +164,10 @@ void cleanup_fault_attr_entries(struct f
debugfs_remove(attr->entries.verbose_file);
attr->entries.verbose_file = NULL;
}
+ if (attr->entries.task_filter_file) {
+ debugfs_remove(attr->entries.task_filter_file);
+ attr->entries.task_filter_file = NULL;
+ }
debugfs_remove(attr->entries.dir);
attr->entries.dir = NULL;
}
@@ -198,6 +211,12 @@ int init_fault_attr_entries(struct fault
goto fail;
attr->entries.verbose_file = file;

+ file = debugfs_create_bool("task-filter", mode, dir,
+ &attr->task_filter);
+ if (!file)
+ goto fail;
+ attr->entries.task_filter_file = file;
+
return 0;
fail:
cleanup_fault_attr_entries(attr);

--


2006-10-13 17:35:11

by Don Mullis

[permalink] [raw]
Subject: Re: [patch 6/7] process filtering for fault-injection capabilities

On Thu, 2006-10-12 at 16:43 +0900, Akinobu Mita wrote:
> This patch provides process filtering feature.
> The process filter allows failing only permitted processes
> by /proc/<pid>/make-it-fail

Akinobu: Toward the end of the previous round of review, we had
the following exchange:

On Tue, 2006-09-19 at 17:05 +0800, Akinobu Mita wrote:
On Mon, Sep 18, 2006 at 10:54:51PM -0700, Don Mullis wrote:
> > Add functionality to the process_filter variable: A negative argument
> > injects failures for only for pid==-process_filter, thereby permitting
> > per-process failures from boot time.
> >
>
> Is it better to add new filter for this purpose?
> Because someone may want to filter by tgid instead of pid.
>
> - positive value is for task->pid
> - nevative value is for task->tgid

Your idea sounds good to me.


So naturally I'm wondering why the functionality was dropped.
An application I had in mind was to identify which of the boot-time
calls to the slab allocator must not fail but are not yet marked
__GFP_NOFAIL (some experimentation showed that for pid 1 there are
lots of these).

Andrew: Would such an exercise would be worth the effort?





2006-10-13 18:52:54

by Andrew Morton

[permalink] [raw]
Subject: Re: [patch 6/7] process filtering for fault-injection capabilities

On Fri, 13 Oct 2006 10:28:54 -0700
Don Mullis <[email protected]> wrote:

> On Thu, 2006-10-12 at 16:43 +0900, Akinobu Mita wrote:
> > This patch provides process filtering feature.
> > The process filter allows failing only permitted processes
> > by /proc/<pid>/make-it-fail
>
> Akinobu: Toward the end of the previous round of review, we had
> the following exchange:
>
> On Tue, 2006-09-19 at 17:05 +0800, Akinobu Mita wrote:
> On Mon, Sep 18, 2006 at 10:54:51PM -0700, Don Mullis wrote:
> > > Add functionality to the process_filter variable: A negative argument
> > > injects failures for only for pid==-process_filter, thereby permitting
> > > per-process failures from boot time.
> > >
> >
> > Is it better to add new filter for this purpose?
> > Because someone may want to filter by tgid instead of pid.
> >
> > - positive value is for task->pid
> > - nevative value is for task->tgid
>
> Your idea sounds good to me.
>
>
> So naturally I'm wondering why the functionality was dropped.
> An application I had in mind was to identify which of the boot-time
> calls to the slab allocator must not fail but are not yet marked
> __GFP_NOFAIL (some experimentation showed that for pid 1 there are
> lots of these).
>
> Andrew: Would such an exercise would be worth the effort?
>

If we're looking for unchecked boot-time allocation failures then I'd say
no, there's not much point in adding code to check for these. We tend to
assume that the machine has enough memory to boot the kernel and initial
userspace.

That being said, some boot-time allocations are in code which could also
have been compiled into a module, so we do want to be checking those,
because we do care about memory-allocation failures at modprobe time. But
we can check for these by building the relevant driver as a module and then
testing it.



And the "if it's positive it's a pid, if it's negative it's a tgid"
interface is rather unpleasant - if we're going to do this we should use
separate control files, or use something like

echo "pid=1234" > /proc/process_filter
echo "tgid=4321" > /proc/process_filter