Hello Andrew,
A recent change in the fault injection code introduced an undesired change
in the behavior of the I/O timeout failure injection code. This series
restores the original behavior. Please consider these patches for kernel
v4.14.
Thanks,
Bart.
Changes compared to v1:
- Fixed build with CONFIG_FAIL_PAGE_ALLOC, CONFIG_FAIL_IO_TIMEOUT and/or
CONFIG_FAIL_FUTEX enabled.
Bart Van Assche (2):
fault-inject: Restore support for task-independent fault injection
block: Make I/O timeout failure injection again task-independent
block/blk-timeout.c | 2 +-
include/linux/fault-inject.h | 11 +++++++++--
lib/fault-inject.c | 4 +++-
3 files changed, 13 insertions(+), 4 deletions(-)
--
2.14.0
Certain faults should be injected independent of the context
in which these occur. Commit e41d58185f14 made it impossible to
inject faults independent of their context. Restore support for
task-independent fault injection by adding the attribute 'global'.
References: commit e41d58185f14 ("fault-inject: support systematic fault injection")
Signed-off-by: Bart Van Assche <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Cc: Akinobu Mita <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Andrew Morton <[email protected]>
---
include/linux/fault-inject.h | 11 +++++++++--
lib/fault-inject.c | 4 +++-
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index 728d4e0292aa..88dae2f21881 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -18,6 +18,7 @@ struct fault_attr {
atomic_t times;
atomic_t space;
unsigned long verbose;
+ bool global;
bool task_filter;
unsigned long stacktrace_depth;
unsigned long require_start;
@@ -30,17 +31,23 @@ struct fault_attr {
struct dentry *dname;
};
-#define FAULT_ATTR_INITIALIZER { \
+#define __FAULT_ATTR_INITIALIZER(__global) { \
.interval = 1, \
.times = ATOMIC_INIT(1), \
.require_end = ULONG_MAX, \
+ .global = (__global), \
.stacktrace_depth = 32, \
.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
.verbose = 2, \
.dname = NULL, \
}
-#define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
+#define FAULT_ATTR_INITIALIZER __FAULT_ATTR_INITIALIZER(false)
+
+#define DECLARE_FAULT_ATTR(name) \
+ struct fault_attr name = __FAULT_ATTR_INITIALIZER(false)
+#define DECLARE_GLOBAL_FAULT_ATTR(name) \
+ struct fault_attr name = __FAULT_ATTR_INITIALIZER(true)
int setup_fault_attr(struct fault_attr *attr, char *str);
bool should_fail(struct fault_attr *attr, ssize_t size);
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 7d315fdb9f13..c8f6ef5df3c6 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -107,7 +107,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
bool should_fail(struct fault_attr *attr, ssize_t size)
{
- if (in_task()) {
+ if (!attr->global && in_task()) {
unsigned int fail_nth = READ_ONCE(current->fail_nth);
if (fail_nth && !WRITE_ONCE(current->fail_nth, fail_nth - 1))
@@ -224,6 +224,8 @@ struct dentry *fault_create_debugfs_attr(const char *name,
if (!debugfs_create_u32("verbose_ratelimit_burst", mode, dir,
&attr->ratelimit_state.burst))
goto fail;
+ if (!debugfs_create_bool("global", mode, dir, &attr->global))
+ goto fail;
if (!debugfs_create_bool("task-filter", mode, dir, &attr->task_filter))
goto fail;
--
2.14.0
Commit e41d58185f14 made all faults that are triggered from task
context, including I/O timeouts, dependent on the failure
injection settings for that task. Make it again possible to inject
I/O timeout failures independent of task context. An example for a
fault injection for which this patch makes a difference (from a VM
booted with the 'threadirqs' command line parameter):
FAULT_INJECTION: forcing a failure. name fail_io_timeout, interval 1, probability 100, space 0, times 1
CPU: 6 PID: 1253 Comm: irq/36-skd0-msi Not tainted 4.13.0-rc2-dbg+ #5
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014
Call Trace:
dump_stack+0x8e/0xcd
should_fail+0x154/0x160
blk_should_fake_timeout+0x27/0x30
blk_mq_complete_request+0x11/0x30
skd_end_request+0x32/0xc0 [skd]
skd_isr_completion_posted.isra.30+0x14a/0x400 [skd]
skd_comp_q+0x40/0xb0 [skd]
irq_forced_thread_fn+0x2a/0x70
irq_thread+0x144/0x1a0
kthread+0x125/0x140
ret_from_fork+0x2a/0x40
Fixes: commit e41d58185f14 ("fault-inject: support systematic fault injection")
Signed-off-by: Bart Van Assche <[email protected]>
Cc: Jens Axboe <[email protected]>
Cc: Dmitry Vyukov <[email protected]>
Cc: Akinobu Mita <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Andrew Morton <[email protected]>
---
block/blk-timeout.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 17ec83bb0900..2e92b20b125f 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -11,7 +11,7 @@
#ifdef CONFIG_FAIL_IO_TIMEOUT
-static DECLARE_FAULT_ATTR(fail_io_timeout);
+static DECLARE_GLOBAL_FAULT_ATTR(fail_io_timeout);
static int __init setup_fail_io_timeout(char *str)
{
--
2.14.0
2017-08-23 8:00 GMT+09:00 Bart Van Assche <[email protected]>:
> Certain faults should be injected independent of the context
> in which these occur. Commit e41d58185f14 made it impossible to
> inject faults independent of their context. Restore support for
> task-independent fault injection by adding the attribute 'global'.
There was a the problem reported by fail-make-request user and the
problem is introduced by the follow-up patches for systematic
fault injection.
Please check the commit 9eeb52ae712e ("fault-inject: fix wrong
should_fail() decision in task context") and see if the problem
you reported is identical to the commit.
> References: commit e41d58185f14 ("fault-inject: support systematic fault injection")
> Signed-off-by: Bart Van Assche <[email protected]>
> Cc: Dmitry Vyukov <[email protected]>
> Cc: Akinobu Mita <[email protected]>
> Cc: Michal Hocko <[email protected]>
> Cc: Andrew Morton <[email protected]>
> ---
> include/linux/fault-inject.h | 11 +++++++++--
> lib/fault-inject.c | 4 +++-
> 2 files changed, 12 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
> index 728d4e0292aa..88dae2f21881 100644
> --- a/include/linux/fault-inject.h
> +++ b/include/linux/fault-inject.h
> @@ -18,6 +18,7 @@ struct fault_attr {
> atomic_t times;
> atomic_t space;
> unsigned long verbose;
> + bool global;
> bool task_filter;
> unsigned long stacktrace_depth;
> unsigned long require_start;
> @@ -30,17 +31,23 @@ struct fault_attr {
> struct dentry *dname;
> };
>
> -#define FAULT_ATTR_INITIALIZER { \
> +#define __FAULT_ATTR_INITIALIZER(__global) { \
> .interval = 1, \
> .times = ATOMIC_INIT(1), \
> .require_end = ULONG_MAX, \
> + .global = (__global), \
> .stacktrace_depth = 32, \
> .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
> .verbose = 2, \
> .dname = NULL, \
> }
>
> -#define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
> +#define FAULT_ATTR_INITIALIZER __FAULT_ATTR_INITIALIZER(false)
> +
> +#define DECLARE_FAULT_ATTR(name) \
> + struct fault_attr name = __FAULT_ATTR_INITIALIZER(false)
> +#define DECLARE_GLOBAL_FAULT_ATTR(name) \
> + struct fault_attr name = __FAULT_ATTR_INITIALIZER(true)
> int setup_fault_attr(struct fault_attr *attr, char *str);
> bool should_fail(struct fault_attr *attr, ssize_t size);
>
> diff --git a/lib/fault-inject.c b/lib/fault-inject.c
> index 7d315fdb9f13..c8f6ef5df3c6 100644
> --- a/lib/fault-inject.c
> +++ b/lib/fault-inject.c
> @@ -107,7 +107,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
>
> bool should_fail(struct fault_attr *attr, ssize_t size)
> {
> - if (in_task()) {
> + if (!attr->global && in_task()) {
> unsigned int fail_nth = READ_ONCE(current->fail_nth);
>
> if (fail_nth && !WRITE_ONCE(current->fail_nth, fail_nth - 1))
> @@ -224,6 +224,8 @@ struct dentry *fault_create_debugfs_attr(const char *name,
> if (!debugfs_create_u32("verbose_ratelimit_burst", mode, dir,
> &attr->ratelimit_state.burst))
> goto fail;
> + if (!debugfs_create_bool("global", mode, dir, &attr->global))
> + goto fail;
> if (!debugfs_create_bool("task-filter", mode, dir, &attr->task_filter))
> goto fail;
>
> --
> 2.14.0
>
On Wed, Aug 23, 2017 at 2:10 AM, Akinobu Mita <[email protected]> wrote:
> 2017-08-23 8:00 GMT+09:00 Bart Van Assche <[email protected]>:
>> Certain faults should be injected independent of the context
>> in which these occur. Commit e41d58185f14 made it impossible to
>> inject faults independent of their context. Restore support for
>> task-independent fault injection by adding the attribute 'global'.
>
> There was a the problem reported by fail-make-request user and the
> problem is introduced by the follow-up patches for systematic
> fault injection.
>
> Please check the commit 9eeb52ae712e ("fault-inject: fix wrong
> should_fail() decision in task context") and see if the problem
> you reported is identical to the commit.
Agree.
Otherwise I don't understand this commit. We now have 2 orthogonal
injection mechanisms: one global (original) and the new local. If one
needs global injection, he/she just enables the global one. We don't
seem to need the global flag on fault attributes.
>> References: commit e41d58185f14 ("fault-inject: support systematic fault injection")
>> Signed-off-by: Bart Van Assche <[email protected]>
>> Cc: Dmitry Vyukov <[email protected]>
>> Cc: Akinobu Mita <[email protected]>
>> Cc: Michal Hocko <[email protected]>
>> Cc: Andrew Morton <[email protected]>
>> ---
>> include/linux/fault-inject.h | 11 +++++++++--
>> lib/fault-inject.c | 4 +++-
>> 2 files changed, 12 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
>> index 728d4e0292aa..88dae2f21881 100644
>> --- a/include/linux/fault-inject.h
>> +++ b/include/linux/fault-inject.h
>> @@ -18,6 +18,7 @@ struct fault_attr {
>> atomic_t times;
>> atomic_t space;
>> unsigned long verbose;
>> + bool global;
>> bool task_filter;
>> unsigned long stacktrace_depth;
>> unsigned long require_start;
>> @@ -30,17 +31,23 @@ struct fault_attr {
>> struct dentry *dname;
>> };
>>
>> -#define FAULT_ATTR_INITIALIZER { \
>> +#define __FAULT_ATTR_INITIALIZER(__global) { \
>> .interval = 1, \
>> .times = ATOMIC_INIT(1), \
>> .require_end = ULONG_MAX, \
>> + .global = (__global), \
>> .stacktrace_depth = 32, \
>> .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
>> .verbose = 2, \
>> .dname = NULL, \
>> }
>>
>> -#define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
>> +#define FAULT_ATTR_INITIALIZER __FAULT_ATTR_INITIALIZER(false)
>> +
>> +#define DECLARE_FAULT_ATTR(name) \
>> + struct fault_attr name = __FAULT_ATTR_INITIALIZER(false)
>> +#define DECLARE_GLOBAL_FAULT_ATTR(name) \
>> + struct fault_attr name = __FAULT_ATTR_INITIALIZER(true)
>> int setup_fault_attr(struct fault_attr *attr, char *str);
>> bool should_fail(struct fault_attr *attr, ssize_t size);
>>
>> diff --git a/lib/fault-inject.c b/lib/fault-inject.c
>> index 7d315fdb9f13..c8f6ef5df3c6 100644
>> --- a/lib/fault-inject.c
>> +++ b/lib/fault-inject.c
>> @@ -107,7 +107,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
>>
>> bool should_fail(struct fault_attr *attr, ssize_t size)
>> {
>> - if (in_task()) {
>> + if (!attr->global && in_task()) {
>> unsigned int fail_nth = READ_ONCE(current->fail_nth);
>>
>> if (fail_nth && !WRITE_ONCE(current->fail_nth, fail_nth - 1))
>> @@ -224,6 +224,8 @@ struct dentry *fault_create_debugfs_attr(const char *name,
>> if (!debugfs_create_u32("verbose_ratelimit_burst", mode, dir,
>> &attr->ratelimit_state.burst))
>> goto fail;
>> + if (!debugfs_create_bool("global", mode, dir, &attr->global))
>> + goto fail;
>> if (!debugfs_create_bool("task-filter", mode, dir, &attr->task_filter))
>> goto fail;
>>
>> --
>> 2.14.0
>>
On Wed, 2017-08-23 at 09:10 +0900, Akinobu Mita wrote:
> 2017-08-23 8:00 GMT+09:00 Bart Van Assche <[email protected]>:
> > Certain faults should be injected independent of the context
> > in which these occur. Commit e41d58185f14 made it impossible to
> > inject faults independent of their context. Restore support for
> > task-independent fault injection by adding the attribute 'global'.
>
> There was a the problem reported by fail-make-request user and the
> problem is introduced by the follow-up patches for systematic
> fault injection.
>
> Please check the commit 9eeb52ae712e ("fault-inject: fix wrong
> should_fail() decision in task context") and see if the problem
> you reported is identical to the commit.
Hello Akinobu,
Since I was using the block layer for-next branch as the basis of my tests,
commit 9eeb52ae712e was not present in the kernel tree used in my tests.
However, with that commit 9eeb52ae712e applied I don't need my two patches
anymore to inject block layer timeout faults successfully. So I'm fine with
dropping my two patches.
Sorry for the noise.
Bart.