I've found a regression of nilfs2 file system in 2.6.39-rc1.
The recent per-queue plugging removal patch causes kernel oopses in
nilfs. The following patch will fix this.
I'll send it to Linus along with other bug-fixes.
Thanks,
Ryusuke Konishi
--
From: Ryusuke Konishi <[email protected]>
nilfs2: fix oops due to a bad aops initialization
Nilfs in 2.6.39-rc1 hit the following oops:
BUG: unable to handle kernel NULL pointer dereference at 0000000000000048
IP: [<ffffffff810ac235>] try_to_release_page+0x2a/0x3d
PGD 234cb6067 PUD 234c72067 PMD 0
Oops: 0000 [#1] SMP
<snip>
Process truncate (pid: 10995, threadinfo ffff8802353c2000, task ffff880234cfa000)
Stack:
ffff8802333c77b8 ffffffff810b64b0 0000000000003802 ffffffffa0052cca
0000000000000000 ffff8802353c3b58 0000000000000000 ffff8802353c3b58
0000000000000001 0000000000000000 ffffea0007b92308 ffffea0007b92308
Call Trace:
[<ffffffff810b64b0>] ? invalidate_inode_pages2_range+0x15f/0x273
[<ffffffffa0052cca>] ? nilfs_palloc_get_block+0x2d/0xaf [nilfs2]
[<ffffffff810589e7>] ? bit_waitqueue+0x14/0xa1
[<ffffffff81058ab1>] ? wake_up_bit+0x10/0x20
[<ffffffffa00433fd>] ? nilfs_forget_buffer+0x66/0x7a [nilfs2]
[<ffffffffa00467b8>] ? nilfs_btree_concat_left+0x5c/0x77 [nilfs2]
[<ffffffffa00471fc>] ? nilfs_btree_delete+0x395/0x3cf [nilfs2]
[<ffffffffa00449a3>] ? nilfs_bmap_do_delete+0x6e/0x79 [nilfs2]
[<ffffffffa0045845>] ? nilfs_btree_last_key+0x14b/0x15e [nilfs2]
[<ffffffffa00449dd>] ? nilfs_bmap_truncate+0x2f/0x83 [nilfs2]
[<ffffffffa0044ab2>] ? nilfs_bmap_last_key+0x35/0x62 [nilfs2]
[<ffffffffa003e99b>] ? nilfs_truncate_bmap+0x6b/0xc7 [nilfs2]
[<ffffffffa003ee4a>] ? nilfs_truncate+0x79/0xe4 [nilfs2]
[<ffffffff810b6c00>] ? vmtruncate+0x33/0x3b
[<ffffffffa003e8f1>] ? nilfs_setattr+0x4d/0x8c [nilfs2]
[<ffffffff81026106>] ? do_page_fault+0x31b/0x356
[<ffffffff810f9d61>] ? notify_change+0x17d/0x262
[<ffffffff810e5046>] ? do_truncate+0x65/0x80
[<ffffffff810e52af>] ? sys_ftruncate+0xf1/0xf6
[<ffffffff8132c012>] ? system_call_fastpath+0x16/0x1b
Code: c3 48 83 ec 08 48 8b 17 48 8b 47 18 80 e2 01 75 04 0f 0b eb fe 48 8b 17 80 e6 20 74 05 31 c0 41 59 c3 48 85 c0 74 11 48 8b 40 58
8b 40 48 48 85 c0 74 04 41 58 ff e0 59 e9 b1 b5 05 00 41 54
RIP [<ffffffff810ac235>] try_to_release_page+0x2a/0x3d
RSP <ffff8802353c3b08>
CR2: 0000000000000048
This oops was brought in by the change "block: remove per-queue
plugging" (commit: 7eaceaccab5f40bb). It initializes mapping->a_ops
with a NULL pointer for some pages in nilfs (e.g. btree node pages),
but mm code doesn't NULL pointer checks against mapping->a_ops. (the
check is done for each callback function)
This corrects the aops initialization and fixes the oops.
Signed-off-by: Ryusuke Konishi <[email protected]>
Cc: Jens Axboe <[email protected]>
---
fs/nilfs2/page.c | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 4d2a1ee..9d2dc6b 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -495,12 +495,14 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
void nilfs_mapping_init(struct address_space *mapping,
struct backing_dev_info *bdi)
{
+ static const struct address_space_operations empty_aops;
+
mapping->host = NULL;
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_NOFS);
mapping->assoc_mapping = NULL;
mapping->backing_dev_info = bdi;
- mapping->a_ops = NULL;
+ mapping->a_ops = &empty_aops;
}
/*
--
1.7.3.5
On 2011-03-30 06:25, Ryusuke Konishi wrote:
> I've found a regression of nilfs2 file system in 2.6.39-rc1.
>
> The recent per-queue plugging removal patch causes kernel oopses in
> nilfs. The following patch will fix this.
>
> I'll send it to Linus along with other bug-fixes.
Irk, I guess we need the empty aops then.
You can add my acked-by to the patch, sorry about that.
--
Jens Axboe
On 2011-03-30 06:25, Ryusuke Konishi wrote:
> diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
> index 4d2a1ee..9d2dc6b 100644
> --- a/fs/nilfs2/page.c
> +++ b/fs/nilfs2/page.c
> @@ -495,12 +495,14 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
> void nilfs_mapping_init(struct address_space *mapping,
> struct backing_dev_info *bdi)
> {
> + static const struct address_space_operations empty_aops;
> +
> mapping->host = NULL;
> mapping->flags = 0;
> mapping_set_gfp_mask(mapping, GFP_NOFS);
> mapping->assoc_mapping = NULL;
> mapping->backing_dev_info = bdi;
> - mapping->a_ops = NULL;
> + mapping->a_ops = &empty_aops;
> }
Hmm wait, inode init should set the mapping aops to an empty type
already. Does the OOPS go away if you just remove the mapping->a_ops =
NULL assignment?
--
Jens Axboe
On 2011-03-30 13:07, Jens Axboe wrote:
> On 2011-03-30 06:25, Ryusuke Konishi wrote:
>> diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
>> index 4d2a1ee..9d2dc6b 100644
>> --- a/fs/nilfs2/page.c
>> +++ b/fs/nilfs2/page.c
>> @@ -495,12 +495,14 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
>> void nilfs_mapping_init(struct address_space *mapping,
>> struct backing_dev_info *bdi)
>> {
>> + static const struct address_space_operations empty_aops;
>> +
>> mapping->host = NULL;
>> mapping->flags = 0;
>> mapping_set_gfp_mask(mapping, GFP_NOFS);
>> mapping->assoc_mapping = NULL;
>> mapping->backing_dev_info = bdi;
>> - mapping->a_ops = NULL;
>> + mapping->a_ops = &empty_aops;
>> }
>
> Hmm wait, inode init should set the mapping aops to an empty type
> already. Does the OOPS go away if you just remove the mapping->a_ops =
> NULL assignment?
In any case, I think this is a better fix.
diff --git a/fs/inode.c b/fs/inode.c
index 5f4e11a..b818730 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -125,6 +125,13 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
static DECLARE_RWSEM(iprune_sem);
/*
+ * Empty aops. Can be used for the cases where the user does not
+ * define any of the address_space operations.
+ */
+const struct address_space_operations empty_aops = {
+};
+
+/*
* Statistics gathering..
*/
struct inodes_stat_t inodes_stat;
@@ -176,7 +183,6 @@ int proc_nr_inodes(ctl_table *table, int write,
*/
int inode_init_always(struct super_block *sb, struct inode *inode)
{
- static const struct address_space_operations empty_aops;
static const struct inode_operations empty_iops;
static const struct file_operations empty_fops;
struct address_space *const mapping = &inode->i_data;
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 4d2a1ee..1168059 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -500,7 +500,7 @@ void nilfs_mapping_init(struct address_space *mapping,
mapping_set_gfp_mask(mapping, GFP_NOFS);
mapping->assoc_mapping = NULL;
mapping->backing_dev_info = bdi;
- mapping->a_ops = NULL;
+ mapping->a_ops = &empty_aops;
}
/*
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index c74400f..403c192 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -80,7 +80,6 @@ enum {
};
static const struct inode_operations none_inode_operations;
-static const struct address_space_operations none_address_operations;
static const struct file_operations none_file_operations;
/**
@@ -130,7 +129,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
}
/* Re-define all operations to be "nothing" */
- inode->i_mapping->a_ops = &none_address_operations;
+ inode->i_mapping->a_ops = &empty_aops;
inode->i_op = &none_inode_operations;
inode->i_fop = &none_file_operations;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 52f283c..1b95af3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -613,6 +613,8 @@ struct address_space_operations {
int (*error_remove_page)(struct address_space *, struct page *);
};
+extern const struct address_space_operations empty_aops;
+
/*
* pagecache_write_begin/pagecache_write_end must be used by general code
* to write into the pagecache.
--
Jens Axboe
On Wed, 30 Mar 2011 13:07:46 +0200, Jens Axboe <[email protected]> wrote:
> On 2011-03-30 06:25, Ryusuke Konishi wrote:
> > diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
> > index 4d2a1ee..9d2dc6b 100644
> > --- a/fs/nilfs2/page.c
> > +++ b/fs/nilfs2/page.c
> > @@ -495,12 +495,14 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
> > void nilfs_mapping_init(struct address_space *mapping,
> > struct backing_dev_info *bdi)
> > {
> > + static const struct address_space_operations empty_aops;
> > +
> > mapping->host = NULL;
> > mapping->flags = 0;
> > mapping_set_gfp_mask(mapping, GFP_NOFS);
> > mapping->assoc_mapping = NULL;
> > mapping->backing_dev_info = bdi;
> > - mapping->a_ops = NULL;
> > + mapping->a_ops = &empty_aops;
> > }
>
> Hmm wait, inode init should set the mapping aops to an empty type
> already. Does the OOPS go away if you just remove the mapping->a_ops =
> NULL assignment?
>
> --
> Jens Axboe
Nilfs has two mappings in each inode, one for data pages and another
for btree nodes. The aops initialization is neede for the latter one.
Ryusuke Konishi
On Wed, 30 Mar 2011 13:17:18 +0200, Jens Axboe wrote:
> On 2011-03-30 13:07, Jens Axboe wrote:
> > On 2011-03-30 06:25, Ryusuke Konishi wrote:
> >> diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
> >> index 4d2a1ee..9d2dc6b 100644
> >> --- a/fs/nilfs2/page.c
> >> +++ b/fs/nilfs2/page.c
> >> @@ -495,12 +495,14 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
> >> void nilfs_mapping_init(struct address_space *mapping,
> >> struct backing_dev_info *bdi)
> >> {
> >> + static const struct address_space_operations empty_aops;
> >> +
> >> mapping->host = NULL;
> >> mapping->flags = 0;
> >> mapping_set_gfp_mask(mapping, GFP_NOFS);
> >> mapping->assoc_mapping = NULL;
> >> mapping->backing_dev_info = bdi;
> >> - mapping->a_ops = NULL;
> >> + mapping->a_ops = &empty_aops;
> >> }
> >
> > Hmm wait, inode init should set the mapping aops to an empty type
> > already. Does the OOPS go away if you just remove the mapping->a_ops =
> > NULL assignment?
>
> In any case, I think this is a better fix.
>
> diff --git a/fs/inode.c b/fs/inode.c
> index 5f4e11a..b818730 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -125,6 +125,13 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
> static DECLARE_RWSEM(iprune_sem);
>
> /*
> + * Empty aops. Can be used for the cases where the user does not
> + * define any of the address_space operations.
> + */
> +const struct address_space_operations empty_aops = {
> +};
> +
> +/*
> * Statistics gathering..
> */
> struct inodes_stat_t inodes_stat;
> @@ -176,7 +183,6 @@ int proc_nr_inodes(ctl_table *table, int write,
> */
> int inode_init_always(struct super_block *sb, struct inode *inode)
> {
> - static const struct address_space_operations empty_aops;
> static const struct inode_operations empty_iops;
> static const struct file_operations empty_fops;
> struct address_space *const mapping = &inode->i_data;
> diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
> index 4d2a1ee..1168059 100644
> --- a/fs/nilfs2/page.c
> +++ b/fs/nilfs2/page.c
> @@ -500,7 +500,7 @@ void nilfs_mapping_init(struct address_space *mapping,
> mapping_set_gfp_mask(mapping, GFP_NOFS);
> mapping->assoc_mapping = NULL;
> mapping->backing_dev_info = bdi;
> - mapping->a_ops = NULL;
> + mapping->a_ops = &empty_aops;
> }
>
> /*
> diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
> index c74400f..403c192 100644
> --- a/fs/ubifs/xattr.c
> +++ b/fs/ubifs/xattr.c
> @@ -80,7 +80,6 @@ enum {
> };
>
> static const struct inode_operations none_inode_operations;
> -static const struct address_space_operations none_address_operations;
> static const struct file_operations none_file_operations;
>
> /**
> @@ -130,7 +129,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
> }
>
> /* Re-define all operations to be "nothing" */
> - inode->i_mapping->a_ops = &none_address_operations;
> + inode->i_mapping->a_ops = &empty_aops;
> inode->i_op = &none_inode_operations;
> inode->i_fop = &none_file_operations;
>
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 52f283c..1b95af3 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -613,6 +613,8 @@ struct address_space_operations {
> int (*error_remove_page)(struct address_space *, struct page *);
> };
>
> +extern const struct address_space_operations empty_aops;
> +
> /*
> * pagecache_write_begin/pagecache_write_end must be used by general code
> * to write into the pagecache.
>
> --
> Jens Axboe
Sorry but I've already sent a pull request to Linus with your
Acked-by. And, this seems to be doing two things. One is for fixing
the current kerenl oops and another is for sharing the empty address
space operations.
I have no problem with both patches, but for the purpose of the bug
fix, the first one looks simpler.
Thanks,
Ryusuke Konishi
On 2011-03-30 13:48, Ryusuke Konishi wrote:
> On Wed, 30 Mar 2011 13:17:18 +0200, Jens Axboe wrote:
>> On 2011-03-30 13:07, Jens Axboe wrote:
>>> On 2011-03-30 06:25, Ryusuke Konishi wrote:
>>>> diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
>>>> index 4d2a1ee..9d2dc6b 100644
>>>> --- a/fs/nilfs2/page.c
>>>> +++ b/fs/nilfs2/page.c
>>>> @@ -495,12 +495,14 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
>>>> void nilfs_mapping_init(struct address_space *mapping,
>>>> struct backing_dev_info *bdi)
>>>> {
>>>> + static const struct address_space_operations empty_aops;
>>>> +
>>>> mapping->host = NULL;
>>>> mapping->flags = 0;
>>>> mapping_set_gfp_mask(mapping, GFP_NOFS);
>>>> mapping->assoc_mapping = NULL;
>>>> mapping->backing_dev_info = bdi;
>>>> - mapping->a_ops = NULL;
>>>> + mapping->a_ops = &empty_aops;
>>>> }
>>>
>>> Hmm wait, inode init should set the mapping aops to an empty type
>>> already. Does the OOPS go away if you just remove the mapping->a_ops =
>>> NULL assignment?
>>
>> In any case, I think this is a better fix.
>>
>> diff --git a/fs/inode.c b/fs/inode.c
>> index 5f4e11a..b818730 100644
>> --- a/fs/inode.c
>> +++ b/fs/inode.c
>> @@ -125,6 +125,13 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
>> static DECLARE_RWSEM(iprune_sem);
>>
>> /*
>> + * Empty aops. Can be used for the cases where the user does not
>> + * define any of the address_space operations.
>> + */
>> +const struct address_space_operations empty_aops = {
>> +};
>> +
>> +/*
>> * Statistics gathering..
>> */
>> struct inodes_stat_t inodes_stat;
>> @@ -176,7 +183,6 @@ int proc_nr_inodes(ctl_table *table, int write,
>> */
>> int inode_init_always(struct super_block *sb, struct inode *inode)
>> {
>> - static const struct address_space_operations empty_aops;
>> static const struct inode_operations empty_iops;
>> static const struct file_operations empty_fops;
>> struct address_space *const mapping = &inode->i_data;
>> diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
>> index 4d2a1ee..1168059 100644
>> --- a/fs/nilfs2/page.c
>> +++ b/fs/nilfs2/page.c
>> @@ -500,7 +500,7 @@ void nilfs_mapping_init(struct address_space *mapping,
>> mapping_set_gfp_mask(mapping, GFP_NOFS);
>> mapping->assoc_mapping = NULL;
>> mapping->backing_dev_info = bdi;
>> - mapping->a_ops = NULL;
>> + mapping->a_ops = &empty_aops;
>> }
>>
>> /*
>> diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
>> index c74400f..403c192 100644
>> --- a/fs/ubifs/xattr.c
>> +++ b/fs/ubifs/xattr.c
>> @@ -80,7 +80,6 @@ enum {
>> };
>>
>> static const struct inode_operations none_inode_operations;
>> -static const struct address_space_operations none_address_operations;
>> static const struct file_operations none_file_operations;
>>
>> /**
>> @@ -130,7 +129,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
>> }
>>
>> /* Re-define all operations to be "nothing" */
>> - inode->i_mapping->a_ops = &none_address_operations;
>> + inode->i_mapping->a_ops = &empty_aops;
>> inode->i_op = &none_inode_operations;
>> inode->i_fop = &none_file_operations;
>>
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index 52f283c..1b95af3 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -613,6 +613,8 @@ struct address_space_operations {
>> int (*error_remove_page)(struct address_space *, struct page *);
>> };
>>
>> +extern const struct address_space_operations empty_aops;
>> +
>> /*
>> * pagecache_write_begin/pagecache_write_end must be used by general code
>> * to write into the pagecache.
>>
>> --
>> Jens Axboe
>
> Sorry but I've already sent a pull request to Linus with your
> Acked-by. And, this seems to be doing two things. One is for fixing
> the current kerenl oops and another is for sharing the empty address
> space operations.
>
> I have no problem with both patches, but for the purpose of the bug
> fix, the first one looks simpler.
No worries, I'll just merge later on.
--
Jens Axboe