When we open/rename/unlink a file and open/rmdir a directory, the inode
nlink can't be zero, if it does, the file system is inconsistency,
and it can cause some unexpected errors, so add aggressive detection.
Signed-off-by: yi zhang <[email protected]>
---
fs/namei.c | 44 ++++++++++++++++++++++++++++++++++----------
include/linux/fs.h | 2 ++
2 files changed, 36 insertions(+), 10 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c
index ad74877..a39bf7c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -257,6 +257,23 @@ void putname(struct filename *name)
__putname(name);
}
+int generic_validate(struct inode *inode)
+{
+ if (unlikely(inode->i_nlink == 0))
+ return -EUCLEAN;
+}
+EXPORT_SYMBOL(generic_validate);
+
+static inline int inode_validate(struct inode *inode)
+{
+ int retval = 0;
+
+ if (inode->i_op->validate)
+ retval = inode->i_op->validate(inode);
+
+ return retval;
+}
+
static int check_acl(struct inode *inode, int mask)
{
#ifdef CONFIG_FS_POSIX_ACL
@@ -2716,20 +2733,21 @@ EXPORT_SYMBOL(__check_sticky);
* Check whether we can remove a link victim from directory dir, check
* whether the type of victim is right.
* 1. We can't do it if dir is read-only (done in permission())
- * 2. We should have write and exec permissions on dir
- * 3. We can't remove anything from append-only dir
- * 4. We can't do anything with immutable dir (done in permission())
- * 5. If the sticky bit on dir is set we should either
+ * 2. We should validate the victim's inode
+ * 3. We should have write and exec permissions on dir
+ * 4. We can't remove anything from append-only dir
+ * 5. We can't do anything with immutable dir (done in permission())
+ * 6. If the sticky bit on dir is set we should either
* a. be owner of dir, or
* b. be owner of victim, or
* c. have CAP_FOWNER capability
- * 6. If the victim is append-only or immutable we can't do antyhing with
+ * 7. If the victim is append-only or immutable we can't do antyhing with
* links pointing to it.
- * 7. If the victim has an unknown uid or gid we can't change the inode.
- * 8. If we were asked to remove a directory and victim isn't one - ENOTDIR.
- * 9. If we were asked to remove a non-directory and victim isn't one - EISDIR.
- * 10. We can't remove a root or mountpoint.
- * 11. We don't allow removal of NFS sillyrenamed files; it's handled by
+ * 8. If the victim has an unknown uid or gid we can't change the inode.
+ * 9. If we were asked to remove a directory and victim isn't one - ENOTDIR.
+ * 10. If we were asked to remove a non-directory and victim isn't one - EISDIR.
+ * 11. We can't remove a root or mountpoint.
+ * 12. We don't allow removal of NFS sillyrenamed files; it's handled by
* nfs_async_unlink().
*/
static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
@@ -2744,6 +2762,9 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
BUG_ON(victim->d_parent->d_inode != dir);
audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
+ error = inode_validate(inode);
+ if (error)
+ return error;
error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
if (error)
return error;
@@ -2889,6 +2910,9 @@ static int may_open(const struct path *path, int acc_mode, int flag)
break;
}
+ error = inode_validate(inode);
+ if (error)
+ return error;
error = inode_permission(inode, MAY_OPEN | acc_mode);
if (error)
return error;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2ba0743..52910f7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1710,6 +1710,7 @@ struct inode_operations {
umode_t create_mode, int *opened);
int (*tmpfile) (struct inode *, struct dentry *, umode_t);
int (*set_acl)(struct inode *, struct posix_acl *, int);
+ int (*validate)(struct inode *);
} ____cacheline_aligned;
ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
@@ -2534,6 +2535,7 @@ extern int inode_permission(struct inode *, int);
extern int __inode_permission(struct inode *, int);
extern int generic_permission(struct inode *, int);
extern int __check_sticky(struct inode *dir, struct inode *inode);
+extern int generic_validate(struct inode *inode);
static inline bool execute_ok(struct inode *inode)
{
--
2.5.0
Because of the disk and hardware issue, the inode->i_nlink of ext4
becomes zero abnormally but the dentry is still positive, it will
cause memory corruption after the following process:
1) Due to the inode->i_nlink is 0, this inode will be added into
the orhpan list,
2) ext4_rename() cover this inode, and drop_nlink() will underflow
the inode->i_nlink to 0xFFFFFFFF,
3) iput() add this inode to LRU,
4) evict() will call destroy_inode() to destroy this inode but
skip removing it from the orphan list,
5) after this, the inode's memory address space will be used by
other module, when the ext4 filesystem change the orphan list, it will
trample other module's data and then may cause oops.
This patch detect inode->i_nlink in i_op->valitate, if it becomes zero
abnormally, we call ext4_error and return -EFSCORRUPTED.
Signed-off-by: yi zhang <[email protected]>
---
fs/ext4/ext4.h | 1 +
fs/ext4/file.c | 1 +
fs/ext4/inode.c | 10 ++++++++++
fs/ext4/namei.c | 2 ++
fs/ext4/symlink.c | 3 +++
5 files changed, 17 insertions(+)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 2163c1e..451f3b1f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2472,6 +2472,7 @@ extern int ext4_write_inode(struct inode *, struct writeback_control *);
extern int ext4_setattr(struct dentry *, struct iattr *);
extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
+extern int ext4_validate(struct inode *inode);
extern void ext4_evict_inode(struct inode *);
extern void ext4_clear_inode(struct inode *);
extern int ext4_sync_inode(handle_t *, struct inode *);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d663d3d..8c6c702 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -762,5 +762,6 @@ const struct inode_operations ext4_file_inode_operations = {
.get_acl = ext4_get_acl,
.set_acl = ext4_set_acl,
.fiemap = ext4_fiemap,
+ .validate = ext4_validate,
};
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 88d57af..6f5c393 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5382,6 +5382,16 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
return 0;
}
+int ext4_validate(struct inode *inode)
+{
+ if (inode->i_nlink == 0) {
+ EXT4_ERROR_INODE(inode, "bad nlink value: %lu", inode->i_nlink);
+ return -EFSCORRUPTED;
+ }
+
+ return 0;
+}
+
static int ext4_index_trans_blocks(struct inode *inode, int lblocks,
int pextents)
{
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index eadba91..0396fe2 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3888,6 +3888,7 @@ const struct inode_operations ext4_dir_inode_operations = {
.get_acl = ext4_get_acl,
.set_acl = ext4_set_acl,
.fiemap = ext4_fiemap,
+ .validate = ext4_validate,
};
const struct inode_operations ext4_special_inode_operations = {
@@ -3895,4 +3896,5 @@ const struct inode_operations ext4_special_inode_operations = {
.listxattr = ext4_listxattr,
.get_acl = ext4_get_acl,
.set_acl = ext4_set_acl,
+ .validate = ext4_validate,
};
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 73b184d..92377d0 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -86,16 +86,19 @@ const struct inode_operations ext4_encrypted_symlink_inode_operations = {
.get_link = ext4_encrypted_get_link,
.setattr = ext4_setattr,
.listxattr = ext4_listxattr,
+ .validate = ext4_validate,
};
const struct inode_operations ext4_symlink_inode_operations = {
.get_link = page_get_link,
.setattr = ext4_setattr,
.listxattr = ext4_listxattr,
+ .validate = ext4_validate,
};
const struct inode_operations ext4_fast_symlink_inode_operations = {
.get_link = simple_get_link,
.setattr = ext4_setattr,
.listxattr = ext4_listxattr,
+ .validate = ext4_validate,
};
--
2.5.0
Hi yi,
[auto build test WARNING on ext4/dev]
[also build test WARNING on v4.10-rc4 next-20170118]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/yi-zhang/vfs-add-detection-of-inode-validation/20170119-013142
base: https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git dev
config: i386-randconfig-i1-201703 (attached as .config)
compiler: gcc-4.8 (Debian 4.8.4-1) 4.8.4
reproduce:
# save the attached .config to linux build tree
make ARCH=i386
All warnings (new ones prefixed by >>):
fs/ext4/inode.c: In function 'ext4_validate':
>> fs/ext4/inode.c:5388:3: warning: format '%lu' expects argument of type 'long unsigned int', but argument 6 has type 'unsigned int' [-Wformat=]
EXT4_ERROR_INODE(inode, "bad nlink value: %lu", inode->i_nlink);
^
vim +5388 fs/ext4/inode.c
5372 * allocation is done, we will have i_blocks inconsistent with
5373 * on-disk file blocks.
5374 * We always keep i_blocks updated together with real
5375 * allocation. But to not confuse with user, stat
5376 * will return the blocks that include the delayed allocation
5377 * blocks for this file.
5378 */
5379 delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb),
5380 EXT4_I(inode)->i_reserved_data_blocks);
5381 stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits - 9);
5382 return 0;
5383 }
5384
5385 int ext4_validate(struct inode *inode)
5386 {
5387 if (inode->i_nlink == 0) {
> 5388 EXT4_ERROR_INODE(inode, "bad nlink value: %lu", inode->i_nlink);
5389 return -EFSCORRUPTED;
5390 }
5391
5392 return 0;
5393 }
5394
5395 static int ext4_index_trans_blocks(struct inode *inode, int lblocks,
5396 int pextents)
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
Hi yi,
[auto build test WARNING on ext4/dev]
[also build test WARNING on v4.10-rc4 next-20170118]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
url: https://github.com/0day-ci/linux/commits/yi-zhang/vfs-add-detection-of-inode-validation/20170119-013142
base: https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git dev
config: x86_64-randconfig-s1-01190013 (attached as .config)
compiler: gcc-4.4 (Debian 4.4.7-8) 4.4.7
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64
All warnings (new ones prefixed by >>):
fs/ext4/inode.c: In function 'ext4_validate':
>> fs/ext4/inode.c:5388: warning: format '%lu' expects type 'long unsigned int', but argument 6 has type 'unsigned int'
fs/ext4/inode.o: warning: objtool: ext4_inode_journal_mode()+0xc0: function has unreachable instruction
vim +5388 fs/ext4/inode.c
5372 * allocation is done, we will have i_blocks inconsistent with
5373 * on-disk file blocks.
5374 * We always keep i_blocks updated together with real
5375 * allocation. But to not confuse with user, stat
5376 * will return the blocks that include the delayed allocation
5377 * blocks for this file.
5378 */
5379 delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb),
5380 EXT4_I(inode)->i_reserved_data_blocks);
5381 stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits - 9);
5382 return 0;
5383 }
5384
5385 int ext4_validate(struct inode *inode)
5386 {
5387 if (inode->i_nlink == 0) {
> 5388 EXT4_ERROR_INODE(inode, "bad nlink value: %lu", inode->i_nlink);
5389 return -EFSCORRUPTED;
5390 }
5391
5392 return 0;
5393 }
5394
5395 static int ext4_index_trans_blocks(struct inode *inode, int lblocks,
5396 int pextents)
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
FYI, we noticed the following commit:
commit: 3fbc7bbd071c2fa802d94aab68257ee591ff2185 ("ext4: add detection of i_nlink")
url: https://github.com/0day-ci/linux/commits/yi-zhang/vfs-add-detection-of-inode-validation/20170119-013142
base: https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git dev
in testcase: blogbench
with following parameters:
disk: 1HDD
fs: ext4
cpufreq_governor: performance
test-description: Blogbench is a portable filesystem benchmark that tries to reproduce the load of a real-world busy file server.
test-url: https://www.pureftpd.org/project/blogbench
on test machine: 32 threads Intel(R) Core(TM) i5-2300 CPU @ 2.80GHz with 4G memory
caused below changes:
[ 23.891025] Benchmarking for 30 iterations.
[ 23.891027]
[ 23.898537] The test will run during 5 minutes.
[ 23.898538]
[ 23.905748]
[ 23.909450] Nb blogs R articles W articles R pictures W pictures R comments W comments
[ 23.909451]
[ 25.439956] EXT4-fs error (device sda2): ext4_validate:5388: inode #35127453: comm blogbench: bad nlink value: 0
[ 25.474247] EXT4-fs error (device sda2): ext4_validate:5388: inode #35127453: comm blogbench: bad nlink value: 0
[ 26.239439] EXT4-fs error (device sda2): ext4_validate:5388: inode #45088934: comm blogbench: bad nlink value: 0
[ 26.277307] EXT4-fs error (device sda2): ext4_validate:5388: inode #45088934: comm blogbench: bad nlink value: 0
[ 33.845065] 144 1367458 7950 874822 7055 698873 23004
[ 33.845071]
[ 34.567113] EXT4-fs error (device sda2): ext4_validate:5388: inode #38797366: comm blogbench: bad nlink value: 0
[ 36.457183] EXT4-fs error (device sda2): ext4_validate:5388: inode #1835048: comm blogbench: bad nlink value: 0
[ 36.541966] EXT4-fs error (device sda2): ext4_validate:5388: inode #1835048: comm blogbench: bad nlink value: 0
[ 36.590946] EXT4-fs error (device sda2): ext4_validate:5388: inode #1835048: comm blogbench: bad nlink value: 0
[ 36.708818] EXT4-fs error (device sda2): ext4_validate:5388: inode #1835048: comm blogbench: bad nlink value: 0
[ 36.717562] EXT4-fs error (device sda2): ext4_validate:5388: inode #1835048: comm blogbench: bad nlink value: 0
[ 36.737046] EXT4-fs error (device sda2): ext4_validate:5388: inode #1835048: comm blogbench: bad nlink value: 0
[ 36.857064] EXT4-fs error (device sda2): ext4_validate:5388: inode #1835048: comm blogbench: bad nlink value: 0
[ 36.886975] EXT4-fs error (device sda2): ext4_validate:5388: inode #1835048: comm blogbench: bad nlink value: 0
[ 37.050375] EXT4-fs error (device sda2): ext4_validate:5388: inode #1835048: comm blogbench: bad nlink value: 0
[ 39.914724] EXT4-fs error: 26 callbacks suppressed
[ 39.920397] EXT4-fs error (device sda2): ext4_validate:5388: inode #14418082: comm blogbench: bad nlink value: 0
[ 39.941975] EXT4-fs error (device sda2): ext4_validate:5388: inode #14418082: comm blogbench: bad nlink value: 0
[ 39.956585] EXT4-fs error (device sda2): ext4_validate:5388: inode #14680188: comm blogbench: bad nlink value: 0
[ 39.959336] EXT4-fs error (device sda2): ext4_validate:5388: inode #14418082: comm blogbench: bad nlink value: 0
[ 39.969813] EXT4-fs error (device sda2): ext4_validate:5388: inode #14418082: comm blogbench: bad nlink value: 0
[ 39.971726] EXT4-fs error (device sda2): ext4_validate:5388: inode #20316356: comm blogbench: bad nlink value: 0
[ 39.973644] EXT4-fs error (device sda2): ext4_validate:5388: inode #14418082: comm blogbench: bad nlink value: 0
[ 39.990417] EXT4-fs error (device sda2): ext4_validate:5388: inode #27000844: comm blogbench: bad nlink value: 0
[ 39.993916] EXT4-fs error (device sda2): ext4_validate:5388: inode #27000844: comm blogbench: bad nlink value: 0
[ 40.019657] EXT4-fs error (device sda2): ext4_validate:5388: inode #27000844: comm blogbench: bad nlink value: 0
[ 43.844343] 172 1222347 1446 783841 1404 774376 6563
To reproduce:
git clone git://git.kernel.org/pub/scm/linux/kernel/git/wfg/lkp-tests.git
cd lkp-tests
bin/lkp install job.yaml # job file is attached in this email
bin/lkp run job.yaml
Thanks,
Xiaolong