2017-11-30 15:17:58

by Artem Blagodarenko

[permalink] [raw]
Subject: [PATCH v3 0/3] 64 bit inode counter support

With current hardware, clusters face the problem of not being able
to create enough inodes on a partition. Lustre uses 0-size
files to store some information about files. Current
MDS disk sizes allow storing a large number of such files, but
EXT4 limits this number to ~4 billion.

Lustre FS has features like DNE to distribute metadata over many targets
(disks), but the disks are then not used effectively. It would be great to
have the ability to store more than ~4 billion inodes on one EXT4 file system.

These patches add 1) the dirdata feature, which allows storing additional
data in a directory entry, and 2) code that uses dirdata to store the high
bits of a 64-bit inode number.

This is the third version of the patch set. Changes since v2:
* added patch "Removes static definition of dx_root struct"
* __le32 used for on-disk data structures
* used __u8 type not char
* macros __EXT4_DIR_REC_LEN and EXT4_DIR_REC_LEN renamed
* added structures to address dirent fields
* removed BUG_ON() execution
* no need to add s_usr_quota_inum_hi and s_grp_quota_inum_hi
* checksum using high part of inode
* fixed bug in size check code

Andreas Dilger (1):
ext4: dirdata feature

Artem Blagodarenko (1):
ext4: Add 64-bit inode number support

Yang Sheng (1):
ext4: Removes static definition of dx_root struct

fs/ext4/dir.c | 21 ++--
fs/ext4/ext4.h | 184 +++++++++++++++++++++++++++++++----
fs/ext4/ialloc.c | 19 ++--
fs/ext4/inline.c | 18 ++--
fs/ext4/inode.c | 5 +
fs/ext4/namei.c | 289 ++++++++++++++++++++++++++++++++++++++-----------------
fs/ext4/resize.c | 8 +-
fs/ext4/super.c | 17 +++-
8 files changed, 423 insertions(+), 138 deletions(-)

--
2.13.6 (Apple Git-96)


2017-11-30 15:18:03

by Artem Blagodarenko

[permalink] [raw]
Subject: [PATCH v3 2/3] ext4: dirdata feature

From: Andreas Dilger <[email protected]>

This patch implements a feature which allows ext4 users (e.g. Lustre)
to store data in an ext4 dirent. The data is stored in the dirent after
the file name, and this space is accounted for in de->rec_len.
The flag EXT4_DIRENT_LUFID is added to d_type if extra data
is present.

Make use of dentry->d_fsdata to pass the FID to ext4, so no
changes to the ext4_add_entry() interface are required.

Signed-off-by: Andreas Dilger <[email protected]>
Signed-off-by: Artem Blagodarenko <[email protected]>
---
fs/ext4/dir.c | 17 ++++---
fs/ext4/ext4.h | 99 ++++++++++++++++++++++++++++++++++++++---
fs/ext4/inline.c | 18 ++++----
fs/ext4/namei.c | 132 ++++++++++++++++++++++++++++++++++++++++++-------------
fs/ext4/super.c | 3 +-
5 files changed, 218 insertions(+), 51 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index b04e882179c6..0c4dddb0f07a 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -67,11 +67,11 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
const int rlen = ext4_rec_len_from_disk(de->rec_len,
dir->i_sb->s_blocksize);

- if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
+ if (unlikely(rlen < EXT4_DIR_NAME_LEN(1)))
error_msg = "rec_len is smaller than minimal";
else if (unlikely(rlen % 4 != 0))
error_msg = "rec_len % 4 != 0";
- else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
+ else if (unlikely(rlen < EXT4_DIR_REC_LEN(de)))
error_msg = "rec_len is too small for name_len";
else if (unlikely(((char *) de - buf) + rlen > size))
error_msg = "directory entry across range";
@@ -218,7 +218,8 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
* failure will be detected in the
* dirent test below. */
if (ext4_rec_len_from_disk(de->rec_len,
- sb->s_blocksize) < EXT4_DIR_REC_LEN(1))
+ sb->s_blocksize) <
+ EXT4_DIR_NAME_LEN(1))
break;
i += ext4_rec_len_from_disk(de->rec_len,
sb->s_blocksize);
@@ -441,12 +442,18 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
struct fname *fname, *new_fn;
struct dir_private_info *info;
int len;
+ int extra_data = 0;

info = dir_file->private_data;
p = &info->root.rb_node;

/* Create and allocate the fname structure */
- len = sizeof(struct fname) + ent_name->len + 1;
+ if (dirent->file_type & ~EXT4_FT_MASK)
+ extra_data = ext4_get_dirent_data_len(dirent);
+
+ len = sizeof(struct fname) + dirent->name_len + extra_data + 1;
+
+
new_fn = kzalloc(len, GFP_KERNEL);
if (!new_fn)
return -ENOMEM;
@@ -455,7 +462,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
new_fn->inode = le32_to_cpu(dirent->inode);
new_fn->name_len = ent_name->len;
new_fn->file_type = dirent->file_type;
- memcpy(new_fn->name, ent_name->name, ent_name->len);
+ memcpy(new_fn->name, ent_name->name, ent_name->len + extra_data);
new_fn->name[ent_name->len] = 0;

while (*p) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e2abe01c8c6b..3678657d8e47 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1111,6 +1111,7 @@ struct ext4_inode_info {
* Mount flags set via mount options or defaults
*/
#define EXT4_MOUNT_NO_MBCACHE 0x00001 /* Do not use mbcache */
+#define EXT4_MOUNT_DIRDATA 0x00002 /* Data in directory entries*/
#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
@@ -1804,7 +1805,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
EXT4_FEATURE_INCOMPAT_ENCRYPT | \
EXT4_FEATURE_INCOMPAT_CSUM_SEED | \
- EXT4_FEATURE_INCOMPAT_LARGEDIR)
+ EXT4_FEATURE_INCOMPAT_LARGEDIR | \
+ EXT4_FEATURE_INCOMPAT_DIRDATA)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
@@ -1965,6 +1967,56 @@ struct ext4_dir_entry_tail {

#define EXT4_FT_DIR_CSUM 0xDE

+#define EXT4_FT_MASK 0xf
+
+#if EXT4_FT_MAX > EXT4_FT_MASK
+#error "conflicting EXT4_FT_MAX and EXT4_FT_MASK"
+#endif
+
+/*
+ * d_type has 4 unused bits, so it can hold four types data. these different
+ * type of data (e.g. lustre data, high 32 bits of 64-bit inode number) can be
+ * stored, in flag order, after file-name in ext4 dirent.
+ */
+/*
+ * this flag is added to d_type if ext4 dirent has extra data after
+ * filename. this data length is variable and length is stored in first byte
+ * of data. data start after filename NUL byte.
+ * This is used by Lustre FS.
+ */
+#define EXT4_DIRENT_LUFID 0x10
+#define EXT4_DIRENT_INODE 0x20
+#define DIRENT_INODE_LEN 2
+
+#define EXT4_LUFID_MAGIC 0xAD200907UL
+
+struct ext4_dirent_data_header {
+ /* length of this header + the whole data blob */
+ __u8 ddh_length;
+} __packed;
+
+struct ext4_dirent_lufid {
+ struct ext4_dirent_data_header dl_header; /* 1 + 16n */
+ __u8 dl_data[0];
+} __packed;
+
+struct ext4_dentry_param {
+ __u32 edp_magic; /* EXT4_LUFID_MAGIC */
+ struct ext4_dirent_lufid edp_lufid;
+} __packed;
+
+static inline struct ext4_dirent_data_header *
+ ext4_dentry_get_data(struct super_block *sb,
+ struct ext4_dentry_param *p)
+{
+ if (!ext4_has_feature_dirdata(sb))
+ return NULL;
+ if (p && p->edp_magic == EXT4_LUFID_MAGIC)
+ return &p->edp_lufid.dl_header;
+ else
+ return NULL;
+}
+
/*
* EXT4_DIR_PAD defines the directory entries boundaries
*
@@ -1972,8 +2024,14 @@ struct ext4_dir_entry_tail {
*/
#define EXT4_DIR_PAD 4
#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
-#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
+
+/* the name + inode data without any extra dirdata */
+#define EXT4_DIR_NAME_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
~EXT4_DIR_ROUND)
+/* the total size of the dirent including any extra data */
+#define EXT4_DIR_REC_LEN(de) (EXT4_DIR_NAME_LEN(de->name_len +\
+ ext4_get_dirent_data_len(de)))
+
#define EXT4_MAX_REC_LEN ((1<<16)-1)

/*
@@ -2376,7 +2434,10 @@ extern int ext4_find_dest_de(struct inode *dir, struct inode *inode,
struct buffer_head *bh,
void *buf, int buf_size,
struct ext4_filename *fname,
- struct ext4_dir_entry_2 **dest_de);
+ struct ext4_dir_entry_2 **dest_de,
+ bool is_dotdot,
+ bool *write_short_dotdot,
+ unsigned short dotdot_reclen);
void ext4_insert_dentry(struct inode *inode,
struct ext4_dir_entry_2 *de,
int buf_size,
@@ -2392,10 +2453,16 @@ static const unsigned char ext4_filetype_table[] = {

static inline unsigned char get_dtype(struct super_block *sb, int filetype)
{
- if (!ext4_has_feature_filetype(sb) || filetype >= EXT4_FT_MAX)
+ int fl_index = filetype & EXT4_FT_MASK;
+
+ if (!ext4_has_feature_filetype(sb) || fl_index >= EXT4_FT_MAX)
return DT_UNKNOWN;

- return ext4_filetype_table[filetype];
+ if (!test_opt(sb, DIRDATA))
+ return (ext4_filetype_table[fl_index]);
+
+ return (ext4_filetype_table[fl_index]) |
+ (filetype & ~EXT4_FT_MASK);
}
extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh,
void *buf, int buf_size);
@@ -3271,6 +3338,28 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)

extern const struct iomap_ops ext4_iomap_ops;

+/*
+ * Compute the total directory entry data length.
+ * This includes the filename and an implicit NUL terminator (always present),
+ * and optional extensions. Each extension has a bit set in the high 4 bits of
+ * de->file_type, and the extension length is the first byte in each entry.
+ */
+static inline int ext4_get_dirent_data_len(struct ext4_dir_entry_2 *de)
+{
+ char *len = de->name + de->name_len + 1 /* NUL terminator */;
+ int dlen = 0;
+ __u8 extra_data_flags = (de->file_type & ~EXT4_FT_MASK) >> 4;
+
+ while (extra_data_flags) {
+ if (extra_data_flags & 1) {
+ dlen += *len + (dlen == 0);
+ len += *len;
+ }
+ extra_data_flags >>= 1;
+ }
+ return dlen;
+}
+
#endif /* __KERNEL__ */

#define EFSBADCRC EBADMSG /* Bad CRC detected */
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 28c5c3abddb3..666891dc03cd 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1026,7 +1026,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
struct ext4_dir_entry_2 *de;

err = ext4_find_dest_de(dir, inode, iloc->bh, inline_start,
- inline_size, fname, &de);
+ inline_size, fname, &de, 0, NULL, 0);
if (err)
return err;

@@ -1103,7 +1103,7 @@ static int ext4_update_inline_dir(handle_t *handle, struct inode *dir,
int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE;
int new_size = get_max_inline_xattr_value_size(dir, iloc);

- if (new_size - old_size <= EXT4_DIR_REC_LEN(1))
+ if (new_size - old_size <= EXT4_DIR_NAME_LEN(1))
return -ENOSPC;

ret = ext4_update_inline_data(handle, dir,
@@ -1384,8 +1384,8 @@ int htree_inlinedir_to_tree(struct file *dir_file,
fake.name_len = 1;
strcpy(fake.name, ".");
fake.rec_len = ext4_rec_len_to_disk(
- EXT4_DIR_REC_LEN(fake.name_len),
- inline_size);
+ EXT4_DIR_NAME_LEN(fake.name_len),
+ inline_size);
ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
de = &fake;
pos = EXT4_INLINE_DOTDOT_OFFSET;
@@ -1394,8 +1394,8 @@ int htree_inlinedir_to_tree(struct file *dir_file,
fake.name_len = 2;
strcpy(fake.name, "..");
fake.rec_len = ext4_rec_len_to_disk(
- EXT4_DIR_REC_LEN(fake.name_len),
- inline_size);
+ EXT4_DIR_NAME_LEN(fake.name_len),
+ inline_size);
ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
de = &fake;
pos = EXT4_INLINE_DOTDOT_SIZE;
@@ -1492,8 +1492,8 @@ int ext4_read_inline_dir(struct file *file,
* So we will use extra_offset and extra_size to indicate them
* during the inline dir iteration.
*/
- dotdot_offset = EXT4_DIR_REC_LEN(1);
- dotdot_size = dotdot_offset + EXT4_DIR_REC_LEN(2);
+ dotdot_offset = EXT4_DIR_NAME_LEN(1);
+ dotdot_size = dotdot_offset + EXT4_DIR_NAME_LEN(2);
extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE;
extra_size = extra_offset + inline_size;

@@ -1528,7 +1528,7 @@ int ext4_read_inline_dir(struct file *file,
* failure will be detected in the
* dirent test below. */
if (ext4_rec_len_from_disk(de->rec_len, extra_size)
- < EXT4_DIR_REC_LEN(1))
+ < EXT4_DIR_NAME_LEN(1))
break;
i += ext4_rec_len_from_disk(de->rec_len,
extra_size);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 7c649cf2b630..67edab5572d8 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -239,7 +239,8 @@ static unsigned dx_get_count(struct dx_entry *entries);
static unsigned dx_get_limit(struct dx_entry *entries);
static void dx_set_count(struct dx_entry *entries, unsigned value);
static void dx_set_limit(struct dx_entry *entries, unsigned value);
-static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
+static inline unsigned int dx_root_limit(struct inode *dir,
+ struct ext4_dir_entry_2 *dot_de, unsigned int infosize);
static unsigned dx_node_limit(struct inode *dir);
static struct dx_frame *dx_probe(struct ext4_filename *fname,
struct inode *dir,
@@ -552,10 +553,15 @@ static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
}

-static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
+static inline unsigned int dx_root_limit(struct inode *dir,
+ struct ext4_dir_entry_2 *dot_de, unsigned int infosize)
{
- unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
- EXT4_DIR_REC_LEN(2) - infosize;
+ struct ext4_dir_entry_2 *dotdot_de;
+ unsigned int entry_space;
+
+ dotdot_de = ext4_next_entry(dot_de, dir->i_sb->s_blocksize);
+ entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(dot_de) -
+ EXT4_DIR_REC_LEN(dotdot_de) - infosize;

if (ext4_has_metadata_csum(dir->i_sb))
entry_space -= sizeof(struct dx_tail);
@@ -564,7 +570,8 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)

static inline unsigned dx_node_limit(struct inode *dir)
{
- unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
+ unsigned int entry_space = dir->i_sb->s_blocksize -
+ EXT4_DIR_NAME_LEN(0);

if (ext4_has_metadata_csum(dir->i_sb))
entry_space -= sizeof(struct dx_tail);
@@ -676,7 +683,7 @@ static struct stats dx_show_leaf(struct inode *dir,
(unsigned) ((char *) de - base));
#endif
}
- space += EXT4_DIR_REC_LEN(de->name_len);
+ space += EXT4_DIR_REC_LEN(de);
names++;
}
de = ext4_next_entry(de, size);
@@ -984,7 +991,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
de = (struct ext4_dir_entry_2 *) bh->b_data;
top = (struct ext4_dir_entry_2 *) ((char *) de +
dir->i_sb->s_blocksize -
- EXT4_DIR_REC_LEN(0));
+ EXT4_DIR_NAME_LEN(0));
#ifdef CONFIG_EXT4_FS_ENCRYPTION
/* Check if the directory is encrypted */
if (ext4_encrypted_inode(dir)) {
@@ -1567,6 +1574,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
inode = NULL;
if (bh) {
__u32 ino = le32_to_cpu(de->inode);
+
brelse(bh);
if (!ext4_valid_inum(dir->i_sb, ino)) {
EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
@@ -1635,7 +1643,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
while (count--) {
struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
(from + (map->offs<<2));
- rec_len = EXT4_DIR_REC_LEN(de->name_len);
+ rec_len = EXT4_DIR_REC_LEN(de);
memcpy (to, de, rec_len);
((struct ext4_dir_entry_2 *) to)->rec_len =
ext4_rec_len_to_disk(rec_len, blocksize);
@@ -1659,7 +1667,7 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
while ((char*)de < base + blocksize) {
next = ext4_next_entry(de, blocksize);
if (de->inode && de->name_len) {
- rec_len = EXT4_DIR_REC_LEN(de->name_len);
+ rec_len = EXT4_DIR_REC_LEN(de);
if (de > to)
memmove(to, de, rec_len);
to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
@@ -1790,10 +1798,13 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
struct buffer_head *bh,
void *buf, int buf_size,
struct ext4_filename *fname,
- struct ext4_dir_entry_2 **dest_de)
+ struct ext4_dir_entry_2 **dest_de,
+ bool is_dotdot,
+ bool *write_short_dotdot,
+ unsigned short dotdot_reclen)
{
struct ext4_dir_entry_2 *de;
- unsigned short reclen = EXT4_DIR_REC_LEN(fname_len(fname));
+ unsigned short reclen = EXT4_DIR_NAME_LEN(fname_len(fname));
int nlen, rlen;
unsigned int offset = 0;
char *top;
@@ -1806,10 +1817,28 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
return -EFSCORRUPTED;
if (ext4_match(fname, de))
return -EEXIST;
- nlen = EXT4_DIR_REC_LEN(de->name_len);
+ nlen = EXT4_DIR_REC_LEN(de);
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
+ /* Check first for enough space for the full entry */
if ((de->inode ? rlen - nlen : rlen) >= reclen)
break;
+ /* Then for dotdot entries, check for the smaller space
+ * required for just the entry, no FID
+ */
+ if (is_dotdot) {
+ if ((de->inode ? rlen - nlen : rlen) >=
+ dotdot_reclen) {
+ *write_short_dotdot = true;
+ break;
+ }
+ /* The new ".." entry must be written over the
+ * previous ".." entry, which is the first
+ * entry traversed by this scan. If it doesn't
+ * fit, something is badly wrong, so -EIO.
+ */
+ return -EIO;
+ }
+
de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
offset += rlen;
}
@@ -1828,7 +1857,8 @@ void ext4_insert_dentry(struct inode *inode,

int nlen, rlen;

- nlen = EXT4_DIR_REC_LEN(de->name_len);
+ nlen = EXT4_DIR_REC_LEN(de);
+
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
if (de->inode) {
struct ext4_dir_entry_2 *de1 =
@@ -1852,21 +1882,46 @@ void ext4_insert_dentry(struct inode *inode,
* space. It will return -ENOSPC if no space is available, and -EIO
* and -EEXIST if directory entry already exists.
*/
-static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
+static int add_dirent_to_buf(handle_t *handle,
+ struct dentry *dentry,
+ struct ext4_filename *fname,
struct inode *dir,
struct inode *inode, struct ext4_dir_entry_2 *de,
struct buffer_head *bh)
{
unsigned int blocksize = dir->i_sb->s_blocksize;
int csum_size = 0;
- int err;
+ unsigned short reclen, dotdot_reclen = 0;
+ int err, dlen = 0;
+ bool is_dotdot = false, write_short_dotdot = false;
+ struct ext4_dirent_data_header *ddh;
+ int namelen = dentry->d_name.len;

if (ext4_has_metadata_csum(inode->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);

+ ddh = ext4_dentry_get_data(inode->i_sb, (struct ext4_dentry_param *)
+ dentry->d_fsdata);
+ if (ddh)
+ dlen = ddh->ddh_length + 1 /* NUL separator */;
+
+ is_dotdot = (namelen == 2 &&
+ memcmp(dentry->d_name.name, "..", 2) == 0);
+
+ /* dotdot entries must be in the second place in a directory block,
+ * so calculate an alternate length without the dirdata so they can
+ * always be made to fit in the existing slot
+ */
+ if (is_dotdot)
+ dotdot_reclen = EXT4_DIR_NAME_LEN(namelen);
+
+ reclen = EXT4_DIR_NAME_LEN(namelen + dlen + 3);
+
if (!de) {
err = ext4_find_dest_de(dir, inode, bh, bh->b_data,
- blocksize - csum_size, fname, &de);
+ blocksize - csum_size, fname, &de,
+ is_dotdot,
+ &write_short_dotdot, dotdot_reclen);
if (err)
return err;
}
@@ -1880,6 +1935,24 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
/* By now the buffer is marked for journaling */
ext4_insert_dentry(inode, de, blocksize, fname);

+ /* If we're writing short form of "dotdot", don't add data section */
+ if (ddh && !write_short_dotdot) {
+ de->name[namelen] = 0;
+ memcpy(&de->name[namelen + 1], ddh, ddh->ddh_length);
+ de->file_type |= EXT4_DIRENT_LUFID;
+ data_offset = ddh->ddh_length;
+ }
+
+ if (inode) {
+ __u32 *i_ino_hi;
+
+ de->name[namelen + 1 + data_offset] = 5;
+ i_ino_hi = (__u32 *)&de->name[namelen + 1 + data_offset + 1];
+ *i_ino_hi = cpu_to_le32((__u32)(inode->i_ino >> 32));
+ de->file_type |= EXT4_DIRENT_INODE;
+ de->inode = cpu_to_le32(inode->i_ino & 0xFFFFFFFF);
+ }
+
/*
* XXX shouldn't update any times until successful
* completion of syscall, but too many callers depend
@@ -1976,20 +2049,17 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
dotdot_de->rec_len =
ext4_rec_len_to_disk(blocksize - le16_to_cpu(dot_de->rec_len),
blocksize);
-
/* initialize hashing info */
dx_info = dx_get_dx_info(dot_de);
memset(dx_info, 0, sizeof(*dx_info));
dx_info->info_length = sizeof(*dx_info);
dx_info->hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
-
entries = (void *)dx_info + sizeof(*dx_info);
-
dx_set_block(entries, 1);
dx_set_count(entries, 1);
- dx_set_limit(entries, dx_root_limit(dir, (struct ext4_dir_entry_2 *)
- frame->bh->b_data,
- sizeof(*dx_info)));
+ dx_set_limit(entries, dx_root_limit(dir,
+ (struct ext4_dir_entry_2 *)frame->bh->b_data,
+ sizeof(*dx_info)));

/* Initialize as for dx_probe */
fname->hinfo.hash_version = dx_info->hash_version;
@@ -2017,7 +2087,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
goto out_frames;
}

- retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2);
+ retval = add_dirent_to_buf(handle, NULL, fname, dir, inode, de, bh2);
out_frames:
/*
* Even if the block split failed, we have to properly write
@@ -2094,7 +2164,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
bh = NULL;
goto out;
}
- retval = add_dirent_to_buf(handle, &fname, dir, inode,
+ retval = add_dirent_to_buf(handle, dentry, &fname, dir, inode,
NULL, bh);
if (retval != -ENOSPC)
goto out;
@@ -2123,7 +2193,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
initialize_dirent_tail(t, blocksize);
}

- retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh);
+ retval = add_dirent_to_buf(handle, dentry, &fname, dir, inode, de, bh);
out:
ext4_fname_free_filename(&fname);
brelse(bh);
@@ -2165,7 +2235,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
if (err)
goto journal_error;

- err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh);
+ err = add_dirent_to_buf(handle, NULL, fname, dir, inode, NULL, bh);
if (err != -ENOSPC)
goto cleanup;

@@ -2291,7 +2361,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
err = PTR_ERR(de);
goto cleanup;
}
- err = add_dirent_to_buf(handle, fname, dir, inode, de, bh);
+ err = add_dirent_to_buf(handle, NULL, fname, dir, inode, de, bh);
goto cleanup;

journal_error:
@@ -2557,7 +2627,7 @@ struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
{
de->inode = cpu_to_le32(inode->i_ino);
de->name_len = 1;
- de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
+ de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de),
blocksize);
strcpy(de->name, ".");
ext4_set_de_type(inode->i_sb, de, S_IFDIR);
@@ -2567,11 +2637,11 @@ struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
de->name_len = 2;
if (!dotdot_real_len)
de->rec_len = ext4_rec_len_to_disk(blocksize -
- (csum_size + EXT4_DIR_REC_LEN(1)),
+ (csum_size + EXT4_DIR_NAME_LEN(1)),
blocksize);
else
de->rec_len = ext4_rec_len_to_disk(
- EXT4_DIR_REC_LEN(de->name_len), blocksize);
+ EXT4_DIR_REC_LEN(de), blocksize);
strcpy(de->name, "..");
ext4_set_de_type(inode->i_sb, de, S_IFDIR);

@@ -2700,7 +2770,7 @@ bool ext4_empty_dir(struct inode *inode)
}

sb = inode->i_sb;
- if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) {
+ if (inode->i_size < EXT4_DIR_NAME_LEN(1) + EXT4_DIR_NAME_LEN(2)) {
EXT4_ERROR_INODE(inode, "invalid size");
return true;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b0915b734a38..ead9406d9cff 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1339,7 +1339,7 @@ enum {
Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
- Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
+ Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, Opt_dirdata,
Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
@@ -1400,6 +1400,7 @@ static const match_table_t tokens = {
{Opt_noquota, "noquota"},
{Opt_quota, "quota"},
{Opt_usrquota, "usrquota"},
+ {Opt_dirdata, "dirdata"},
{Opt_prjquota, "prjquota"},
{Opt_barrier, "barrier=%u"},
{Opt_barrier, "barrier"},
--
2.13.6 (Apple Git-96)

2017-11-30 15:18:05

by Artem Blagodarenko

[permalink] [raw]
Subject: [PATCH v3 3/3] ext4: Add 64-bit inode number support

Use dirdata to store the high bits of the 64-bit inode
number.

Signed-off-by: Artem Blagodarenko <[email protected]>
---
fs/ext4/dir.c | 4 +--
fs/ext4/ext4.h | 85 ++++++++++++++++++++++++++++++++++++++++++++++----------
fs/ext4/ialloc.c | 19 ++++++++-----
fs/ext4/inode.c | 5 ++++
fs/ext4/namei.c | 53 ++++++++++++++++++++++++++++-------
fs/ext4/resize.c | 8 +++---
fs/ext4/super.c | 14 +++++++---
7 files changed, 147 insertions(+), 41 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 0c4dddb0f07a..d971bc68903c 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -76,7 +76,7 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
else if (unlikely(((char *) de - buf) + rlen > size))
error_msg = "directory entry across range";
else if (unlikely(le32_to_cpu(de->inode) >
- le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
+ ext4_get_inodes_count(dir->i_sb)))
error_msg = "inode out of bounds";
else
return 0;
@@ -382,7 +382,7 @@ struct fname {
__u32 minor_hash;
struct rb_node rb_hash;
struct fname *next;
- __u32 inode;
+ __u64 inode;
__u8 name_len;
__u8 file_type;
char name[0];
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3678657d8e47..18e30589b704 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1331,7 +1331,10 @@ struct ext4_super_block {
__le32 s_lpf_ino; /* Location of the lost+found inode */
__le32 s_prj_quota_inum; /* inode for tracking project quota */
__le32 s_checksum_seed; /* crc32c(uuid) if csum_seed set */
- __le32 s_reserved[98]; /* Padding to the end of the block */
+ __le32 s_inodes_count_hi; /* high part of inode count */
+ __le32 s_free_inodes_count_hi; /* high part of free inodes count */
+ __le32 s_prj_quota_inum_hi; /* high part of project quota inode */
+ __le32 s_reserved[95]; /* Padding to the end of the block */
__le32 s_checksum; /* crc32c(superblock) */
};

@@ -1539,18 +1542,6 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
return container_of(inode, struct ext4_inode_info, vfs_inode);
}

-static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
-{
- return ino == EXT4_ROOT_INO ||
- ino == EXT4_USR_QUOTA_INO ||
- ino == EXT4_GRP_QUOTA_INO ||
- ino == EXT4_BOOT_LOADER_INO ||
- ino == EXT4_JOURNAL_INO ||
- ino == EXT4_RESIZE_INO ||
- (ino >= EXT4_FIRST_INO(sb) &&
- ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
-}
-
/*
* Inode dynamic state flags
*/
@@ -1689,6 +1680,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
#define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */
#define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000
+#define EXT4_FEATURE_INCOMPAT_INODE64 0x20000

#define EXT4_FEATURE_COMPAT_FUNCS(name, flagname) \
static inline bool ext4_has_feature_##name(struct super_block *sb) \
@@ -1777,6 +1769,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(csum_seed, CSUM_SEED)
EXT4_FEATURE_INCOMPAT_FUNCS(largedir, LARGEDIR)
EXT4_FEATURE_INCOMPAT_FUNCS(inline_data, INLINE_DATA)
EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
+EXT4_FEATURE_INCOMPAT_FUNCS(inode64, INODE64)
+

#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@ -1805,6 +1799,7 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
EXT4_FEATURE_INCOMPAT_ENCRYPT | \
EXT4_FEATURE_INCOMPAT_CSUM_SEED | \
+ EXT4_FEATURE_INCOMPAT_INODE64 | \
EXT4_FEATURE_INCOMPAT_LARGEDIR | \
EXT4_FEATURE_INCOMPAT_DIRDATA)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
@@ -2000,6 +1995,11 @@ struct ext4_dirent_lufid {
__u8 dl_data[0];
} __packed;

+struct ext4_dirent_inohi {
+ struct ext4_dirent_data_header di_header; /* 1 + 4 */
+ __le32 di_inohi;
+} __packed;
+
struct ext4_dentry_param {
__u32 edp_magic; /* EXT4_LUFID_MAGIC */
struct ext4_dirent_lufid edp_lufid;
@@ -2476,7 +2476,7 @@ extern int ext4fs_dirhash(const char *name, int len, struct

/* ialloc.c */
extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
- const struct qstr *qstr, __u32 goal,
+ const struct qstr *qstr, __u64 goal,
uid_t *owner, __u32 i_flags,
int handle_type, unsigned int line_no,
int nblocks);
@@ -2903,6 +2903,63 @@ static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi)
return 1 << sbi->s_log_groups_per_flex;
}

+static inline unsigned long ext4_get_inodes_count(struct super_block *sb)
+{
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ unsigned long inodes_count = le32_to_cpu(es->s_inodes_count);
+
+ if (ext4_has_feature_inode64(sb))
+ inodes_count |=
+ (unsigned long)le32_to_cpu(es->s_inodes_count_hi)
+ << 32;
+ return inodes_count;
+}
+
+static inline void ext4_set_inodes_count(struct super_block *sb,
+ unsigned long val)
+{
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+
+ if (ext4_has_feature_inode64(sb))
+ es->s_inodes_count_hi = cpu_to_le32(val >> 32);
+
+ es->s_inodes_count = cpu_to_le32(val);
+}
+
+static inline unsigned long ext4_get_free_inodes_count(struct super_block *sb)
+{
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ unsigned long inodes_count = le32_to_cpu(es->s_free_inodes_count);
+
+ if (ext4_has_feature_inode64(sb))
+ inodes_count |=
+ (unsigned long)le32_to_cpu(es->s_free_inodes_count_hi)
+ << 32;
+ return inodes_count;
+}
+
+static inline void ext4_set_free_inodes_count(struct super_block *sb,
+ unsigned long val)
+{
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+
+ if (ext4_has_feature_inode64(sb))
+ es->s_free_inodes_count_hi = cpu_to_le32(val >> 32);
+
+ es->s_free_inodes_count = cpu_to_le32(val);
+}
+
+static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
+{
+ return ino == EXT4_ROOT_INO ||
+ ino == EXT4_USR_QUOTA_INO ||
+ ino == EXT4_GRP_QUOTA_INO ||
+ ino == EXT4_JOURNAL_INO ||
+ ino == EXT4_RESIZE_INO ||
+ (ino >= EXT4_FIRST_INO(sb) &&
+ ino <= ext4_get_inodes_count(sb));
+}
+
#define ext4_std_error(sb, errno) \
do { \
if ((errno)) \
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index ee823022aa34..e23dc4133e84 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -303,7 +303,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
ext4_clear_inode(inode);

es = EXT4_SB(sb)->s_es;
- if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
+ if (ino < EXT4_FIRST_INO(sb) || ino > ext4_get_inodes_count(sb)) {
ext4_error(sb, "reserved or nonexistent inode %lu", ino);
goto error_return;
}
@@ -770,7 +770,7 @@ static int find_inode_bit(struct super_block *sb, ext4_group_t group,
*/
struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
umode_t mode, const struct qstr *qstr,
- __u32 goal, uid_t *owner, __u32 i_flags,
+ __u64 goal, uid_t *owner, __u32 i_flags,
int handle_type, unsigned int line_no,
int nblocks)
{
@@ -887,7 +887,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
if (!goal)
goal = sbi->s_inode_goal;

- if (goal && goal <= le32_to_cpu(sbi->s_es->s_inodes_count)) {
+ if (goal && goal <= ext4_get_inodes_count(sb)) {
group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
ret2 = 0;
@@ -1149,6 +1149,11 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
__le32 gen = cpu_to_le32(inode->i_generation);
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
sizeof(inum));
+ if (inode->i_ino >> 32) {
+ inum = cpu_to_le32(inode->i_ino >> 32);
+ csum = ext4_chksum(sbi, sbi->s_csum_seed,
+ (__u8 *)&inum, sizeof(inum));
+ }
ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen,
sizeof(gen));
}
@@ -1226,7 +1231,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
/* Verify that we are loading a valid orphan from disk */
struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
{
- unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
+ unsigned long max_ino = ext4_get_inodes_count(sb);
ext4_group_t block_group;
int bit;
struct buffer_head *bitmap_bh = NULL;
@@ -1330,9 +1335,9 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
bitmap_count += x;
}
brelse(bitmap_bh);
- printk(KERN_DEBUG "ext4_count_free_inodes: "
- "stored = %u, computed = %lu, %lu\n",
- le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
+ printk(KERN_DEBUG "ext4_count_free_inodes:\n"
+ "stored = %lu, computed = %lu, %lu\n",
+ ext4_get_inodes_count(sb), desc_count, bitmap_count);
return desc_count;
#else
desc_count = 0;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 31db875bc7a1..9caefee1bce9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4691,6 +4691,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
__le32 gen = raw_inode->i_generation;
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
sizeof(inum));
+ if (inode->i_ino >> 32) {
+ inum = cpu_to_le32(inode->i_ino >> 32);
+ csum = ext4_chksum(sbi, sbi->s_csum_seed,
+ (__u8 *)&inum, sizeof(inum));
+ }
ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen,
sizeof(gen));
}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 67edab5572d8..0ef33556c51d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1573,11 +1573,45 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
return (struct dentry *) bh;
inode = NULL;
if (bh) {
- __u32 ino = le32_to_cpu(de->inode);
+ unsigned long ino;
+
+ ino = le32_to_cpu(de->inode);
+ if (ext4_has_feature_inode64(dir->i_sb) &&
+ (de->file_type & EXT4_DIRENT_INODE)) {
+ struct ext4_dirent_data_header *ddh =
+ (struct ext4_dirent_data_header *)
+ &de->name[de->name_len];
+
+ if (de->file_type & EXT4_DIRENT_LUFID) {
+ /* skip LUFID record if present */
+ ddh = (struct ext4_dirent_data_header *)
+ &de->name[de->name_len + 1 +
+ ddh->ddh_length];
+ }
+
+ if ((char *)ddh > &de->name[de->rec_len]) {
+ EXT4_ERROR_INODE(dir,
+ "corrupted dirdata entry\n");
+ return ERR_PTR(-EFSCORRUPTED);
+ }
+
+ if (ddh->ddh_length == (sizeof(__u32) + 1)) {
+ __le32 ino_hi;
+ struct ext4_dirent_lufid *dlf =
+ (struct ext4_dirent_lufid *)ddh;
+
+ memcpy(&ino_hi, dlf->dl_data, sizeof(__u32));
+ ino |= (__u64)le32_to_cpu(ino_hi) << 32;
+ } else {
+ EXT4_ERROR_INODE(dir,
+ "corrupted dirdata inode number\n");
+ return ERR_PTR(-EFSCORRUPTED);
+ }
+ }

brelse(bh);
if (!ext4_valid_inum(dir->i_sb, ino)) {
- EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
+ EXT4_ERROR_INODE(dir, "bad inode number: %lu", ino);
return ERR_PTR(-EFSCORRUPTED);
}
if (unlikely(ino == dir->i_ino)) {
@@ -1588,7 +1622,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
inode = ext4_iget_normal(dir->i_sb, ino);
if (inode == ERR_PTR(-ESTALE)) {
EXT4_ERROR_INODE(dir,
- "deleted inode referenced: %u",
+ "deleted inode referenced: %lu",
ino);
return ERR_PTR(-EFSCORRUPTED);
}
@@ -1892,7 +1926,7 @@ static int add_dirent_to_buf(handle_t *handle,
unsigned int blocksize = dir->i_sb->s_blocksize;
int csum_size = 0;
unsigned short reclen, dotdot_reclen = 0;
- int err, dlen = 0;
+ int err, dlen = 0, data_offset = 0;
bool is_dotdot = false, write_short_dotdot = false;
struct ext4_dirent_data_header *ddh;
int namelen = dentry->d_name.len;
@@ -1944,13 +1978,12 @@ static int add_dirent_to_buf(handle_t *handle,
}

if (inode) {
- __u32 *i_ino_hi;
-
- de->name[namelen + 1 + data_offset] = 5;
- i_ino_hi = (__u32 *)&de->name[namelen + 1 + data_offset + 1];
- *i_ino_hi = cpu_to_le32((__u32)(inode->i_ino >> 32));
- de->file_type |= EXT4_DIRENT_INODE;
+ struct ext4_dirent_inohi *di = (struct ext4_dirent_inohi *)
+ &de->name[namelen + 1 + data_offset];
+ di->di_header.ddh_length = sizeof(*di);
+ di->di_inohi = cpu_to_le32(inode->i_ino >> 32);
de->inode = cpu_to_le32(inode->i_ino & 0xFFFFFFFF);
+ de->file_type |= EXT4_DIRENT_INODE;
}

/*
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 035cd3f4785e..d0d5acd1a70d 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1337,10 +1337,10 @@ static void ext4_update_super(struct super_block *sb,

ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count);
ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks);
- le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) *
- flex_gd->count);
- le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) *
- flex_gd->count);
+ ext4_set_inodes_count(sb, ext4_get_inodes_count(sb) +
+ EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
+ ext4_set_free_inodes_count(sb, ext4_get_free_inodes_count(sb) +
+ EXT4_INODES_PER_GROUP(sb) * flex_gd->count);

ext4_debug("free blocks count %llu", ext4_free_blocks_count(es));
/*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ead9406d9cff..a06252f9aada 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3489,6 +3489,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto cantfind_ext4;
}

+ if (ext4_has_feature_inode64(sb) &&
+ (sizeof(u64) != sizeof(unsigned long))) {
+ ext4_msg(sb, KERN_ERR, "64 bit inodes need 64 bit kernel.");
+ goto failed_mount;
+ }
+
/* Load the checksum driver */
if (ext4_has_feature_metadata_csum(sb) ||
ext4_has_feature_ea_inode(sb)) {
@@ -4248,7 +4254,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
GFP_KERNEL);
if (!err) {
unsigned long freei = ext4_count_free_inodes(sb);
- sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
+ ext4_set_free_inodes_count(sb, freei);
err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
GFP_KERNEL);
}
@@ -4705,9 +4711,9 @@ static int ext4_commit_super(struct super_block *sb, int sync)
EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeclusters_counter)));
if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
- es->s_free_inodes_count =
- cpu_to_le32(percpu_counter_sum_positive(
- &EXT4_SB(sb)->s_freeinodes_counter));
+ ext4_set_free_inodes_count(sb,
+ cpu_to_le32(percpu_counter_sum_positive(
+ &EXT4_SB(sb)->s_freeinodes_counter)));
BUFFER_TRACE(sbh, "marking dirty");
ext4_superblock_csum_set(sb);
if (sync)
--
2.13.6 (Apple Git-96)

2017-11-30 15:18:00

by Artem Blagodarenko

[permalink] [raw]
Subject: [PATCH v3 1/3] ext4: Removes static definition of dx_root struct

From: Yang Sheng <[email protected]>

Remove the static definition of the dx_root struct so that the "." and ".."
dirents can have extra data. This patch does not change any functionality but
is required for the ext4_data_in_dirent patch.

Signed-off-by: Yang Sheng <[email protected]>
Signed-off-by: Artem Blagodarenko <[email protected]>
---
fs/ext4/namei.c | 130 +++++++++++++++++++++++++++++++-------------------------
1 file changed, 71 insertions(+), 59 deletions(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index c1cf020d1889..7c649cf2b630 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -193,23 +193,13 @@ struct dx_entry
* dirent the two low bits of the hash version will be zero. Therefore, the
* hash version mod 4 should never be 0. Sincerely, the paranoia department.
*/
-
-struct dx_root
+struct dx_root_info
{
- struct fake_dirent dot;
- char dot_name[4];
- struct fake_dirent dotdot;
- char dotdot_name[4];
- struct dx_root_info
- {
- __le32 reserved_zero;
- u8 hash_version;
- u8 info_length; /* 8 */
- u8 indirect_levels;
- u8 unused_flags;
- }
- info;
- struct dx_entry entries[0];
+ __le32 reserved_zero;
+ u8 hash_version;
+ u8 info_length; /* 8 */
+ u8 indirect_levels;
+ u8 unused_flags;
};

struct dx_node
@@ -521,6 +511,17 @@ static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
entry->block = cpu_to_le32(value);
}

+struct dx_root_info *dx_get_dx_info(struct ext4_dir_entry_2 *de)
+{
+ /* get dotdot first */
+ de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_NAME_LEN(1));
+
+ /* dx root info is after dotdot entry */
+ de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_NAME_LEN(2));
+
+ return (struct dx_root_info *)de;
+}
+
static inline unsigned dx_get_hash(struct dx_entry *entry)
{
return le32_to_cpu(entry->hash);
@@ -734,7 +735,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
{
unsigned count, indirect;
struct dx_entry *at, *entries, *p, *q, *m;
- struct dx_root *root;
+ struct dx_root_info *info;
struct dx_frame *frame = frame_in;
struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
u32 hash;
@@ -744,17 +745,17 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
if (IS_ERR(frame->bh))
return (struct dx_frame *) frame->bh;

- root = (struct dx_root *) frame->bh->b_data;
- if (root->info.hash_version != DX_HASH_TEA &&
- root->info.hash_version != DX_HASH_HALF_MD4 &&
- root->info.hash_version != DX_HASH_LEGACY) {
- ext4_warning_inode(dir, "Unrecognised inode hash code %u",
- root->info.hash_version);
+ info = dx_get_dx_info((struct ext4_dir_entry_2 *)frame->bh->b_data);
+ if (info->hash_version != DX_HASH_TEA &&
+ info->hash_version != DX_HASH_HALF_MD4 &&
+ info->hash_version != DX_HASH_LEGACY) {
+ ext4_warning(dir->i_sb, "inode #%lu: unrecognised hash code %u",
+ dir->i_ino, info->hash_version);
goto fail;
}
if (fname)
hinfo = &fname->hinfo;
- hinfo->hash_version = root->info.hash_version;
+ hinfo->hash_version = info->hash_version;
if (hinfo->hash_version <= DX_HASH_TEA)
hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
@@ -762,18 +763,18 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
ext4fs_dirhash(fname_name(fname), fname_len(fname), hinfo);
hash = hinfo->hash;

- if (root->info.unused_flags & 1) {
- ext4_warning_inode(dir, "Unimplemented hash flags: %#06x",
- root->info.unused_flags);
+ if (info->unused_flags & 1) {
+ ext4_warning_inode(dir,
+ "inode #%lu: Unimplemented hash flags: %#06x",
+ dir->i_ino, info->unused_flags);
goto fail;
}

- indirect = root->info.indirect_levels;
+ indirect = info->indirect_levels;
if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
ext4_warning(dir->i_sb,
- "Directory (ino: %lu) htree depth %#06x exceed"
- "supported value", dir->i_ino,
- ext4_dir_htree_level(dir->i_sb));
+ "inode #%lu unimplemented hash depth %#06x",
+ dir->i_ino, ext4_dir_htree_level(dir->i_sb));
if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
ext4_warning(dir->i_sb, "Enable large directory "
"feature to access it");
@@ -781,14 +782,17 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
goto fail;
}

- entries = (struct dx_entry *)(((char *)&root->info) +
- root->info.info_length);
+ entries = (struct dx_entry *)(((char *)info) + info->info_length);

if (dx_get_limit(entries) != dx_root_limit(dir,
- root->info.info_length)) {
- ext4_warning_inode(dir, "dx entry: limit %u != root limit %u",
- dx_get_limit(entries),
- dx_root_limit(dir, root->info.info_length));
+ (struct ext4_dir_entry_2 *) frame->bh->b_data,
+ info->info_length)) {
+ ext4_warning(dir->i_sb, "dx entry: limit != root limit "
+ "inode #%lu: dx entry: limit %u != root limit %u",
+ dir->i_ino, dx_get_limit(entries),
+ dx_root_limit(dir, (struct ext4_dir_entry_2 *)
+ frame->bh->b_data,
+ info->info_length));
goto fail;
}

@@ -872,7 +876,7 @@ static void dx_release(struct dx_frame *frames)
if (frames[0].bh == NULL)
return;

- info = &((struct dx_root *)frames[0].bh->b_data)->info;
+ info = dx_get_dx_info((struct ext4_dir_entry_2 *)frames[0].bh->b_data);
for (i = 0; i <= info->indirect_levels; i++) {
if (frames[i].bh == NULL)
break;
@@ -1907,17 +1911,16 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
struct inode *inode, struct buffer_head *bh)
{
struct buffer_head *bh2;
- struct dx_root *root;
struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
struct dx_entry *entries;
- struct ext4_dir_entry_2 *de, *de2;
+ struct ext4_dir_entry_2 *de, *de2, *dot_de, *dotdot_de;
struct ext4_dir_entry_tail *t;
char *data1, *top;
unsigned len;
int retval;
unsigned blocksize;
ext4_lblk_t block;
- struct fake_dirent *fde;
+ struct dx_root_info *dx_info;
int csum_size = 0;

if (ext4_has_metadata_csum(inode->i_sb))
@@ -1932,18 +1935,19 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
brelse(bh);
return retval;
}
- root = (struct dx_root *) bh->b_data;
+
+ dot_de = (struct ext4_dir_entry_2 *)bh->b_data;
+ dotdot_de = ext4_next_entry(dot_de, blocksize);

/* The 0th block becomes the root, move the dirents out */
- fde = &root->dotdot;
- de = (struct ext4_dir_entry_2 *)((char *)fde +
- ext4_rec_len_from_disk(fde->rec_len, blocksize));
- if ((char *) de >= (((char *) root) + blocksize)) {
+ de = (struct ext4_dir_entry_2 *)((char *)dotdot_de +
+ ext4_rec_len_from_disk(dotdot_de->rec_len, blocksize));
+ if ((char *)de >= (((char *)dot_de) + blocksize)) {
EXT4_ERROR_INODE(dir, "invalid rec_len for '..'");
brelse(bh);
return -EFSCORRUPTED;
}
- len = ((char *) root) + (blocksize - csum_size) - (char *) de;
+ len = ((char *)dot_de) + (blocksize - csum_size) - (char *)de;

/* Allocate new block for the 0th block's dirents */
bh2 = ext4_append(handle, dir, &block);
@@ -1969,19 +1973,26 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
}

/* Initialize the root; the dot dirents already exist */
- de = (struct ext4_dir_entry_2 *) (&root->dotdot);
- de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2),
- blocksize);
- memset (&root->info, 0, sizeof(root->info));
- root->info.info_length = sizeof(root->info);
- root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
- entries = root->entries;
+ dotdot_de->rec_len =
+ ext4_rec_len_to_disk(blocksize - le16_to_cpu(dot_de->rec_len),
+ blocksize);
+
+ /* initialize hashing info */
+ dx_info = dx_get_dx_info(dot_de);
+ memset(dx_info, 0, sizeof(*dx_info));
+ dx_info->info_length = sizeof(*dx_info);
+ dx_info->hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
+
+ entries = (void *)dx_info + sizeof(*dx_info);
+
dx_set_block(entries, 1);
dx_set_count(entries, 1);
- dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
+ dx_set_limit(entries, dx_root_limit(dir, (struct ext4_dir_entry_2 *)
+ frame->bh->b_data,
+ sizeof(*dx_info)));

/* Initialize as for dx_probe */
- fname->hinfo.hash_version = root->info.hash_version;
+ fname->hinfo.hash_version = dx_info->hash_version;
if (fname->hinfo.hash_version <= DX_HASH_TEA)
fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
@@ -2252,7 +2263,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
goto journal_error;
}
} else {
- struct dx_root *dxroot;
+ struct dx_root_info *info;
memcpy((char *) entries2, (char *) entries,
icount * sizeof(struct dx_entry));
dx_set_limit(entries2, dx_node_limit(dir));
@@ -2260,8 +2271,9 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
/* Set up root */
dx_set_count(entries, 1);
dx_set_block(entries + 0, newblock);
- dxroot = (struct dx_root *)frames[0].bh->b_data;
- dxroot->info.indirect_levels += 1;
+ info = dx_get_dx_info((struct ext4_dir_entry_2 *)
+ frames[0].bh->b_data);
+ info->indirect_levels += 1;
dxtrace(printk(KERN_DEBUG
"Creating %d level index...\n",
info->indirect_levels));
--
2.13.6 (Apple Git-96)

2017-11-30 22:17:07

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH v3 1/3] ext4: Removes static definition of dx_root struct

On Nov 30, 2017, at 8:17 AM, Artem Blagodarenko <[email protected]> wrote:
>
> From: Yang Sheng <[email protected]>
>
> Removes static definition of dx_root struct. so that "." and ".." dirent
> can have extra data. This patch does not change any functionality but is
> required for ext4_data_in_dirent patch.
>
> Signed-off-by: Yang Sheng <[email protected]>
> Signed-off-by: Artem Blagodarenko <[email protected]>
> ---
> fs/ext4/namei.c | 130 +++++++++++++++++++++++++++++++-------------------------
> 1 file changed, 71 insertions(+), 59 deletions(-)
>
> diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> index c1cf020d1889..7c649cf2b630 100644
> --- a/fs/ext4/namei.c
> +++ b/fs/ext4/namei.c
> @@ -193,23 +193,13 @@ struct dx_entry
> * dirent the two low bits of the hash version will be zero. Therefore, the
> * hash version mod 4 should never be 0. Sincerely, the paranoia department.
> */
> -
> -struct dx_root
> +struct dx_root_info
> {
> - struct fake_dirent dot;
> - char dot_name[4];
> - struct fake_dirent dotdot;
> - char dotdot_name[4];
> - struct dx_root_info
> - {
> - __le32 reserved_zero;
> - u8 hash_version;
> - u8 info_length; /* 8 */
> - u8 indirect_levels;
> - u8 unused_flags;
> - }
> - info;
> - struct dx_entry entries[0];
> + __le32 reserved_zero;
> + u8 hash_version;
> + u8 info_length; /* 8 */
> + u8 indirect_levels;
> + u8 unused_flags;
> };
>
> struct dx_node
> @@ -521,6 +511,17 @@ static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
> entry->block = cpu_to_le32(value);
> }
>
> +struct dx_root_info *dx_get_dx_info(struct ext4_dir_entry_2 *de)
> +{
> + /* get dotdot first */
> + de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_NAME_LEN(1));
> +
> + /* dx root info is after dotdot entry */
> + de = (struct ext4_dir_entry_2 *)((char *)de + EXT4_DIR_NAME_LEN(2));
> +
> + return (struct dx_root_info *)de;
> +}
> +
> static inline unsigned dx_get_hash(struct dx_entry *entry)
> {
> return le32_to_cpu(entry->hash);
> @@ -744,17 +745,17 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
> if (IS_ERR(frame->bh))
> return (struct dx_frame *) frame->bh;
>
> - root = (struct dx_root *) frame->bh->b_data;
> - if (root->info.hash_version != DX_HASH_TEA &&
> - root->info.hash_version != DX_HASH_HALF_MD4 &&
> - root->info.hash_version != DX_HASH_LEGACY) {
> - ext4_warning_inode(dir, "Unrecognised inode hash code %u",
> - root->info.hash_version);
> + info = dx_get_dx_info((struct ext4_dir_entry_2 *)frame->bh->b_data);
> + if (info->hash_version != DX_HASH_TEA &&
> + info->hash_version != DX_HASH_HALF_MD4 &&
> + info->hash_version != DX_HASH_LEGACY) {
> + ext4_warning(dir->i_sb, "inode #%lu: unrecognised hash code %u",
> + dir->i_ino, info->hash_version);

This is probably a conflict from the older version of the dx_root patch with
newer error messages. The new ext4_warning_inode() message should be kept,
and just the "root->info." usage replaced by "info->".

> @@ -762,18 +763,18 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
>
> - if (root->info.unused_flags & 1) {
> - ext4_warning_inode(dir, "Unimplemented hash flags: %#06x",
> - root->info.unused_flags);
> + if (info->unused_flags & 1) {
> + ext4_warning_inode(dir,
> + "inode #%lu: Unimplemented hash flags: %#06x",
> + dir->i_ino, info->unused_flags);

Similarly, the ext4_warning_inode() message already prints "inode #%lu: " at
the start, so the old message should be kept and just replace "root->info.".

> if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
> ext4_warning(dir->i_sb,
> - "Directory (ino: %lu) htree depth %#06x exceed"
> - "supported value", dir->i_ino,
> - ext4_dir_htree_level(dir->i_sb));
> + "inode #%lu unimplemented hash depth %#06x",
> + dir->i_ino, ext4_dir_htree_level(dir->i_sb));

This should also be using ext4_warning_inode() and drop the "inode #%lu"
at the start, leaving "directory htree depth %u exceeds supported value %u",
and also print ext4_dir_htree_level(dir->i_sb) at the end. It isn't clear
why the depth was printed in hex previously; that doesn't make sense.

> @@ -781,14 +782,17 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
>
> if (dx_get_limit(entries) != dx_root_limit(dir,

It would make sense to move "dx_root_limit(dir," to the next line, aligned
after 'if (', so that it is not wrapping so close to the end.

> - root->info.info_length)) {
> - ext4_warning_inode(dir, "dx entry: limit %u != root limit %u",
> - dx_get_limit(entries),
> - dx_root_limit(dir, root->info.info_length));
> + (struct ext4_dir_entry_2 *) frame->bh->b_data,
> + info->info_length)) {
> + ext4_warning(dir->i_sb, "dx entry: limit != root limit "
> + "inode #%lu: dx entry: limit %u != root limit %u",
> + dir->i_ino, dx_get_limit(entries),
> + dx_root_limit(dir, (struct ext4_dir_entry_2 *)
> + frame->bh->b_data,
> + info->info_length));

Use the old message with ext4_warning_inode(), which doesn't have the
duplicate "dx entry:" part, and doesn't need the "inode #%lu: " anymore.

Cheers, Andreas






Attachments:
signature.asc (195.00 B)
Message signed with OpenPGP

2017-12-05 01:02:23

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH v3 2/3] ext4: dirdata feature


> On Nov 30, 2017, at 8:17 AM, Artem Blagodarenko <[email protected]> wrote:
>
> From: Andreas Dilger <[email protected]>
>
> This patch implements feature which allows ext4 fs users (e.g. Lustre)
> to store data in ext4 dirent. Data is stored in ext4 dirent after
> file-name, this space is accounted in de->rec_len.
> Flag EXT4_DIRENT_LUFID added to d_type if extra data
> is present.
>
> Make use of dentry->d_fsdata to pass fid to ext4. so no
> changes in ext4_add_entry() interface required.
>
> Signed-off-by: Andreas Dilger <[email protected]>
> Signed-off-by: Artem Blagodarenko <[email protected]>
> ---
> fs/ext4/dir.c | 17 ++++---
> fs/ext4/ext4.h | 99 ++++++++++++++++++++++++++++++++++++++---
> fs/ext4/inline.c | 18 ++++----
> fs/ext4/namei.c | 132 ++++++++++++++++++++++++++++++++++++++++++-------------
> fs/ext4/super.c | 3 +-
> 5 files changed, 218 insertions(+), 51 deletions(-)
>
> diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
> index b04e882179c6..0c4dddb0f07a 100644
> --- a/fs/ext4/dir.c
> +++ b/fs/ext4/dir.c
> @@ -67,11 +67,11 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
> const int rlen = ext4_rec_len_from_disk(de->rec_len,
> dir->i_sb->s_blocksize);
>
> - if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
> + if (unlikely(rlen < EXT4_DIR_NAME_LEN(1)))
> error_msg = "rec_len is smaller than minimal";
> else if (unlikely(rlen % 4 != 0))
> error_msg = "rec_len % 4 != 0";
> - else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
> + else if (unlikely(rlen < EXT4_DIR_REC_LEN(de)))
> error_msg = "rec_len is too small for name_len";
> else if (unlikely(((char *) de - buf) + rlen > size))
> error_msg = "directory entry across range";
> @@ -218,7 +218,8 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
> * failure will be detected in the
> * dirent test below. */
> if (ext4_rec_len_from_disk(de->rec_len,
> - sb->s_blocksize) < EXT4_DIR_REC_LEN(1))
> + sb->s_blocksize) <

This could be aligned after the second '(' on the previous line.

> + EXT4_DIR_NAME_LEN(1))

This should be aligned after 'if ('

> @@ -441,12 +442,18 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
> struct fname *fname, *new_fn;
> struct dir_private_info *info;
> int len;
> + int extra_data = 0;
>
> info = dir_file->private_data;
> p = &info->root.rb_node;
>
> /* Create and allocate the fname structure */
> - len = sizeof(struct fname) + ent_name->len + 1;
> + if (dirent->file_type & ~EXT4_FT_MASK)
> + extra_data = ext4_get_dirent_data_len(dirent);
> +
> + len = sizeof(struct fname) + dirent->name_len + extra_data + 1;

> +
> +

Remove extra blank line here.

> new_fn = kzalloc(len, GFP_KERNEL);
> if (!new_fn)
> return -ENOMEM;
> @@ -455,7 +462,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
> new_fn->inode = le32_to_cpu(dirent->inode);
> new_fn->name_len = ent_name->len;
> new_fn->file_type = dirent->file_type;
> - memcpy(new_fn->name, ent_name->name, ent_name->len);
> + memcpy(new_fn->name, ent_name->name, ent_name->len + extra_data);
> new_fn->name[ent_name->len] = 0;
>
> while (*p) {
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index e2abe01c8c6b..3678657d8e47 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1111,6 +1111,7 @@ struct ext4_inode_info {
> * Mount flags set via mount options or defaults
> */
> #define EXT4_MOUNT_NO_MBCACHE 0x00001 /* Do not use mbcache */
> +#define EXT4_MOUNT_DIRDATA 0x00002 /* Data in directory entries*/
> #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
> #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
> #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
> @@ -1804,7 +1805,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
> EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
> EXT4_FEATURE_INCOMPAT_ENCRYPT | \
> EXT4_FEATURE_INCOMPAT_CSUM_SEED | \
> - EXT4_FEATURE_INCOMPAT_LARGEDIR)
> + EXT4_FEATURE_INCOMPAT_LARGEDIR | \
> + EXT4_FEATURE_INCOMPAT_DIRDATA)
> #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
> EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
> EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
> @@ -1965,6 +1967,56 @@ struct ext4_dir_entry_tail {
>
> #define EXT4_FT_DIR_CSUM 0xDE
>
> +#define EXT4_FT_MASK 0xf
> +
> +#if EXT4_FT_MAX > EXT4_FT_MASK
> +#error "conflicting EXT4_FT_MAX and EXT4_FT_MASK"
> +#endif

> +
> +/*
> + * d_type has 4 unused bits, so it can hold four types data. these different
> + * type of data (e.g. lustre data, high 32 bits of 64-bit inode number) can be
> + * stored, in flag order, after file-name in ext4 dirent.
> + */
> +/*
> + * this flag is added to d_type if ext4 dirent has extra data after
> + * filename. this data length is variable and length is stored in first byte
> + * of data. data start after filename NUL byte.
> + * This is used by Lustre FS.
> + */
> +#define EXT4_DIRENT_LUFID 0x10
> +#define EXT4_DIRENT_INODE 0x20
> +#define DIRENT_INODE_LEN 2

Shouldn't DIRENT_INODE_LEN be 4 bytes? This should probably be added in the
next patch in any case.

> +
> +#define EXT4_LUFID_MAGIC 0xAD200907UL
> +
> +struct ext4_dirent_data_header {
> + /* length of this header + the whole data blob */
> + __u8 ddh_length;
> +} __packed;
> +
> +struct ext4_dirent_lufid {
> + struct ext4_dirent_data_header dl_header; /* 1 + 16n */
> + __u8 dl_data[0];
> +} __packed;
> +
> +struct ext4_dentry_param {
> + __u32 edp_magic; /* EXT4_LUFID_MAGIC */
> + struct ext4_dirent_lufid edp_lufid;
> +} __packed;
> +
> +static inline struct ext4_dirent_data_header *
> + ext4_dentry_get_data(struct super_block *sb,

IMHO, this declaration would be formatted better like:

static inline
struct ext4_dirent_data_header *ext4_dentry_get_data(struct super_block *sb,
struct ext4_dentry_param *p)

> +{
> + if (!ext4_has_feature_dirdata(sb))
> + return NULL;
> + if (p && p->edp_magic == EXT4_LUFID_MAGIC)
> + return &p->edp_lufid.dl_header;
> + else
> + return NULL;
> +}
> +
> /*
> * EXT4_DIR_PAD defines the directory entries boundaries
> *
> @@ -1972,8 +2024,14 @@ struct ext4_dir_entry_tail {
> */
> #define EXT4_DIR_PAD 4
> #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
> -#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
> +
> +/* the name + inode data without any extra dirdata */

two spaces before "any"

> +#define EXT4_DIR_NAME_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
> ~EXT4_DIR_ROUND)
> +/* the total size of the dirent including any extra data */

... extra dirdata

> +#define EXT4_DIR_REC_LEN(de) (EXT4_DIR_NAME_LEN(de->name_len +\
> + ext4_get_dirent_data_len(de)))
> +
> #define EXT4_MAX_REC_LEN ((1<<16)-1)
>
> /*
> @@ -2376,7 +2434,10 @@ extern int ext4_find_dest_de(struct inode *dir, struct inode *inode,
> struct buffer_head *bh,
> void *buf, int buf_size,
> struct ext4_filename *fname,
> - struct ext4_dir_entry_2 **dest_de);
> + struct ext4_dir_entry_2 **dest_de,
> + bool is_dotdot,
> + bool *write_short_dotdot,
> + unsigned short dotdot_reclen);
> void ext4_insert_dentry(struct inode *inode,
> struct ext4_dir_entry_2 *de,
> int buf_size,
> @@ -2392,10 +2453,16 @@ static const unsigned char ext4_filetype_table[] = {
>
> static inline unsigned char get_dtype(struct super_block *sb, int filetype)
> {
> - if (!ext4_has_feature_filetype(sb) || filetype >= EXT4_FT_MAX)
> + int fl_index = filetype & EXT4_FT_MASK;
> +
> + if (!ext4_has_feature_filetype(sb) || fl_index >= EXT4_FT_MAX)
> return DT_UNKNOWN;
>
> - return ext4_filetype_table[filetype];
> + if (!test_opt(sb, DIRDATA))
> + return (ext4_filetype_table[fl_index]);
> +
> + return (ext4_filetype_table[fl_index]) |
> + (filetype & ~EXT4_FT_MASK);
> }
> extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh,
> void *buf, int buf_size);
> @@ -3271,6 +3338,28 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
>
> extern const struct iomap_ops ext4_iomap_ops;
>
> +/*
> + * Compute the total directory entry data length.
> + * This includes the filename and an implicit NUL terminator (always present),
> + * and optional extensions. Each extension has a bit set in the high 4 bits of
> + * de->file_type, and the extension length is the first byte in each entry.
> + */
> +static inline int ext4_get_dirent_data_len(struct ext4_dir_entry_2 *de)
> +{
> + char *len = de->name + de->name_len + 1 /* NUL terminator */;

I think what Darrick had intended here was to cast the dirdata into struct
ext4_dirent_data_header so that "*len" was not being used directly:

struct ext4_dirent_data_header *ddh = (void *)(de->name + de->name_len + 1);

> + int dlen = 0;
> + __u8 extra_data_flags = (de->file_type & ~EXT4_FT_MASK) >> 4;
> +
> + while (extra_data_flags) {
> + if (extra_data_flags & 1) {
> + dlen += *len + (dlen == 0);
> + len += *len;
> + }

Then ddh->ddh_length is accessed here instead of "*len", maybe with
a helper like:

#define ext4_dirdata_next(ddh) \
(struct ext4_dirent_data_header *)((char *)ddh + ddh->ddh_length)

if (extra_data_flags & 1) {
dlen += ddh->ddh_length + (dlen == 0);
ddh = ext4_dirdata_next(ddh);
}

> + extra_data_flags >>= 1;
> + }
> + return dlen;
> +}
> +
> #endif /* __KERNEL__ */
>
> #define EFSBADCRC EBADMSG /* Bad CRC detected */
> diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
> index 28c5c3abddb3..666891dc03cd 100644
> --- a/fs/ext4/inline.c
> +++ b/fs/ext4/inline.c
> @@ -1026,7 +1026,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
> struct ext4_dir_entry_2 *de;
>
> err = ext4_find_dest_de(dir, inode, iloc->bh, inline_start,
> - inline_size, fname, &de);
> + inline_size, fname, &de, 0, NULL, 0);
> if (err)
> return err;
>
> @@ -1103,7 +1103,7 @@ static int ext4_update_inline_dir(handle_t *handle, struct inode *dir,
> int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE;
> int new_size = get_max_inline_xattr_value_size(dir, iloc);
>
> - if (new_size - old_size <= EXT4_DIR_REC_LEN(1))
> + if (new_size - old_size <= EXT4_DIR_NAME_LEN(1))
> return -ENOSPC;
>
> ret = ext4_update_inline_data(handle, dir,
> @@ -1384,8 +1384,8 @@ int htree_inlinedir_to_tree(struct file *dir_file,
> fake.name_len = 1;
> strcpy(fake.name, ".");
> fake.rec_len = ext4_rec_len_to_disk(
> - EXT4_DIR_REC_LEN(fake.name_len),
> - inline_size);
> + EXT4_DIR_NAME_LEN(fake.name_len),
> + inline_size);
> ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
> de = &fake;
> pos = EXT4_INLINE_DOTDOT_OFFSET;
> @@ -1394,8 +1394,8 @@ int htree_inlinedir_to_tree(struct file *dir_file,
> fake.name_len = 2;
> strcpy(fake.name, "..");
> fake.rec_len = ext4_rec_len_to_disk(
> - EXT4_DIR_REC_LEN(fake.name_len),
> - inline_size);
> + EXT4_DIR_NAME_LEN(fake.name_len),
> + inline_size);
> ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
> de = &fake;
> pos = EXT4_INLINE_DOTDOT_SIZE;
> @@ -1492,8 +1492,8 @@ int ext4_read_inline_dir(struct file *file,
> * So we will use extra_offset and extra_size to indicate them
> * during the inline dir iteration.
> */
> - dotdot_offset = EXT4_DIR_REC_LEN(1);
> - dotdot_size = dotdot_offset + EXT4_DIR_REC_LEN(2);
> + dotdot_offset = EXT4_DIR_NAME_LEN(1);
> + dotdot_size = dotdot_offset + EXT4_DIR_NAME_LEN(2);
> extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE;
> extra_size = extra_offset + inline_size;
>
> @@ -1528,7 +1528,7 @@ int ext4_read_inline_dir(struct file *file,
> * failure will be detected in the
> * dirent test below. */
> if (ext4_rec_len_from_disk(de->rec_len, extra_size)
> - < EXT4_DIR_REC_LEN(1))
> + < EXT4_DIR_NAME_LEN(1))
> break;
> i += ext4_rec_len_from_disk(de->rec_len,
> extra_size);
> diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> index 7c649cf2b630..67edab5572d8 100644
> --- a/fs/ext4/namei.c
> +++ b/fs/ext4/namei.c
> @@ -239,7 +239,8 @@ static unsigned dx_get_count(struct dx_entry *entries);
> static unsigned dx_get_limit(struct dx_entry *entries);
> static void dx_set_count(struct dx_entry *entries, unsigned value);
> static void dx_set_limit(struct dx_entry *entries, unsigned value);
> -static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
> +static inline unsigned int dx_root_limit(struct inode *dir,
> + struct ext4_dir_entry_2 *dot_de, unsigned int infosize);
> static unsigned dx_node_limit(struct inode *dir);
> static struct dx_frame *dx_probe(struct ext4_filename *fname,
> struct inode *dir,
> @@ -552,10 +553,15 @@ static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
> ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
> }
>
> -static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
> +static inline unsigned int dx_root_limit(struct inode *dir,
> + struct ext4_dir_entry_2 *dot_de, unsigned int infosize)
> {
> - unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
> - EXT4_DIR_REC_LEN(2) - infosize;
> + struct ext4_dir_entry_2 *dotdot_de;
> + unsigned int entry_space;
> +
> + dotdot_de = ext4_next_entry(dot_de, dir->i_sb->s_blocksize);
> + entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(dot_de) -
> + EXT4_DIR_REC_LEN(dotdot_de) - infosize;
>
> if (ext4_has_metadata_csum(dir->i_sb))
> entry_space -= sizeof(struct dx_tail);
> @@ -564,7 +570,8 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
>
> static inline unsigned dx_node_limit(struct inode *dir)
> {
> - unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
> + unsigned int entry_space = dir->i_sb->s_blocksize -
> + EXT4_DIR_NAME_LEN(0);
>
> if (ext4_has_metadata_csum(dir->i_sb))
> entry_space -= sizeof(struct dx_tail);
> @@ -676,7 +683,7 @@ static struct stats dx_show_leaf(struct inode *dir,
> (unsigned) ((char *) de - base));
> #endif
> }
> - space += EXT4_DIR_REC_LEN(de->name_len);
> + space += EXT4_DIR_REC_LEN(de);
> names++;
> }
> de = ext4_next_entry(de, size);
> @@ -984,7 +991,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
> de = (struct ext4_dir_entry_2 *) bh->b_data;
> top = (struct ext4_dir_entry_2 *) ((char *) de +
> dir->i_sb->s_blocksize -
> - EXT4_DIR_REC_LEN(0));
> + EXT4_DIR_NAME_LEN(0));
> #ifdef CONFIG_EXT4_FS_ENCRYPTION
> /* Check if the directory is encrypted */
> if (ext4_encrypted_inode(dir)) {
> @@ -1567,6 +1574,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
> inode = NULL;
> if (bh) {
> __u32 ino = le32_to_cpu(de->inode);
> +
> brelse(bh);
> if (!ext4_valid_inum(dir->i_sb, ino)) {
> EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
> @@ -1635,7 +1643,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
> while (count--) {
> struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
> (from + (map->offs<<2));
> - rec_len = EXT4_DIR_REC_LEN(de->name_len);
> + rec_len = EXT4_DIR_REC_LEN(de);
> memcpy (to, de, rec_len);
> ((struct ext4_dir_entry_2 *) to)->rec_len =
> ext4_rec_len_to_disk(rec_len, blocksize);
> @@ -1659,7 +1667,7 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
> while ((char*)de < base + blocksize) {
> next = ext4_next_entry(de, blocksize);
> if (de->inode && de->name_len) {
> - rec_len = EXT4_DIR_REC_LEN(de->name_len);
> + rec_len = EXT4_DIR_REC_LEN(de);
> if (de > to)
> memmove(to, de, rec_len);
> to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
> @@ -1790,10 +1798,13 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
> struct buffer_head *bh,
> void *buf, int buf_size,
> struct ext4_filename *fname,
> - struct ext4_dir_entry_2 **dest_de)
> + struct ext4_dir_entry_2 **dest_de,
> + bool is_dotdot,
> + bool *write_short_dotdot,
> + unsigned short dotdot_reclen)
> {
> struct ext4_dir_entry_2 *de;
> - unsigned short reclen = EXT4_DIR_REC_LEN(fname_len(fname));
> + unsigned short reclen = EXT4_DIR_NAME_LEN(fname_len(fname));
> int nlen, rlen;
> unsigned int offset = 0;
> char *top;
> @@ -1806,10 +1817,28 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
> return -EFSCORRUPTED;
> if (ext4_match(fname, de))
> return -EEXIST;
> - nlen = EXT4_DIR_REC_LEN(de->name_len);
> + nlen = EXT4_DIR_REC_LEN(de);
> rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
> + /* Check first for enough space for the full entry */
> if ((de->inode ? rlen - nlen : rlen) >= reclen)
> break;
> + /* Then for dotdot entries, check for the smaller space
> + * required for just the entry, no FID
> + */
> + if (is_dotdot) {
> + if ((de->inode ? rlen - nlen : rlen) >=
> + dotdot_reclen) {
> + *write_short_dotdot = true;
> + break;
> + }
> + /* The new ".." entry must be written over the
> + * previous ".." entry, which is the first
> + * entry traversed by this scan. If it doesn't
> + * fit, something is badly wrong, so -EIO.
> + */
> + return -EIO;
> + }
> +
> de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
> offset += rlen;
> }
> @@ -1828,7 +1857,8 @@ void ext4_insert_dentry(struct inode *inode,
>
> int nlen, rlen;
>
> - nlen = EXT4_DIR_REC_LEN(de->name_len);
> + nlen = EXT4_DIR_REC_LEN(de);
> +
> rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
> if (de->inode) {
> struct ext4_dir_entry_2 *de1 =
> @@ -1852,21 +1882,46 @@ void ext4_insert_dentry(struct inode *inode,
> * space. It will return -ENOSPC if no space is available, and -EIO
> * and -EEXIST if directory entry already exists.
> */
> -static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
> +static int add_dirent_to_buf(handle_t *handle,
> + struct dentry *dentry,
> + struct ext4_filename *fname,
> struct inode *dir,
> struct inode *inode, struct ext4_dir_entry_2 *de,
> struct buffer_head *bh)
> {
> unsigned int blocksize = dir->i_sb->s_blocksize;
> int csum_size = 0;
> - int err;
> + unsigned short reclen, dotdot_reclen = 0;
> + int err, dlen = 0;
> + bool is_dotdot = false, write_short_dotdot = false;
> + struct ext4_dirent_data_header *ddh;
> + int namelen = dentry->d_name.len;
>
> if (ext4_has_metadata_csum(inode->i_sb))
> csum_size = sizeof(struct ext4_dir_entry_tail);
>
> + ddh = ext4_dentry_get_data(inode->i_sb, (struct ext4_dentry_param *)
> + dentry->d_fsdata);
> + if (ddh)
> + dlen = ddh->ddh_length + 1 /* NUL separator */;
> +
> + is_dotdot = (namelen == 2 &&
> + memcmp(dentry->d_name.name, "..", 2) == 0);
> +
> + /* dotdot entries must be in the second place in a directory block,
> + * so calculate an alternate length without the dirdata so they can
> + * always be made to fit in the existing slot
> + */
> + if (is_dotdot)
> + dotdot_reclen = EXT4_DIR_NAME_LEN(namelen);
> +
> + reclen = EXT4_DIR_NAME_LEN(namelen + dlen + 3);
> +
> if (!de) {
> err = ext4_find_dest_de(dir, inode, bh, bh->b_data,
> - blocksize - csum_size, fname, &de);
> + blocksize - csum_size, fname, &de,
> + is_dotdot,
> + &write_short_dotdot, dotdot_reclen);
> if (err)
> return err;
> }
> @@ -1880,6 +1935,24 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
> /* By now the buffer is marked for journaling */
> ext4_insert_dentry(inode, de, blocksize, fname);
>
> + /* If we're writing short form of "dotdot", don't add data section */
> + if (ddh && !write_short_dotdot) {
> + de->name[namelen] = 0;
> + memcpy(&de->name[namelen + 1], ddh, ddh->ddh_length);
> + de->file_type |= EXT4_DIRENT_LUFID;
> + data_offset = ddh->ddh_length;

I don't see where "data_offset" is declared in this patch. It looks like the
declaration is only added in the next patch.

> + }
> +
> + if (inode) {


This whole part handling "i_ino_hi" should go into the next patch?

> + __u32 *i_ino_hi;
> +
> + de->name[namelen + 1 + data_offset] = 5;

> + i_ino_hi = (__u32 *)&de->name[namelen + 1 + data_offset + 1];
> + *i_ino_hi = cpu_to_le32((__u32)(inode->i_ino >> 32));


> @@ -1976,20 +2049,17 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,

> dotdot_de->rec_len =
> ext4_rec_len_to_disk(blocksize - le16_to_cpu(dot_de->rec_len),
> blocksize);
> -
> /* initialize hashing info */
> dx_info = dx_get_dx_info(dot_de);
> memset(dx_info, 0, sizeof(*dx_info));
> dx_info->info_length = sizeof(*dx_info);
> dx_info->hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
> -
> entries = (void *)dx_info + sizeof(*dx_info);
> -

Not sure why these blank lines are being removed?

> dx_set_block(entries, 1);
> dx_set_count(entries, 1);
> - dx_set_limit(entries, dx_root_limit(dir, (struct ext4_dir_entry_2 *)
> - frame->bh->b_data,
> - sizeof(*dx_info)));
> + dx_set_limit(entries, dx_root_limit(dir,
> + (struct ext4_dir_entry_2 *)frame->bh->b_data,
> + sizeof(*dx_info)));
>
> /* Initialize as for dx_probe */
> fname->hinfo.hash_version = dx_info->hash_version;
> @@ -2017,7 +2087,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
> goto out_frames;
> }
>
> - retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2);
> + retval = add_dirent_to_buf(handle, NULL, fname, dir, inode, de, bh2);
> out_frames:
> /*
> * Even if the block split failed, we have to properly write
> @@ -2094,7 +2164,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
> bh = NULL;
> goto out;
> }
> - retval = add_dirent_to_buf(handle, &fname, dir, inode,
> + retval = add_dirent_to_buf(handle, dentry, &fname, dir, inode,
> NULL, bh);
> if (retval != -ENOSPC)
> goto out;
> @@ -2123,7 +2193,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
> initialize_dirent_tail(t, blocksize);
> }
>
> - retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh);
> + retval = add_dirent_to_buf(handle, dentry, &fname, dir, inode, de, bh);
> out:
> ext4_fname_free_filename(&fname);
> brelse(bh);
> @@ -2165,7 +2235,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
> if (err)
> goto journal_error;
>
> - err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh);
> + err = add_dirent_to_buf(handle, NULL, fname, dir, inode, NULL, bh);
> if (err != -ENOSPC)
> goto cleanup;
>
> @@ -2291,7 +2361,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
> err = PTR_ERR(de);
> goto cleanup;
> }
> - err = add_dirent_to_buf(handle, fname, dir, inode, de, bh);
> + err = add_dirent_to_buf(handle, NULL, fname, dir, inode, de, bh);
> goto cleanup;
>
> journal_error:
> @@ -2557,7 +2627,7 @@ struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
> {
> de->inode = cpu_to_le32(inode->i_ino);
> de->name_len = 1;
> - de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
> + de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de),
> blocksize);
> strcpy(de->name, ".");
> ext4_set_de_type(inode->i_sb, de, S_IFDIR);
> @@ -2567,11 +2637,11 @@ struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
> de->name_len = 2;
> if (!dotdot_real_len)
> de->rec_len = ext4_rec_len_to_disk(blocksize -
> - (csum_size + EXT4_DIR_REC_LEN(1)),
> + (csum_size + EXT4_DIR_NAME_LEN(1)),
> blocksize);
> else
> de->rec_len = ext4_rec_len_to_disk(
> - EXT4_DIR_REC_LEN(de->name_len), blocksize);
> + EXT4_DIR_REC_LEN(de), blocksize);
> strcpy(de->name, "..");
> ext4_set_de_type(inode->i_sb, de, S_IFDIR);
>
> @@ -2700,7 +2770,7 @@ bool ext4_empty_dir(struct inode *inode)
> }
>
> sb = inode->i_sb;
> - if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) {
> + if (inode->i_size < EXT4_DIR_NAME_LEN(1) + EXT4_DIR_NAME_LEN(2)) {
> EXT4_ERROR_INODE(inode, "invalid size");
> return true;
> }
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index b0915b734a38..ead9406d9cff 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1339,7 +1339,7 @@ enum {
> Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
> Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
> Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
> - Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
> + Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, Opt_dirdata,
> Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
> Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
> Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
> @@ -1400,6 +1400,7 @@ static const match_table_t tokens = {
> {Opt_noquota, "noquota"},
> {Opt_quota, "quota"},
> {Opt_usrquota, "usrquota"},
> + {Opt_dirdata, "dirdata"},
> {Opt_prjquota, "prjquota"},
> {Opt_barrier, "barrier=%u"},
> {Opt_barrier, "barrier"},
> --
> 2.13.6 (Apple Git-96)
>


Cheers, Andreas






Attachments:
signature.asc (195.00 B)
Message signed with OpenPGP

2017-12-05 21:27:06

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH v3 3/3] ext4: Add 64-bit inode number support

On Nov 30, 2017, at 8:17 AM, Artem Blagodarenko <[email protected]> wrote:
>
> Use dirdata to store high bits of 64bit inode
> number.
>
> Signed-off-by: Artem Blagodarenko <[email protected]>
> ---
> fs/ext4/dir.c | 4 +--
> fs/ext4/ext4.h | 85 ++++++++++++++++++++++++++++++++++++++++++++++----------
> fs/ext4/ialloc.c | 19 ++++++++-----
> fs/ext4/inode.c | 5 ++++
> fs/ext4/namei.c | 53 ++++++++++++++++++++++++++++-------
> fs/ext4/resize.c | 8 +++---
> fs/ext4/super.c | 14 +++++++---
> 7 files changed, 147 insertions(+), 41 deletions(-)
>
> diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
> index 0c4dddb0f07a..d971bc68903c 100644
> --- a/fs/ext4/dir.c
> +++ b/fs/ext4/dir.c
> @@ -76,7 +76,7 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
> else if (unlikely(((char *) de - buf) + rlen > size))
> error_msg = "directory entry across range";
> else if (unlikely(le32_to_cpu(de->inode) >
> - le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
> + ext4_get_inodes_count(dir->i_sb)))
> @@ -382,7 +382,7 @@ struct fname {
> __u32 minor_hash;
> struct rb_node rb_hash;
> struct fname *next;
> - __u32 inode;
> + __u64 inode;
> __u8 name_len;
> __u8 file_type;
> char name[0];
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 3678657d8e47..18e30589b704 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1331,7 +1331,10 @@ struct ext4_super_block {
> __le32 s_lpf_ino; /* Location of the lost+found inode */
> __le32 s_prj_quota_inum; /* inode for tracking project quota */
> __le32 s_checksum_seed; /* crc32c(uuid) if csum_seed set */
> - __le32 s_reserved[98]; /* Padding to the end of the block */
> + __le32 s_inodes_count_hi; /* high part of inode count */
> + __le32 s_free_inodes_count_hi; /* high part of free inodes count */
> + __le32 s_prj_quota_inum_hi; /* high part of project quota inode */
> + __le32 s_reserved[95]; /* Padding to the end of the block */

What about s_last_orphan, s_first_error_ino, and s_last_error_ino?

> __le32 s_checksum; /* crc32c(superblock) */
> };
>
> @@ -1539,18 +1542,6 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
> return container_of(inode, struct ext4_inode_info, vfs_inode);
> }
>
> -static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
> -{
> - return ino == EXT4_ROOT_INO ||
> - ino == EXT4_USR_QUOTA_INO ||
> - ino == EXT4_GRP_QUOTA_INO ||
> - ino == EXT4_BOOT_LOADER_INO ||
> - ino == EXT4_JOURNAL_INO ||
> - ino == EXT4_RESIZE_INO ||
> - (ino >= EXT4_FIRST_INO(sb) &&
> - ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
> -}
> -
> /*
> * Inode dynamic state flags
> */
> @@ -1689,6 +1680,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
> #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
> #define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */
> #define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000
> +#define EXT4_FEATURE_INCOMPAT_INODE64 0x20000
>
> #define EXT4_FEATURE_COMPAT_FUNCS(name, flagname) \
> static inline bool ext4_has_feature_##name(struct super_block *sb) \
> @@ -1777,6 +1769,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(csum_seed, CSUM_SEED)
> EXT4_FEATURE_INCOMPAT_FUNCS(largedir, LARGEDIR)
> EXT4_FEATURE_INCOMPAT_FUNCS(inline_data, INLINE_DATA)
> EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
> +EXT4_FEATURE_INCOMPAT_FUNCS(inode64, INODE64)
> +
>
> #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
> #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
> @@ -1805,6 +1799,7 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt, ENCRYPT)
> EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
> EXT4_FEATURE_INCOMPAT_ENCRYPT | \
> EXT4_FEATURE_INCOMPAT_CSUM_SEED | \
> + EXT4_FEATURE_INCOMPAT_INODE64 | \
> EXT4_FEATURE_INCOMPAT_LARGEDIR | \
> EXT4_FEATURE_INCOMPAT_DIRDATA)
> #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
> @@ -2000,6 +1995,11 @@ struct ext4_dirent_lufid {
> __u8 dl_data[0];
> } __packed;
>
> +struct ext4_dirent_inohi {
> + struct ext4_dirent_data_header di_header; /* 1 + 4 */
> + __le32 di_inohi;
> +} __packed;

It would be better to keep the struct name "ext4_dirent_inode64" to match
the feature name?

I also notice that you need to update "s_inode_goal" to be "unsigned long",
so that you can easily test inode allocations beyond 2^32. You also need
to fix the ext4_ext_migrate() function to have a __u64 goal.

There is an interaction with the xattr_inode feature that needs to be fixed.
The e_value_inum field is only a __u32, and there doesn't appear to be any
space to hold the high 32 bits of the inode number therein. One option (not
sure what you think of this) is to always use the same __u32 i_ino_hi for the
xattr inode as for the regular inode that references it. That is OK for
unshared xattrs. For shared xattr inodes, it would mean one shared inode per
range of 2^32 inodes, which could be done by changing
ext4_xattr_inode_cache_find() to only return an existing cached entry if it
has the same high 32 bits in its inode number as the referencing inode.

There also looks to be some interaction with inline.c::htree_inlinedir_to_tree()
storing the inode number into the "fake" dirent, but it isn't handling 64-bit
inodes. Similarly, ext4_try_create_inline_dir() and ext4_add_dirent_to_inline()
need some attention.

> @@ -2476,7 +2476,7 @@ extern int ext4fs_dirhash(const char *name, int len, struct
>
> /* ialloc.c */
> extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
> - const struct qstr *qstr, __u32 goal,
> + const struct qstr *qstr, __u64 goal,
> uid_t *owner, __u32 i_flags,
> int handle_type, unsigned int line_no,
> int nblocks);
> @@ -2903,6 +2903,63 @@ static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi)
> return 1 << sbi->s_log_groups_per_flex;
> }
>
> +static inline unsigned long ext4_get_inodes_count(struct super_block *sb)
> +{
> + struct ext4_super_block *es = EXT4_SB(sb)->s_es;
> + unsigned long inodes_count = le32_to_cpu(es->s_inodes_count);
> +
> + if (ext4_has_feature_inode64(sb))
> + inodes_count |=
> + (unsigned long)le32_to_cpu(es->s_inodes_count_hi)
> + << 32;
> + return inodes_count;
> +}
> +
> +static inline void ext4_set_inodes_count(struct super_block *sb,
> + unsigned long val)
> +{
> + struct ext4_super_block *es = EXT4_SB(sb)->s_es;
> +
> + if (ext4_has_feature_inode64(sb))
> + es->s_inodes_count_hi = cpu_to_le32(val >> 32);
> +
> + es->s_inodes_count = cpu_to_le32(val);
> +}
> +
> +static inline unsigned long ext4_get_free_inodes_count(struct super_block *sb)
> +{
> + struct ext4_super_block *es = EXT4_SB(sb)->s_es;
> + unsigned long inodes_count = le32_to_cpu(es->s_free_inodes_count);
> +
> + if (ext4_has_feature_inode64(sb))
> + inodes_count |=
> + (unsigned long)le32_to_cpu(es->s_free_inodes_count_hi)
> + << 32;
> + return inodes_count;
> +}
> +
> +static inline void ext4_set_free_inodes_count(struct super_block *sb,
> + unsigned long val)
> +{
> + struct ext4_super_block *es = EXT4_SB(sb)->s_es;
> +
> + if (ext4_has_feature_inode64(sb))
> + es->s_free_inodes_count_hi = cpu_to_le32(val >> 32);
> +
> + es->s_free_inodes_count = cpu_to_le32(val);
> +}
> +
> +static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
> +{
> + return ino == EXT4_ROOT_INO ||
> + ino == EXT4_USR_QUOTA_INO ||
> + ino == EXT4_GRP_QUOTA_INO ||
> + ino == EXT4_JOURNAL_INO ||
> + ino == EXT4_RESIZE_INO ||
> + (ino >= EXT4_FIRST_INO(sb) &&
> + ino <= ext4_get_inodes_count(sb));
> +}
> +
> #define ext4_std_error(sb, errno) \
> do { \
> if ((errno)) \
> diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
> index ee823022aa34..e23dc4133e84 100644
> --- a/fs/ext4/ialloc.c
> +++ b/fs/ext4/ialloc.c
> @@ -303,7 +303,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
> ext4_clear_inode(inode);
>
> es = EXT4_SB(sb)->s_es;
> - if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
> + if (ino < EXT4_FIRST_INO(sb) || ino > ext4_get_inodes_count(sb)) {
> ext4_error(sb, "reserved or nonexistent inode %lu", ino);
> goto error_return;
> }
> @@ -770,7 +770,7 @@ static int find_inode_bit(struct super_block *sb, ext4_group_t group,
> */
> struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
> umode_t mode, const struct qstr *qstr,
> - __u32 goal, uid_t *owner, __u32 i_flags,
> + __u64 goal, uid_t *owner, __u32 i_flags,
> int handle_type, unsigned int line_no,
> int nblocks)
> {
> @@ -887,7 +887,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
> if (!goal)
> goal = sbi->s_inode_goal;
>
> - if (goal && goal <= le32_to_cpu(sbi->s_es->s_inodes_count)) {
> + if (goal && goal <= ext4_get_inodes_count(sb)) {
> group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
> ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
> ret2 = 0;
> @@ -1149,6 +1149,11 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
> __le32 gen = cpu_to_le32(inode->i_generation);
> csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
> sizeof(inum));
> + if (inode->i_ino >> 32) {
> + inum = cpu_to_le32(inode->i_ino >> 32);
> + csum = ext4_chksum(sbi, sbi->s_csum_seed,
> + (__u8 *)&inum, sizeof(inum));
> + }
> ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen,
> sizeof(gen));
> }
> @@ -1226,7 +1231,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
> /* Verify that we are loading a valid orphan from disk */
> struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
> {
> - unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
> + unsigned long max_ino = ext4_get_inodes_count(sb);
> ext4_group_t block_group;
> int bit;
> struct buffer_head *bitmap_bh = NULL;
> @@ -1330,9 +1335,9 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
> bitmap_count += x;
> }
> brelse(bitmap_bh);
> - printk(KERN_DEBUG "ext4_count_free_inodes: "
> - "stored = %u, computed = %lu, %lu\n",
> - le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
> + printk(KERN_DEBUG "ext4_count_free_inodes:\n"

This should be kept on a single line.

> + "stored = %lu, computed = %lu, %lu\n",

> + ext4_get_inodes_count(sb), desc_count, bitmap_count);
> return desc_count;
> #else
> desc_count = 0;
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 31db875bc7a1..9caefee1bce9 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4691,6 +4691,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
> __le32 gen = raw_inode->i_generation;
> csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
> sizeof(inum));
> + if (inode->i_ino >> 32) {
> + inum = cpu_to_le32(inode->i_ino >> 32);
> + csum = ext4_chksum(sbi, sbi->s_csum_seed,
> + (__u8 *)&inum, sizeof(inum));
> + }
> ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen,
> sizeof(gen));
> }
> diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> index 67edab5572d8..0ef33556c51d 100644
> --- a/fs/ext4/namei.c
> +++ b/fs/ext4/namei.c
> @@ -1573,11 +1573,45 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
> return (struct dentry *) bh;
> inode = NULL;
> if (bh) {
> - __u32 ino = le32_to_cpu(de->inode);
> + unsigned long ino;
> +
> + ino = le32_to_cpu(de->inode);
> + if (ext4_has_feature_inode64(dir->i_sb) &&
> + (de->file_type & EXT4_DIRENT_INODE)) {
> + struct ext4_dirent_data_header *ddh =
> + (struct ext4_dirent_data_header *)
> + &de->name[de->name_len];

This should always skip the NUL separator after the name, and not make it
part of EXT4_DIRENT_LUFID:

struct ext4_dirent_data_header *ddh =
(struct ext4_dirent_data_header *)
(&de->name[de->name_len] + 1);

Otherwise, it isn't clear whether EXT4_DIRENT_INODE64 handling should
skip the NUL byte if there is no EXT4_DIRENT_LUFID record.

Should we also be checking "if ((char *)ddh > &de->name[de->rec_len])" here?

> +
> + if (de->file_type & EXT4_DIRENT_LUFID) {
> + /* skip LUFID record if present */
> + ddh = (struct ext4_dirent_data_header *)
> + &de->name[de->name_len + 1 +
> + ddh->ddh_length];

Having an "ext4_dirdata_next()" helper would be convenient here, or at least
avoid recomputing the offset:

ddh = (struct ext4_dirent_data_header *)
((char *)ddh + ddh->ddh_length);

> + }
> +
> + if ((char *)ddh > &de->name[de->rec_len]) {
> + EXT4_ERROR_INODE(dir,
> + "corrupted dirdata entry\n");
> + return ERR_PTR(-EFSCORRUPTED);
> + }

> +
> + if (ddh->ddh_length == (sizeof(__u32) + 1)) {

(defect) this should check for EXT4_DIRENT_INODE64 before checking the size,
otherwise this would accidentally catch 1/256 inodes that happen to have "05"
in the low byte.

> + __le32 ino_hi;
> + struct ext4_dirent_lufid *dlf =
> + (struct ext4_dirent_lufid *)ddh;

(defect) this should be struct ext4_dirent_inohi *di?

> +
> + memcpy(&ino_hi, dlf->dl_data, sizeof(__u32));

Then here it would be:
memcpy(&ino_hi, &di->di_inohi, sizeof(ino_hi));

> + ino |= (__u64)le32_to_cpu(ino_hi) << 32;
> + } else {
> + EXT4_ERROR_INODE(dir,
> + "corrupted dirdata inode number\n");
> + return ERR_PTR(-EFSCORRUPTED);
> + }
> + }

I also saw that ext4_rename_dir_prepare(), ext4_rename_dir_finish(),
ext4_rename_delete(), ext4_cross_rename(), ext4_empty_dir(), ext4_rmdir(),
ext4_rename(), ext4_unlink(), and ext4_setent() need to be updated to
handle 64-bit inodes, since they are comparing i_ino to de->inode when
handling the directory entry. It looks like it would be useful to have
a helper function like ext4_dirent_ino(de) that extracts the 64-bit inode
from the dirent, which could be used in ext4_lookup() (it would be the whole
block of code added above that is checking for ext4_has_feature_inode64()).

Would it make sense to add the ext4_dirent_ino() and
ext4_{get,set}_inodes_count() helper functions in a preliminary patch (with
only 32-bit inodes) and then add the code handling 64-bit inodes in a second
patch? That would move all the boring changes out of this patch, so it can
focus on the important changes.


> @@ -1588,7 +1622,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
> inode = ext4_iget_normal(dir->i_sb, ino);
> if (inode == ERR_PTR(-ESTALE)) {
> EXT4_ERROR_INODE(dir,
> - "deleted inode referenced: %u",
> + "deleted inode referenced: %lu",
> ino);
> return ERR_PTR(-EFSCORRUPTED);
> }
> @@ -1892,7 +1926,7 @@ static int add_dirent_to_buf(handle_t *handle,
> unsigned int blocksize = dir->i_sb->s_blocksize;
> int csum_size = 0;
> unsigned short reclen, dotdot_reclen = 0;
> - int err, dlen = 0;
> + int err, dlen = 0, data_offset = 0;
> bool is_dotdot = false, write_short_dotdot = false;
> struct ext4_dirent_data_header *ddh;
> int namelen = dentry->d_name.len;
> @@ -1944,13 +1978,12 @@ static int add_dirent_to_buf(handle_t *handle,
> }
>
> if (inode) {
> - __u32 *i_ino_hi;
> -
> - de->name[namelen + 1 + data_offset] = 5;
> - i_ino_hi = (__u32 *)&de->name[namelen + 1 + data_offset + 1];
> - *i_ino_hi = cpu_to_le32((__u32)(inode->i_ino >> 32));
> - de->file_type |= EXT4_DIRENT_INODE;
> + struct ext4_dirent_inohi *di = (struct ext4_dirent_inohi *)
> + &de->name[namelen + 1 + data_offset];
> + di->di_header.ddh_length = sizeof(*di);
> + di->di_inohi = cpu_to_le32(inode->i_ino >> 32);
> de->inode = cpu_to_le32(inode->i_ino & 0xFFFFFFFF);
> + de->file_type |= EXT4_DIRENT_INODE;
> }

Since "i_ino_hi" may not be 4-byte aligned, using it directly as a pointer
may cause problems on some architectures. This should instead use memcpy()
like above:

struct ext4_dirent_inohi {
struct ext4_dirent_data_header di_header; /* 1 + 4 */
__le32 di_inohi;
} __packed;

if (inode) {
struct ext4_dirent_inohi *di;
__u32 i_ino_hi;

i_ino_hi = cpu_to_le32((__u32)(inode->i_ino >> 32));
di = (void *)&de->name[namelen + 1 + data_offset];

di->di_header.ddh_length = sizeof(*di);
memcpy(&di->di_inohi, &i_ino_hi, sizeof(i_ino_hi));

> + de->file_type |= EXT4_DIRENT_INODE;
> + de->inode = cpu_to_le32(inode->i_ino & 0xFFFFFFFF);

Setting "de->inode" isn't needed here; it was already set in ext4_insert_dentry() above.

For consistency, this should also increment data_offset so it will be
correct if/when the next dirdata field is added.

data_offset += di->di_header.ddh_length;
}

> diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
> index 035cd3f4785e..d0d5acd1a70d 100644
> --- a/fs/ext4/resize.c
> +++ b/fs/ext4/resize.c
> @@ -1337,10 +1337,10 @@ static void ext4_update_super(struct super_block *sb,
>
> ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count);
> ext4_free_blocks_count_set(es, ext4_free_blocks_count(es) + free_blocks);
> - le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) *
> - flex_gd->count);
> - le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) *
> - flex_gd->count);
> + ext4_set_inodes_count(sb, ext4_get_inodes_count(sb) +
> + EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
> + ext4_set_free_inodes_count(sb, ext4_get_free_inodes_count(sb) +
> + EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
>
> ext4_debug("free blocks count %llu", ext4_free_blocks_count(es));
> /*
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index ead9406d9cff..a06252f9aada 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -3489,6 +3489,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
> goto cantfind_ext4;
> }
>
> + if (ext4_has_feature_inode64(sb) &&
> + (sizeof(u64) != sizeof(unsigned long))) {
> + ext4_msg(sb, KERN_ERR, "64 bit inodes need 64 bit kernel.");

Nit: "64-bit" should be hyphenated (both occurrences in this message).

> + goto failed_mount;
> + }
> +
> /* Load the checksum driver */
> if (ext4_has_feature_metadata_csum(sb) ||
> ext4_has_feature_ea_inode(sb)) {
> @@ -4248,7 +4254,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
> GFP_KERNEL);
> if (!err) {
> unsigned long freei = ext4_count_free_inodes(sb);
> - sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
> + ext4_set_free_inodes_count(sb, freei);
> err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
> GFP_KERNEL);
> }
> @@ -4705,9 +4711,9 @@ static int ext4_commit_super(struct super_block *sb, int sync)
> EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
> &EXT4_SB(sb)->s_freeclusters_counter)));
> if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
> - es->s_free_inodes_count =
> - cpu_to_le32(percpu_counter_sum_positive(
> - &EXT4_SB(sb)->s_freeinodes_counter));
> + ext4_set_free_inodes_count(sb,
> + cpu_to_le32(percpu_counter_sum_positive(
> + &EXT4_SB(sb)->s_freeinodes_counter)));
> BUFFER_TRACE(sbh, "marking dirty");
> ext4_superblock_csum_set(sb);
> if (sync)
> --
> 2.13.6 (Apple Git-96)
>


Cheers, Andreas






Attachments:
signature.asc (195.00 B)
Message signed with OpenPGP