Implement suport for FS_IOC_GETFSLABEL and FS_IOC_SETFSLABEL ioctls for
online reading and setting of file system label.
ext4_ioctl_getlabel() is simple, just get the label from the primary
superblock bh. This might not be the first sb on the file system if
'sb=' mount option is used.
In ext4_ioctl_setlabel() we update what ext4 currently views as a
primary superblock and then proceed to update backup superblocks. There
are two caveats:
- the primary superblock might not be the first superblock and so it
might not be the one used by userspace tools if read directly
off the disk.
- because the primary superblock might not be the first superblock we
potentialy have to update it as part of backup superblock update.
However the first sb location is a bit more complicated than the rest
so we have to account for that.
Tested with generic/492 with various configurations. I also checked the
behavior with 'sb=' mount options, including very large file systems
with and without sparse_super/sparse_super2.
Signed-off-by: Lukas Czerner <[email protected]>
---
fs/ext4/ext4.h | 6 +-
fs/ext4/ioctl.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++++
fs/ext4/super.c | 4 +-
3 files changed, 176 insertions(+), 3 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3825195539d7..0856afb629e3 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1297,6 +1297,8 @@ extern void ext4_set_bits(void *bm, int cur, int len);
/* Metadata checksum algorithm codes */
#define EXT4_CRC32C_CHKSUM 1
+#define EXT4_LABEL_MAX 16
+
/*
* Structure of the super block
*/
@@ -1346,7 +1348,7 @@ struct ext4_super_block {
/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
__le32 s_feature_ro_compat; /* readonly-compatible feature set */
/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
-/*78*/ char s_volume_name[16]; /* volume name */
+/*78*/ char s_volume_name[EXT4_LABEL_MAX]; /* volume name */
/*88*/ char s_last_mounted[64] __nonstring; /* directory where last mounted */
/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
/*
@@ -3109,6 +3111,8 @@ extern int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait);
extern void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block);
extern int ext4_seq_options_show(struct seq_file *seq, void *offset);
extern int ext4_calculate_overhead(struct super_block *sb);
+extern __le32 ext4_superblock_csum(struct super_block *sb,
+ struct ext4_super_block *es);
extern void ext4_superblock_csum_set(struct super_block *sb);
extern int ext4_alloc_flex_bg_array(struct super_block *sb,
ext4_group_t ngroup);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 606dee9e08a3..367aa80b050d 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -850,6 +850,166 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
return err;
}
+static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label)
+{
+ char label[EXT4_LABEL_MAX + 1];
+
+ /*
+ * EXT4_LABEL_MAX must always be smaller than FSLABEL_MAX because
+ * FSLABEL_MAX must include terminating null byte, while s_volume_name
+ * does not have to.
+ */
+ BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX);
+
+ memset(label, 0, sizeof(label));
+ lock_buffer(sbi->s_sbh);
+ strncpy(label, sbi->s_es->s_volume_name, EXT4_LABEL_MAX);
+ unlock_buffer(sbi->s_sbh);
+
+ if (copy_to_user(user_label, label, sizeof(label)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ext4_ioctl_setlabel(struct file *filp, const char __user *user_label)
+{
+ size_t len;
+ handle_t *handle;
+ ext4_group_t ngroups;
+ ext4_fsblk_t sb_block;
+ struct buffer_head *bh;
+ int ret = 0, ret2, grp;
+ unsigned long offset = 0;
+ char new_label[EXT4_LABEL_MAX + 1];
+ struct super_block *sb = file_inode(filp)->i_sb;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+
+ /* Sanity check, this should never happen */
+ BUILD_BUG_ON(EXT4_LABEL_MAX > sizeof(es->s_volume_name));
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ /*
+ * Copy the maximum length allowed for ext4 label with one more to
+ * find the required terminating null byte in order to test the
+ * label length. The on disk label doesn't need to be null terminated.
+ */
+ if (copy_from_user(new_label, user_label, EXT4_LABEL_MAX + 1))
+ return -EFAULT;
+
+ len = strnlen(new_label, EXT4_LABEL_MAX + 1);
+ if (len > EXT4_LABEL_MAX)
+ return -EINVAL;
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, EXT4_MAX_TRANS_DATA);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto err_out;
+ }
+ /* Update the primary superblock first */
+ ret = ext4_journal_get_write_access(handle, sb,
+ sbi->s_sbh,
+ EXT4_JTR_NONE);
+ if (ret)
+ goto err_journal;
+
+ lock_buffer(sbi->s_sbh);
+ memset(es->s_volume_name, 0, sizeof(es->s_volume_name));
+ memcpy(es->s_volume_name, new_label, len);
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(sbi->s_sbh);
+
+ ret = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
+ if (ret)
+ goto err_journal;
+ sync_dirty_buffer(sbi->s_sbh);
+
+ /* Update backup superblocks */
+ ngroups = ext4_get_groups_count(sb);
+ for (grp = 0; grp < ngroups; grp++) {
+
+ if (!ext4_bg_has_super(sb, grp))
+ continue;
+
+ /*
+ * For the group 0 there is always 1k padding, so we have
+ * either adjust offset, or sb_block depending on blocksize
+ */
+ if (grp == 0) {
+ sb_block = 1 * EXT4_MIN_BLOCK_SIZE;
+ offset = do_div(sb_block, sb->s_blocksize);
+ } else {
+ sb_block = ext4_group_first_block_no(sb, grp);
+ offset = 0;
+ }
+
+ /*
+ * Skip primary superblock, it's already done. Note that the
+ * primary superblock is not always at group 0
+ */
+ if (sbi->s_sbh->b_blocknr == sb_block)
+ continue;
+
+ ret = ext4_journal_ensure_credits_fn(handle, 1,
+ EXT4_MAX_TRANS_DATA,
+ 0, 0);
+ if (ret < 0)
+ break;
+
+ bh = ext4_sb_bread(sb, sb_block, 0);
+ if (IS_ERR(bh)) {
+ ret = PTR_ERR(bh);
+ break;
+ }
+
+ ext4_debug("update backup superblock %llu\n", sb_block);
+ BUFFER_TRACE(bh, "get_write_access");
+ ret = ext4_journal_get_write_access(handle, sb,
+ bh,
+ EXT4_JTR_NONE);
+ if (ret) {
+ brelse(bh);
+ break;
+ }
+
+ es = (struct ext4_super_block *) (bh->b_data + offset);
+ lock_buffer(bh);
+ if (ext4_has_metadata_csum(sb) &&
+ es->s_checksum != ext4_superblock_csum(sb, es)) {
+ ext4_msg(sb, KERN_ERR, "Invalid checksum for backup "
+ "superblock %llu\n", sb_block);
+ unlock_buffer(bh);
+ brelse(bh);
+ ret = -EFSBADCRC;
+ break;
+ }
+ memset(es->s_volume_name, 0, sizeof(es->s_volume_name));
+ memcpy(es->s_volume_name, new_label, len);
+ if (ext4_has_metadata_csum(sb))
+ es->s_checksum = ext4_superblock_csum(sb, es);
+ unlock_buffer(bh);
+
+ ret = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (unlikely(ret))
+ ext4_std_error(sb, ret);
+ brelse(bh);
+ }
+
+err_journal:
+ ret2 = ext4_journal_stop(handle);
+ if (ret2 && !ret)
+ ret = ret2;
+err_out:
+ mnt_drop_write_file(filp);
+ ext4_std_error(sb, ret);
+ return ret;
+}
+
static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -1266,6 +1426,13 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case EXT4_IOC_CHECKPOINT:
return ext4_ioctl_checkpoint(filp, arg);
+ case FS_IOC_GETFSLABEL:
+ return ext4_ioctl_getlabel(EXT4_SB(sb), (void __user *)arg);
+
+ case FS_IOC_SETFSLABEL:
+ return ext4_ioctl_setlabel(filp,
+ (const void __user *)arg);
+
default:
return -ENOTTY;
}
@@ -1347,6 +1514,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_GETSTATE:
case EXT4_IOC_GET_ES_CACHE:
case EXT4_IOC_CHECKPOINT:
+ case FS_IOC_GETFSLABEL:
+ case FS_IOC_SETFSLABEL:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a320c54202d9..f6c2f44ab221 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -260,8 +260,8 @@ static int ext4_verify_csum_type(struct super_block *sb,
return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
}
-static __le32 ext4_superblock_csum(struct super_block *sb,
- struct ext4_super_block *es)
+__le32 ext4_superblock_csum(struct super_block *sb,
+ struct ext4_super_block *es)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
int offset = offsetof(struct ext4_super_block, s_checksum);
--
2.31.1
Implement support for FS_IOC_GETFSLABEL and FS_IOC_SETFSLABEL ioctls for
online reading and setting of file system label.
ext4_ioctl_getlabel() is simple, just get the label from the primary
superblock bh. This might not be the first sb on the file system if
'sb=' mount option is used.
In ext4_ioctl_setlabel() we update what ext4 currently views as a
primary superblock and then proceed to update backup superblocks. There
are two caveats:
- the primary superblock might not be the first superblock and so it
might not be the one used by userspace tools if read directly
off the disk.
- because the primary superblock might not be the first superblock we
potentialy have to update it as part of backup superblock update.
However the first sb location is a bit more complicated than the rest
so we have to account for that.
Tested with generic/492 with various configurations. I also checked the
behavior with 'sb=' mount options, including very large file systems
with and without sparse_super/sparse_super2.
Signed-off-by: Lukas Czerner <[email protected]>
---
V2: Fix typo. Place constant in BUILD_BUG_ON comparison on the right side
fs/ext4/ext4.h | 6 +-
fs/ext4/ioctl.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++++
fs/ext4/super.c | 4 +-
3 files changed, 176 insertions(+), 3 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3825195539d7..0856afb629e3 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1297,6 +1297,8 @@ extern void ext4_set_bits(void *bm, int cur, int len);
/* Metadata checksum algorithm codes */
#define EXT4_CRC32C_CHKSUM 1
+#define EXT4_LABEL_MAX 16
+
/*
* Structure of the super block
*/
@@ -1346,7 +1348,7 @@ struct ext4_super_block {
/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
__le32 s_feature_ro_compat; /* readonly-compatible feature set */
/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
-/*78*/ char s_volume_name[16]; /* volume name */
+/*78*/ char s_volume_name[EXT4_LABEL_MAX]; /* volume name */
/*88*/ char s_last_mounted[64] __nonstring; /* directory where last mounted */
/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
/*
@@ -3109,6 +3111,8 @@ extern int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait);
extern void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block);
extern int ext4_seq_options_show(struct seq_file *seq, void *offset);
extern int ext4_calculate_overhead(struct super_block *sb);
+extern __le32 ext4_superblock_csum(struct super_block *sb,
+ struct ext4_super_block *es);
extern void ext4_superblock_csum_set(struct super_block *sb);
extern int ext4_alloc_flex_bg_array(struct super_block *sb,
ext4_group_t ngroup);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 606dee9e08a3..1199090e0dbb 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -850,6 +850,166 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
return err;
}
+static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label)
+{
+ char label[EXT4_LABEL_MAX + 1];
+
+ /*
+ * EXT4_LABEL_MAX must always be smaller than FSLABEL_MAX because
+ * FSLABEL_MAX must include terminating null byte, while s_volume_name
+ * does not have to.
+ */
+ BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX);
+
+ memset(label, 0, sizeof(label));
+ lock_buffer(sbi->s_sbh);
+ strncpy(label, sbi->s_es->s_volume_name, EXT4_LABEL_MAX);
+ unlock_buffer(sbi->s_sbh);
+
+ if (copy_to_user(user_label, label, sizeof(label)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ext4_ioctl_setlabel(struct file *filp, const char __user *user_label)
+{
+ size_t len;
+ handle_t *handle;
+ ext4_group_t ngroups;
+ ext4_fsblk_t sb_block;
+ struct buffer_head *bh;
+ int ret = 0, ret2, grp;
+ unsigned long offset = 0;
+ char new_label[EXT4_LABEL_MAX + 1];
+ struct super_block *sb = file_inode(filp)->i_sb;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
+
+ /* Sanity check, this should never happen */
+ BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX);
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ /*
+ * Copy the maximum length allowed for ext4 label with one more to
+ * find the required terminating null byte in order to test the
+ * label length. The on disk label doesn't need to be null terminated.
+ */
+ if (copy_from_user(new_label, user_label, EXT4_LABEL_MAX + 1))
+ return -EFAULT;
+
+ len = strnlen(new_label, EXT4_LABEL_MAX + 1);
+ if (len > EXT4_LABEL_MAX)
+ return -EINVAL;
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, EXT4_MAX_TRANS_DATA);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto err_out;
+ }
+ /* Update the primary superblock first */
+ ret = ext4_journal_get_write_access(handle, sb,
+ sbi->s_sbh,
+ EXT4_JTR_NONE);
+ if (ret)
+ goto err_journal;
+
+ lock_buffer(sbi->s_sbh);
+ memset(es->s_volume_name, 0, sizeof(es->s_volume_name));
+ memcpy(es->s_volume_name, new_label, len);
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(sbi->s_sbh);
+
+ ret = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
+ if (ret)
+ goto err_journal;
+ sync_dirty_buffer(sbi->s_sbh);
+
+ /* Update backup superblocks */
+ ngroups = ext4_get_groups_count(sb);
+ for (grp = 0; grp < ngroups; grp++) {
+
+ if (!ext4_bg_has_super(sb, grp))
+ continue;
+
+ /*
+ * For the group 0 there is always 1k padding, so we have
+ * either adjust offset, or sb_block depending on blocksize
+ */
+ if (grp == 0) {
+ sb_block = 1 * EXT4_MIN_BLOCK_SIZE;
+ offset = do_div(sb_block, sb->s_blocksize);
+ } else {
+ sb_block = ext4_group_first_block_no(sb, grp);
+ offset = 0;
+ }
+
+ /*
+ * Skip primary superblock, it's already done. Note that the
+ * primary superblock is not always at group 0
+ */
+ if (sbi->s_sbh->b_blocknr == sb_block)
+ continue;
+
+ ret = ext4_journal_ensure_credits_fn(handle, 1,
+ EXT4_MAX_TRANS_DATA,
+ 0, 0);
+ if (ret < 0)
+ break;
+
+ bh = ext4_sb_bread(sb, sb_block, 0);
+ if (IS_ERR(bh)) {
+ ret = PTR_ERR(bh);
+ break;
+ }
+
+ ext4_debug("update backup superblock %llu\n", sb_block);
+ BUFFER_TRACE(bh, "get_write_access");
+ ret = ext4_journal_get_write_access(handle, sb,
+ bh,
+ EXT4_JTR_NONE);
+ if (ret) {
+ brelse(bh);
+ break;
+ }
+
+ es = (struct ext4_super_block *) (bh->b_data + offset);
+ lock_buffer(bh);
+ if (ext4_has_metadata_csum(sb) &&
+ es->s_checksum != ext4_superblock_csum(sb, es)) {
+ ext4_msg(sb, KERN_ERR, "Invalid checksum for backup "
+ "superblock %llu\n", sb_block);
+ unlock_buffer(bh);
+ brelse(bh);
+ ret = -EFSBADCRC;
+ break;
+ }
+ memset(es->s_volume_name, 0, sizeof(es->s_volume_name));
+ memcpy(es->s_volume_name, new_label, len);
+ if (ext4_has_metadata_csum(sb))
+ es->s_checksum = ext4_superblock_csum(sb, es);
+ unlock_buffer(bh);
+
+ ret = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (unlikely(ret))
+ ext4_std_error(sb, ret);
+ brelse(bh);
+ }
+
+err_journal:
+ ret2 = ext4_journal_stop(handle);
+ if (ret2 && !ret)
+ ret = ret2;
+err_out:
+ mnt_drop_write_file(filp);
+ ext4_std_error(sb, ret);
+ return ret;
+}
+
static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -1266,6 +1426,13 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case EXT4_IOC_CHECKPOINT:
return ext4_ioctl_checkpoint(filp, arg);
+ case FS_IOC_GETFSLABEL:
+ return ext4_ioctl_getlabel(EXT4_SB(sb), (void __user *)arg);
+
+ case FS_IOC_SETFSLABEL:
+ return ext4_ioctl_setlabel(filp,
+ (const void __user *)arg);
+
default:
return -ENOTTY;
}
@@ -1347,6 +1514,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_GETSTATE:
case EXT4_IOC_GET_ES_CACHE:
case EXT4_IOC_CHECKPOINT:
+ case FS_IOC_GETFSLABEL:
+ case FS_IOC_SETFSLABEL:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a320c54202d9..f6c2f44ab221 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -260,8 +260,8 @@ static int ext4_verify_csum_type(struct super_block *sb,
return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
}
-static __le32 ext4_superblock_csum(struct super_block *sb,
- struct ext4_super_block *es)
+__le32 ext4_superblock_csum(struct super_block *sb,
+ struct ext4_super_block *es)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
int offset = offsetof(struct ext4_super_block, s_checksum);
--
2.31.1
On Mon, Nov 29, 2021 at 01:28:09PM -0700, Andreas Dilger wrote:
> On Nov 12, 2021, at 1:20 AM, Lukas Czerner <[email protected]> wrote:
> >
> > Implement support for FS_IOC_GETFSLABEL and FS_IOC_SETFSLABEL ioctls for
> > online reading and setting of file system label.
> >
> > ext4_ioctl_getlabel() is simple, just get the label from the primary
> > superblock bh. This might not be the first sb on the file system if
> > 'sb=' mount option is used.
> >
> > In ext4_ioctl_setlabel() we update what ext4 currently views as a
> > primary superblock and then proceed to update backup superblocks. There
> > are two caveats:
> > - the primary superblock might not be the first superblock and so it
> > might not be the one used by userspace tools if read directly
> > off the disk.
> > - because the primary superblock might not be the first superblock we
> > potentialy have to update it as part of backup superblock update.
> > However the first sb location is a bit more complicated than the rest
> > so we have to account for that.
> >
> > Tested with generic/492 with various configurations. I also checked the
> > behavior with 'sb=' mount options, including very large file systems
> > with and without sparse_super/sparse_super2.
> >
> > Signed-off-by: Lukas Czerner <[email protected]>
> > ---
>
> One minor issue/question inline.
>
> > +static int ext4_ioctl_setlabel(struct file *filp, const char __user *user_label)
> > +{
> > + size_t len;
> > + handle_t *handle;
> > + ext4_group_t ngroups;
> > + ext4_fsblk_t sb_block;
> > + struct buffer_head *bh;
> > + int ret = 0, ret2, grp;
> > + unsigned long offset = 0;
> > + char new_label[EXT4_LABEL_MAX + 1];
> > + struct super_block *sb = file_inode(filp)->i_sb;
> > + struct ext4_sb_info *sbi = EXT4_SB(sb);
> > + struct ext4_super_block *es = sbi->s_es;
> > +
> > + /* Sanity check, this should never happen */
> > + BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX);
> > +
> > + if (!capable(CAP_SYS_ADMIN))
> > + return -EPERM;
> > + /*
> > + * Copy the maximum length allowed for ext4 label with one more to
> > + * find the required terminating null byte in order to test the
> > + * label length. The on disk label doesn't need to be null terminated.
> > + */
> > + if (copy_from_user(new_label, user_label, EXT4_LABEL_MAX + 1))
> > + return -EFAULT;
> > +
> > + len = strnlen(new_label, EXT4_LABEL_MAX + 1);
> > + if (len > EXT4_LABEL_MAX)
> > + return -EINVAL;
> > +
> > + ret = mnt_want_write_file(filp);
> > + if (ret)
> > + return ret;
> > +
> > + handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, EXT4_MAX_TRANS_DATA);
> > + if (IS_ERR(handle)) {
> > + ret = PTR_ERR(handle);
> > + goto err_out;
> > + }
> > + /* Update the primary superblock first */
> > + ret = ext4_journal_get_write_access(handle, sb,
> > + sbi->s_sbh,
> > + EXT4_JTR_NONE);
> > + if (ret)
> > + goto err_journal;
> > +
> > + lock_buffer(sbi->s_sbh);
> > + memset(es->s_volume_name, 0, sizeof(es->s_volume_name));
> > + memcpy(es->s_volume_name, new_label, len);
>
> (minor) this introduces a very small window where s_volume_name is unset.
> Since "new_label" is already a temporary buffer of the correct size, it
> would be better IMHO to zero it out, copy the new label from userspace
> into it, and then copy EXT4_LABEL_MAX bytes of new_label to s_volume_name.
>
> It still isn't perfect, but reduces the window significantly.
Very good point, I'll fix that in the next version.
Thanks!
-Lukas
>
> > + /* Update backup superblocks */
> > + ngroups = ext4_get_groups_count(sb);
> > + for (grp = 0; grp < ngroups; grp++) {
>
> :
> :
>
> > + ext4_debug("update backup superblock %llu\n", sb_block);
> > + BUFFER_TRACE(bh, "get_write_access");
> > + ret = ext4_journal_get_write_access(handle, sb,
> > + bh,
> > + EXT4_JTR_NONE);
> > + if (ret) {
> > + brelse(bh);
> > + break;
> > + }
> > +
> > + es = (struct ext4_super_block *) (bh->b_data + offset);
> > + lock_buffer(bh);
> > + if (ext4_has_metadata_csum(sb) &&
> > + es->s_checksum != ext4_superblock_csum(sb, es)) {
> > + ext4_msg(sb, KERN_ERR, "Invalid checksum for backup "
> > + "superblock %llu\n", sb_block);
> > + unlock_buffer(bh);
> > + brelse(bh);
> > + ret = -EFSBADCRC;
> > + break;
> > + }
> > + memset(es->s_volume_name, 0, sizeof(es->s_volume_name));
> > + memcpy(es->s_volume_name, new_label, len);
>
> Same here.
>
> The rest looks fine.
>
> Cheers, Andreas
>
>
>
>
>
On Nov 12, 2021, at 1:20 AM, Lukas Czerner <[email protected]> wrote:
>
> Implement support for FS_IOC_GETFSLABEL and FS_IOC_SETFSLABEL ioctls for
> online reading and setting of file system label.
>
> ext4_ioctl_getlabel() is simple, just get the label from the primary
> superblock bh. This might not be the first sb on the file system if
> 'sb=' mount option is used.
>
> In ext4_ioctl_setlabel() we update what ext4 currently views as a
> primary superblock and then proceed to update backup superblocks. There
> are two caveats:
> - the primary superblock might not be the first superblock and so it
> might not be the one used by userspace tools if read directly
> off the disk.
> - because the primary superblock might not be the first superblock we
> potentialy have to update it as part of backup superblock update.
> However the first sb location is a bit more complicated than the rest
> so we have to account for that.
>
> Tested with generic/492 with various configurations. I also checked the
> behavior with 'sb=' mount options, including very large file systems
> with and without sparse_super/sparse_super2.
>
> Signed-off-by: Lukas Czerner <[email protected]>
> ---
One minor issue/question inline.
> +static int ext4_ioctl_setlabel(struct file *filp, const char __user *user_label)
> +{
> + size_t len;
> + handle_t *handle;
> + ext4_group_t ngroups;
> + ext4_fsblk_t sb_block;
> + struct buffer_head *bh;
> + int ret = 0, ret2, grp;
> + unsigned long offset = 0;
> + char new_label[EXT4_LABEL_MAX + 1];
> + struct super_block *sb = file_inode(filp)->i_sb;
> + struct ext4_sb_info *sbi = EXT4_SB(sb);
> + struct ext4_super_block *es = sbi->s_es;
> +
> + /* Sanity check, this should never happen */
> + BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX);
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> + /*
> + * Copy the maximum length allowed for ext4 label with one more to
> + * find the required terminating null byte in order to test the
> + * label length. The on disk label doesn't need to be null terminated.
> + */
> + if (copy_from_user(new_label, user_label, EXT4_LABEL_MAX + 1))
> + return -EFAULT;
> +
> + len = strnlen(new_label, EXT4_LABEL_MAX + 1);
> + if (len > EXT4_LABEL_MAX)
> + return -EINVAL;
> +
> + ret = mnt_want_write_file(filp);
> + if (ret)
> + return ret;
> +
> + handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, EXT4_MAX_TRANS_DATA);
> + if (IS_ERR(handle)) {
> + ret = PTR_ERR(handle);
> + goto err_out;
> + }
> + /* Update the primary superblock first */
> + ret = ext4_journal_get_write_access(handle, sb,
> + sbi->s_sbh,
> + EXT4_JTR_NONE);
> + if (ret)
> + goto err_journal;
> +
> + lock_buffer(sbi->s_sbh);
> + memset(es->s_volume_name, 0, sizeof(es->s_volume_name));
> + memcpy(es->s_volume_name, new_label, len);
(minor) this introduces a very small window where s_volume_name is unset.
Since "new_label" is already a temporary buffer of the correct size, it
would be better IMHO to zero it out, copy the new label from userspace
into it, and then copy EXT4_LABEL_MAX bytes of new_label to s_volume_name.
It still isn't perfect, but reduces the window significantly.
> + /* Update backup superblocks */
> + ngroups = ext4_get_groups_count(sb);
> + for (grp = 0; grp < ngroups; grp++) {
:
:
> + ext4_debug("update backup superblock %llu\n", sb_block);
> + BUFFER_TRACE(bh, "get_write_access");
> + ret = ext4_journal_get_write_access(handle, sb,
> + bh,
> + EXT4_JTR_NONE);
> + if (ret) {
> + brelse(bh);
> + break;
> + }
> +
> + es = (struct ext4_super_block *) (bh->b_data + offset);
> + lock_buffer(bh);
> + if (ext4_has_metadata_csum(sb) &&
> + es->s_checksum != ext4_superblock_csum(sb, es)) {
> + ext4_msg(sb, KERN_ERR, "Invalid checksum for backup "
> + "superblock %llu\n", sb_block);
> + unlock_buffer(bh);
> + brelse(bh);
> + ret = -EFSBADCRC;
> + break;
> + }
> + memset(es->s_volume_name, 0, sizeof(es->s_volume_name));
> + memcpy(es->s_volume_name, new_label, len);
Same here.
The rest looks fine.
Cheers, Andreas
On Fri, Nov 12, 2021 at 09:20:19AM +0100, Lukas Czerner wrote:
> + /* Update backup superblocks */
> + ngroups = ext4_get_groups_count(sb);
> + for (grp = 0; grp < ngroups; grp++) {
> +
...
> + ret = ext4_journal_ensure_credits_fn(handle, 1,
> + EXT4_MAX_TRANS_DATA,
> + 0, 0);
> + if (ret < 0)
> + break;
This doesn't look right. This will try to make sure there is at least
one credit left on the handle, and if there isn't it will attempt to
add EXT4_MAX_TRANS_DATA to the handle --- and if there isn't enough
room remaining in the journal to add that number of credits, no
credits will be added, and ext4_journal_ensure_credits_fn() will
return a positive integer (in our current implementation it will
always return 1).
So once run out of credits, and there is no more room in the journal,
we we will proceed, and when we try to modify the backup superblock, a
WARN_ON will be triggered and ext4_handle_dirty_metadata() will
trigger an ext4_error(), which would be unfortunate.
I'd also point out that for very large file systems, I'm not convinced
that we need to atomically update all of the backup superblocks at the
same time. Sure, probably makes sense to update the primary, and
superblocks for block groups 0 and 1 atomically (or s_backup_bgs[0,1]
a sparse_super2 file system) using the journal.
But after that? I'd suggest not running the updates for the rest
through the journal at all, and just write them out directly. Nothing
else will try to read or write the backup superblock blocks, so
there's no reason why we have to be super careful writing out the
rest. If we crash after we've only updated the first 20 backup
superblocks --- that's probably 18 more than a user will actually use
in the first place.
That allows us to simply reserve 3 credits, and we won't need to try
to extend the handle, which means we don't have to implement some kind
of fallback logic in case the handle extension fails.
One other comment. Eventually (and not so in the distant future)
we're going to want to use the same superblock updating logic to
handle changing the UUID, and possibly, for other tune2fs operations.
The reason for this is that there are some people who are trying to
update the UUID and resize the file system to fit the size of the
cloud block device (e.g., either an Amazon EBS or GCE's PD) in
separate systemd unit scripts. This results in race conditions that
can cause either the tune2fs or resize2fs to fail --- rarely, but if
you are starting up thousands and thousands of VM's per day, even the
rare becomes common place. This is the reason of e2fsprogs commit
6338a8467564 ("libext2fs: retry reading superblock on open when
checksum is bad") but that turns out not to be enough; although it
does reduce the incidence rate by another order of magnitude or two.
So.... we should probably have a mutex which prevents two ioctls
which is modifying the superblock from running at the same time. It's
*probably* going to be OK for now, since the second ioctl racing to
update the superblock will update the checksum, and so long as we have
journalling enabled, we shouldn't have a bad checksum end up on disk.
But we're going to want to add an ioctl to fetch the superblock, and
at that point we'll definitely need the mutex to protect the
superblock getter from getting an inconsistent view of the superblock.
The other thing that might be nice would be if the superblock update
function was abstracted out, and the FS_IOC_SETLABEL ioctl provided a
callback function which updates the label.
Neither of these two suggestions are strictly necessary for your patch
series (although the mutex will prevent problems with racing
FS_IOC_SETLABEL and FS_IOC_GETLABEL ioctls), so if you don't want to
make these changes now, I'm not going to insist on them; we can
always make these improvements when we implement FS_IOC_SETUUID,
FS_IOC_GETUUID, and EXT4_IOC_GET_SB. (BTW, I believe Darrick has
patches to implement FS_IOC_[SG]ETUUID for xfs and possibly some other
file systems, IIRC, but those have never been landed in Linus's tree.)
And finally, thanks for working on FS_IOC_SETLABEL! It has been on my
todo list for a long time, but it's never managed to make the top of
the priority queue...
Cheers,
= Ted
On Mon, Nov 29, 2021 at 10:00:08PM -0500, Theodore Y. Ts'o wrote:
> On Fri, Nov 12, 2021 at 09:20:19AM +0100, Lukas Czerner wrote:
> > + /* Update backup superblocks */
> > + ngroups = ext4_get_groups_count(sb);
> > + for (grp = 0; grp < ngroups; grp++) {
> > +
> ...
> > + ret = ext4_journal_ensure_credits_fn(handle, 1,
> > + EXT4_MAX_TRANS_DATA,
> > + 0, 0);
> > + if (ret < 0)
> > + break;
>
> This doesn't look right. This will try to make sure there is at least
> one credit left on the handle, and if there isn't it will attempt to
> add EXT4_MAX_TRANS_DATA to the handle --- and if there isn't enough
> room remaining in the journal to add that number of credits, no
> credits will be added, and ext4_journal_ensure_credits_fn() will
> return a positive integer (in our current implementation it will
> always return 1).
Oops, I was sure I've seen this somewhere in the code, but I guess I was
wrong. Should have checked what it actually returns. Thanks for pointing
this out.
>
> So once run out of credits, and there is no more room in the journal,
> we we will proceed, and when we try to modify the backup superblock, a
> WARN_ON will be triggered and ext4_handle_dirty_metadata() will
> trigger an ext4_error(), which would be unfortunate.
>
> I'd also point out that for very large file systems, I'm not convinced
> that we need to atomically update all of the backup superblocks at the
> same time. Sure, probably makes sense to update the primary, and
> superblocks for block groups 0 and 1 atomically (or s_backup_bgs[0,1]
> a sparse_super2 file system) using the journal.
>
> But after that? I'd suggest not running the updates for the rest
> through the journal at all, and just write them out directly. Nothing
> else will try to read or write the backup superblock blocks, so
> there's no reason why we have to be super careful writing out the
> rest. If we crash after we've only updated the first 20 backup
> superblocks --- that's probably 18 more than a user will actually use
> in the first place.
>
> That allows us to simply reserve 3 credits, and we won't need to try
> to extend the handle, which means we don't have to implement some kind
> of fallback logic in case the handle extension fails.
I think I agree. But in this case should we at least attempt to check
and update the backup superblocks in fsck? Not sure if we do that
already.
>
>
> One other comment. Eventually (and not so in the distant future)
> we're going to want to use the same superblock updating logic to
> handle changing the UUID, and possibly, for other tune2fs operations.
> The reason for this is that there are some people who are trying to
> update the UUID and resize the file system to fit the size of the
> cloud block device (e.g., either an Amazon EBS or GCE's PD) in
> separate systemd unit scripts. This results in race conditions that
> can cause either the tune2fs or resize2fs to fail --- rarely, but if
> you are starting up thousands and thousands of VM's per day, even the
> rare becomes common place. This is the reason of e2fsprogs commit
> 6338a8467564 ("libext2fs: retry reading superblock on open when
> checksum is bad") but that turns out not to be enough; although it
> does reduce the incidence rate by another order of magnitude or two.
>
> So.... we should probably have a mutex which prevents two ioctls
> which is modifying the superblock from running at the same time. It's
> *probably* going to be OK for now, since the second ioctl racing to
> update the superblock will update the checksum, and so long as we have
> journalling enabled, we shouldn't have a bad checksum end up on disk.
> But we're going to want to add an ioctl to fetch the superblock, and
> at that point we'll definitely need the mutex to protect the
> superblock getter from getting an inconsistent view of the superblock.
>
> The other thing that might be nice would be if the superblock update
> function was abstracted out, and the FS_IOC_SETLABEL ioctl provided a
> callback function which updates the label.
>
> Neither of these two suggestions are strictly necessary for your patch
> series (although the mutex will prevent problems with racing
> FS_IOC_SETLABEL and FS_IOC_GETLABEL ioctls), so if you don't want to
> make these changes now, I'm not going to insist on them; we can
> always make these improvements when we implement FS_IOC_SETUUID,
> FS_IOC_GETUUID, and EXT4_IOC_GET_SB. (BTW, I believe Darrick has
> patches to implement FS_IOC_[SG]ETUUID for xfs and possibly some other
> file systems, IIRC, but those have never been landed in Linus's tree.)
It's not a critical functionality so it can wait. I'll think about
implementing the superblock modification system. Thanks for the useful
pointers.
>
> And finally, thanks for working on FS_IOC_SETLABEL! It has been on my
> todo list for a long time, but it's never managed to make the top of
> the priority queue...
No problem, I am happy to help.
-Lukas
>
> Cheers,
>
> = Ted
>
On Tue, Nov 30, 2021 at 10:49:50AM +0100, Lukas Czerner wrote:
> > But after that? I'd suggest not running the updates for the rest
> > through the journal at all, and just write them out directly. Nothing
> > else will try to read or write the backup superblock blocks, so
> > there's no reason why we have to be super careful writing out the
> > rest. If we crash after we've only updated the first 20 backup
> > superblocks --- that's probably 18 more than a user will actually use
> > in the first place.
> >
> > That allows us to simply reserve 3 credits, and we won't need to try
> > to extend the handle, which means we don't have to implement some kind
> > of fallback logic in case the handle extension fails.
>
> I think I agree. But in this case should we at least attempt to check
> and update the backup superblocks in fsck? Not sure if we do that
> already.
Well, after a successful file system check by fsck, we'll update all
of the backup superblocks. If we've done a full file system check we
know that the primary superblock is consistent with the rest of the
file system, so at that point it's safe to write it to all of the
backup superblocks in the file system.
But if we haven't done the full file system check, we won't know
whether it is the primary or the backup superblock which is incorrect.
I guess we could do the basic superblock checks, and if there are at
least two additional superblocks, we see if we have do a 2 out of 3
voting check. Or if there are differences between the primary and the
backup we could force a full check.
I think in practice though, so long as the primary and two backup
superblocks are part of the jbd2 transaction, that should be good
enough in terms of recovery since usually most users only use the
first backup superblock to recover if the primary is damaged. Whether
we update the rest of the backup superblocks improves things, but it's
not really going to make a difference 99.99% of the time.
- Ted
Implement support for FS_IOC_GETFSLABEL and FS_IOC_SETFSLABEL ioctls for
online reading and setting of file system label.
ext4_ioctl_getlabel() is simple, just get the label from the primary
superblock bh. This might not be the first sb on the file system if
'sb=' mount option is used.
In ext4_ioctl_setlabel() we update what ext4 currently views as a
primary superblock and then proceed to update backup superblocks. There
are two caveats:
- the primary superblock might not be the first superblock and so it
might not be the one used by userspace tools if read directly
off the disk.
- because the primary superblock might not be the first superblock we
potentialy have to update it as part of backup superblock update.
However the first sb location is a bit more complicated than the rest
so we have to account for that.
The superblock modification is created generic enough so the infrastructure
can be used for other potential superblock modification operations
operations, such as chaning UUID.
Tested with generic/492 with various configurations. I also checked the
behavior with 'sb=' mount options, including very large file systems
with and without sparse_super/sparse_super2.
Signed-off-by: Lukas Czerner <[email protected]>
---
V2: Fix typo. Place constant in BUILD_BUG_ON comparison on the right side
V3: Setlabel completely reworked
fs/ext4/ext4.h | 9 +-
fs/ext4/ioctl.c | 292 ++++++++++++++++++++++++++++++++++++
fs/ext4/resize.c | 19 ++-
fs/ext4/super.c | 4 +-
include/trace/events/ext4.h | 22 +++
5 files changed, 339 insertions(+), 7 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 404dd50856e5..f355deb619a2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1298,6 +1298,8 @@ extern void ext4_set_bits(void *bm, int cur, int len);
/* Metadata checksum algorithm codes */
#define EXT4_CRC32C_CHKSUM 1
+#define EXT4_LABEL_MAX 16
+
/*
* Structure of the super block
*/
@@ -1347,7 +1349,7 @@ struct ext4_super_block {
/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
__le32 s_feature_ro_compat; /* readonly-compatible feature set */
/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
-/*78*/ char s_volume_name[16]; /* volume name */
+/*78*/ char s_volume_name[EXT4_LABEL_MAX]; /* volume name */
/*88*/ char s_last_mounted[64] __nonstring; /* directory where last mounted */
/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
/*
@@ -3096,6 +3098,9 @@ extern int ext4_group_extend(struct super_block *sb,
struct ext4_super_block *es,
ext4_fsblk_t n_blocks_count);
extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
+extern unsigned int ext4_list_backups(struct super_block *sb,
+ unsigned int *three, unsigned int *five,
+ unsigned int *seven);
/* super.c */
extern struct buffer_head *ext4_sb_bread(struct super_block *sb,
@@ -3110,6 +3115,8 @@ extern int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait);
extern void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block);
extern int ext4_seq_options_show(struct seq_file *seq, void *offset);
extern int ext4_calculate_overhead(struct super_block *sb);
+extern __le32 ext4_superblock_csum(struct super_block *sb,
+ struct ext4_super_block *es);
extern void ext4_superblock_csum_set(struct super_block *sb);
extern int ext4_alloc_flex_bg_array(struct super_block *sb,
ext4_group_t ngroup);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 606dee9e08a3..285862288ecb 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -27,6 +27,231 @@
#include "fsmap.h"
#include <trace/events/ext4.h>
+typedef void ext4_modify_sb_callback(struct ext4_super_block *es,
+ const void *arg);
+
+/*
+ * Superblock modification callback function for changing file system
+ * label
+ */
+static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg)
+{
+ /* Sanity check, this should never happen */
+ BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX);
+
+ memcpy(es->s_volume_name, (char *)arg, EXT4_LABEL_MAX);
+}
+
+int ext4_modify_primary_sb(struct super_block *sb, handle_t *handle,
+ ext4_modify_sb_callback func,
+ const void *arg)
+{
+ int err = 0;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct buffer_head *bh = sbi->s_sbh;
+ struct ext4_super_block *es = sbi->s_es;
+
+ trace_ext4_modify_sb(sb, bh->b_blocknr, 1);
+
+ BUFFER_TRACE(bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, sb,
+ bh,
+ EXT4_JTR_NONE);
+ if (err)
+ goto out_err;
+
+ lock_buffer(bh);
+ func(es, arg);
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(bh);
+
+ if (buffer_write_io_error(bh) || !buffer_uptodate(bh)) {
+ ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
+ "superblock detected");
+ clear_buffer_write_io_error(bh);
+ set_buffer_uptodate(bh);
+ }
+
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (err)
+ goto out_err;
+ err = sync_dirty_buffer(bh);
+out_err:
+ ext4_std_error(sb, err);
+ return err;
+}
+
+/*
+ * Update one backup superblcok in the group 'grp' using the primary
+ * superblock data. If the handle is NULL the modification is not
+ * journalled.
+ *
+ * Returns: 0 when no modification was done (no superblock in the group)
+ * 1 when the modification was successful
+ * <0 on error
+ */
+static int ext4_update_backup_sb(struct super_block *sb, handle_t *handle,
+ ext4_group_t grp)
+{
+ int err = 0;
+ ext4_fsblk_t sb_block;
+ struct buffer_head *bh;
+ unsigned long offset = 0;
+
+ if (!ext4_bg_has_super(sb, grp))
+ return 0;
+
+ /*
+ * For the group 0 there is always 1k padding, so we have
+ * either adjust offset, or sb_block depending on blocksize
+ */
+ if (grp == 0) {
+ sb_block = 1 * EXT4_MIN_BLOCK_SIZE;
+ offset = do_div(sb_block, sb->s_blocksize);
+ } else {
+ sb_block = ext4_group_first_block_no(sb, grp);
+ offset = 0;
+ }
+
+ trace_ext4_modify_sb(sb, sb_block, handle ? 1 : 0);
+
+ bh = sb_getblk(sb, sb_block);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
+
+ if (handle) {
+ BUFFER_TRACE(bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, sb,
+ bh,
+ EXT4_JTR_NONE);
+ if (err)
+ goto out_bh;
+ }
+
+ lock_buffer(bh);
+ memcpy(bh->b_data + offset, EXT4_SB(sb)->s_es,
+ sizeof(struct ext4_super_block));
+ set_buffer_uptodate(bh);
+ unlock_buffer(bh);
+
+ if (err)
+ goto out_bh;
+
+ if (handle) {
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (err)
+ goto out_bh;
+ } else {
+ BUFFER_TRACE(bh, "marking dirty");
+ mark_buffer_dirty(bh);
+ }
+ err = sync_dirty_buffer(bh);
+
+out_bh:
+ brelse(bh);
+ ext4_std_error(sb, err);
+ return (err) ? err : 1;
+}
+
+/*
+ * Modify primary and backup superblocks using the provided function
+ * func and argument arg.
+ *
+ * Only the primary superblock and at most two backup superblock
+ * modifications are journalled; the rest is modified without journal.
+ * This is safe because e2fsck will re-write them if there is a problem,
+ * and we're very unlikely to ever need more than two backups.
+ */
+int ext4_modify_superblocks_fn(struct super_block *sb,
+ ext4_modify_sb_callback func,
+ const void *arg)
+{
+ handle_t *handle;
+ ext4_group_t ngroups;
+ unsigned int three = 1;
+ unsigned int five = 5;
+ unsigned int seven = 7;
+ int err = 0, ret, i;
+ ext4_group_t grp, primary_grp;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ /*
+ * We can't modify superblocks while the online resize is running
+ */
+ if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING,
+ &sbi->s_ext4_flags)) {
+ ext4_msg(sb, KERN_ERR, "Can't modify superblock while"
+ "performing online resize");
+ return -EBUSY;
+ }
+
+ /*
+ * We're only going to modify primary superblock and two
+ * backup superblocks in this transaction.
+ */
+ handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 3);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto out;
+ }
+
+ /* Modify primary superblock */
+ err = ext4_modify_primary_sb(sb, handle, func, arg);
+ if (err) {
+ ext4_msg(sb, KERN_ERR, "Failed to modify primary "
+ "superblock");
+ goto out_journal;
+ }
+
+ primary_grp = ext4_get_group_number(sb, sbi->s_sbh->b_blocknr);
+ ngroups = ext4_get_groups_count(sb);
+
+ /*
+ * Update backup superblocks. We have to start from group 0
+ * because it might not be where the primary superblock is
+ * if the fs is mounted with -o sb=<backup_sb_block>
+ */
+ i = 0;
+ grp = 0;
+ while (grp < ngroups) {
+ /* Skip primary superblock */
+ if (grp == primary_grp)
+ goto next_grp;
+
+ ret = ext4_update_backup_sb(sb, handle, grp);
+ if (ret < 0) {
+ err = ret;
+ goto out_journal;
+ }
+
+ i += ret;
+ if (handle && i > 1) {
+ /*
+ * We're only journalling primary superblock and
+ * two backup superblocks; the rest is not
+ * journalled.
+ */
+ err = ext4_journal_stop(handle);
+ if (err)
+ goto out;
+ handle = NULL;
+ }
+next_grp:
+ grp = ext4_list_backups(sb, &three, &five, &seven);
+ }
+
+out_journal:
+ if (handle) {
+ ret = ext4_journal_stop(handle);
+ if (ret && !err)
+ err = ret;
+ }
+out:
+ clear_bit_unlock(EXT4_FLAGS_RESIZING, &sbi->s_ext4_flags);
+ smp_mb__after_atomic();
+ return err ? err : 0;
+}
+
/**
* Swap memory between @a and @b for @len bytes.
*
@@ -850,6 +1075,64 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
return err;
}
+static int ext4_ioctl_setlabel(struct file *filp, const char __user *user_label)
+{
+ size_t len;
+ int ret = 0;
+ char new_label[EXT4_LABEL_MAX + 1];
+ struct super_block *sb = file_inode(filp)->i_sb;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /*
+ * Copy the maximum length allowed for ext4 label with one more to
+ * find the required terminating null byte in order to test the
+ * label length. The on disk label doesn't need to be null terminated.
+ */
+ if (copy_from_user(new_label, user_label, EXT4_LABEL_MAX + 1))
+ return -EFAULT;
+
+ len = strnlen(new_label, EXT4_LABEL_MAX + 1);
+ if (len > EXT4_LABEL_MAX)
+ return -EINVAL;
+
+ /*
+ * Clear the buffer after the new label
+ */
+ memset(new_label + len, 0, EXT4_LABEL_MAX - len);
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ ret = ext4_modify_superblocks_fn(sb, ext4_sb_setlabel, new_label);
+
+ mnt_drop_write_file(filp);
+ return ret;
+}
+
+static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label)
+{
+ char label[EXT4_LABEL_MAX + 1];
+
+ /*
+ * EXT4_LABEL_MAX must always be smaller than FSLABEL_MAX because
+ * FSLABEL_MAX must include terminating null byte, while s_volume_name
+ * does not have to.
+ */
+ BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX);
+
+ memset(label, 0, sizeof(label));
+ lock_buffer(sbi->s_sbh);
+ strncpy(label, sbi->s_es->s_volume_name, EXT4_LABEL_MAX);
+ unlock_buffer(sbi->s_sbh);
+
+ if (copy_to_user(user_label, label, sizeof(label)))
+ return -EFAULT;
+ return 0;
+}
+
static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -1266,6 +1549,13 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case EXT4_IOC_CHECKPOINT:
return ext4_ioctl_checkpoint(filp, arg);
+ case FS_IOC_GETFSLABEL:
+ return ext4_ioctl_getlabel(EXT4_SB(sb), (void __user *)arg);
+
+ case FS_IOC_SETFSLABEL:
+ return ext4_ioctl_setlabel(filp,
+ (const void __user *)arg);
+
default:
return -ENOTTY;
}
@@ -1347,6 +1637,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_GETSTATE:
case EXT4_IOC_GET_ES_CACHE:
case EXT4_IOC_CHECKPOINT:
+ case FS_IOC_GETFSLABEL:
+ case FS_IOC_SETFSLABEL:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index b63cb88ccdae..ee8f02f406cb 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -717,12 +717,23 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
* sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
* For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
*/
-static unsigned ext4_list_backups(struct super_block *sb, unsigned *three,
- unsigned *five, unsigned *seven)
+unsigned int ext4_list_backups(struct super_block *sb, unsigned int *three,
+ unsigned int *five, unsigned int *seven)
{
- unsigned *min = three;
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ unsigned int *min = three;
int mult = 3;
- unsigned ret;
+ unsigned int ret;
+
+ if (ext4_has_feature_sparse_super2(sb)) {
+ do {
+ if (*min > 2)
+ return UINT_MAX;
+ ret = le32_to_cpu(es->s_backup_bgs[*min - 1]);
+ *min += 1;
+ } while (!ret);
+ return ret;
+ }
if (!ext4_has_feature_sparse_super(sb)) {
ret = *min;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4e33b5eca694..d79182793675 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -260,8 +260,8 @@ static int ext4_verify_csum_type(struct super_block *sb,
return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
}
-static __le32 ext4_superblock_csum(struct super_block *sb,
- struct ext4_super_block *es)
+__le32 ext4_superblock_csum(struct super_block *sb,
+ struct ext4_super_block *es)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
int offset = offsetof(struct ext4_super_block, s_checksum);
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 0ea36b2b0662..17d5a8d0fbfd 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2837,6 +2837,28 @@ TRACE_EVENT(ext4_fc_track_range,
__entry->end)
);
+TRACE_EVENT(ext4_modify_sb,
+ TP_PROTO(struct super_block *sb, ext4_fsblk_t fsblk, unsigned flags),
+
+ TP_ARGS(sb, fsblk, flags),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ext4_fsblk_t, fsblk)
+ __field(unsigned int, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->fsblk = fsblk;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("dev %d,%d fsblk %llu flags %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->fsblk, __entry->flags)
+);
+
#endif /* _TRACE_EXT4_H */
/* This part must be outside protection */
--
2.31.1
Hi Ted,
in the previous version of the patch you asked that the superblock
modification is generic enough so that it can be used for future
features (like changing UUID for example). Does it look more or less
like what you had in mind ?
You also mentioned the superblock locking so that we can do multiple
modifications operations safely, including some future GET_SUPER ioctl.
This is not part of the patch, but I have some ideas about more changes.
There are couple of places in ext4 where we change superblock using
journal; set features, generate s_encrypt_pw_salt, modify s_last_orphan,
s_last_mounted and there is also ext4_update_super() in
flush_stashed_error_work(). Also all the wild things done in resize.c.
I think we should consolidate all or most of those under a single helper
and I was thiking about how to go about it cleanly. We could play games
with modifying s_es directly, which just points into s_sbh. And rely on
the fact that it's read only once so we maybe should be able to modify
it and then do the journal dance afterwards? I don't know if that's even
allowed, but it sounds weird to me.
Other option would be to have an exact, but in-memory representation of
on-disk superblock, make any necessary modifications on that structure,
have a lock protecting it and then creating a generic commit_superblock
function (journalled and non-journalled) to translate fields to s_es
(perhaps reuse ext4_update_super) and commit it to disk.
One disadvantage might be that on-disk modification from userspace on
mounted file systems will not work anymore, since it will always be
overwriten by the in-kernel in-memory representation.
What do you think ?
Btw, I'd rather not wait with get/set label until after all of that if
possible, so that I can get it off my mind for now.
Thanks!
-Lukas
On Fri, Dec 10, 2021 at 04:16:24PM +0100, Lukas Czerner wrote:
> Implement support for FS_IOC_GETFSLABEL and FS_IOC_SETFSLABEL ioctls for
> online reading and setting of file system label.
>
> ext4_ioctl_getlabel() is simple, just get the label from the primary
> superblock bh. This might not be the first sb on the file system if
> 'sb=' mount option is used.
>
> In ext4_ioctl_setlabel() we update what ext4 currently views as a
> primary superblock and then proceed to update backup superblocks. There
> are two caveats:
> - the primary superblock might not be the first superblock and so it
> might not be the one used by userspace tools if read directly
> off the disk.
> - because the primary superblock might not be the first superblock we
> potentialy have to update it as part of backup superblock update.
> However the first sb location is a bit more complicated than the rest
> so we have to account for that.
>
> The superblock modification is created generic enough so the infrastructure
> can be used for other potential superblock modification operations
> operations, such as chaning UUID.
>
> Tested with generic/492 with various configurations. I also checked the
> behavior with 'sb=' mount options, including very large file systems
> with and without sparse_super/sparse_super2.
>
> Signed-off-by: Lukas Czerner <[email protected]>
> ---
> V2: Fix typo. Place constant in BUILD_BUG_ON comparison on the right side
> V3: Setlabel completely reworked
>
> fs/ext4/ext4.h | 9 +-
> fs/ext4/ioctl.c | 292 ++++++++++++++++++++++++++++++++++++
> fs/ext4/resize.c | 19 ++-
> fs/ext4/super.c | 4 +-
> include/trace/events/ext4.h | 22 +++
> 5 files changed, 339 insertions(+), 7 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 404dd50856e5..f355deb619a2 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1298,6 +1298,8 @@ extern void ext4_set_bits(void *bm, int cur, int len);
> /* Metadata checksum algorithm codes */
> #define EXT4_CRC32C_CHKSUM 1
>
> +#define EXT4_LABEL_MAX 16
> +
> /*
> * Structure of the super block
> */
> @@ -1347,7 +1349,7 @@ struct ext4_super_block {
> /*60*/ __le32 s_feature_incompat; /* incompatible feature set */
> __le32 s_feature_ro_compat; /* readonly-compatible feature set */
> /*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
> -/*78*/ char s_volume_name[16]; /* volume name */
> +/*78*/ char s_volume_name[EXT4_LABEL_MAX]; /* volume name */
> /*88*/ char s_last_mounted[64] __nonstring; /* directory where last mounted */
> /*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
> /*
> @@ -3096,6 +3098,9 @@ extern int ext4_group_extend(struct super_block *sb,
> struct ext4_super_block *es,
> ext4_fsblk_t n_blocks_count);
> extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
> +extern unsigned int ext4_list_backups(struct super_block *sb,
> + unsigned int *three, unsigned int *five,
> + unsigned int *seven);
>
> /* super.c */
> extern struct buffer_head *ext4_sb_bread(struct super_block *sb,
> @@ -3110,6 +3115,8 @@ extern int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait);
> extern void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block);
> extern int ext4_seq_options_show(struct seq_file *seq, void *offset);
> extern int ext4_calculate_overhead(struct super_block *sb);
> +extern __le32 ext4_superblock_csum(struct super_block *sb,
> + struct ext4_super_block *es);
> extern void ext4_superblock_csum_set(struct super_block *sb);
> extern int ext4_alloc_flex_bg_array(struct super_block *sb,
> ext4_group_t ngroup);
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 606dee9e08a3..285862288ecb 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -27,6 +27,231 @@
> #include "fsmap.h"
> #include <trace/events/ext4.h>
>
> +typedef void ext4_modify_sb_callback(struct ext4_super_block *es,
> + const void *arg);
> +
> +/*
> + * Superblock modification callback function for changing file system
> + * label
> + */
> +static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg)
> +{
> + /* Sanity check, this should never happen */
> + BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX);
> +
> + memcpy(es->s_volume_name, (char *)arg, EXT4_LABEL_MAX);
> +}
> +
> +int ext4_modify_primary_sb(struct super_block *sb, handle_t *handle,
> + ext4_modify_sb_callback func,
> + const void *arg)
> +{
> + int err = 0;
> + struct ext4_sb_info *sbi = EXT4_SB(sb);
> + struct buffer_head *bh = sbi->s_sbh;
> + struct ext4_super_block *es = sbi->s_es;
> +
> + trace_ext4_modify_sb(sb, bh->b_blocknr, 1);
> +
> + BUFFER_TRACE(bh, "get_write_access");
> + err = ext4_journal_get_write_access(handle, sb,
> + bh,
> + EXT4_JTR_NONE);
> + if (err)
> + goto out_err;
> +
> + lock_buffer(bh);
> + func(es, arg);
> + ext4_superblock_csum_set(sb);
> + unlock_buffer(bh);
> +
> + if (buffer_write_io_error(bh) || !buffer_uptodate(bh)) {
> + ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
> + "superblock detected");
> + clear_buffer_write_io_error(bh);
> + set_buffer_uptodate(bh);
> + }
> +
> + err = ext4_handle_dirty_metadata(handle, NULL, bh);
> + if (err)
> + goto out_err;
> + err = sync_dirty_buffer(bh);
> +out_err:
> + ext4_std_error(sb, err);
> + return err;
> +}
> +
> +/*
> + * Update one backup superblcok in the group 'grp' using the primary
> + * superblock data. If the handle is NULL the modification is not
> + * journalled.
> + *
> + * Returns: 0 when no modification was done (no superblock in the group)
> + * 1 when the modification was successful
> + * <0 on error
> + */
> +static int ext4_update_backup_sb(struct super_block *sb, handle_t *handle,
> + ext4_group_t grp)
> +{
> + int err = 0;
> + ext4_fsblk_t sb_block;
> + struct buffer_head *bh;
> + unsigned long offset = 0;
> +
> + if (!ext4_bg_has_super(sb, grp))
> + return 0;
> +
> + /*
> + * For the group 0 there is always 1k padding, so we have
> + * either adjust offset, or sb_block depending on blocksize
> + */
> + if (grp == 0) {
> + sb_block = 1 * EXT4_MIN_BLOCK_SIZE;
> + offset = do_div(sb_block, sb->s_blocksize);
> + } else {
> + sb_block = ext4_group_first_block_no(sb, grp);
> + offset = 0;
> + }
> +
> + trace_ext4_modify_sb(sb, sb_block, handle ? 1 : 0);
> +
> + bh = sb_getblk(sb, sb_block);
> + if (IS_ERR(bh))
> + return PTR_ERR(bh);
> +
> + if (handle) {
> + BUFFER_TRACE(bh, "get_write_access");
> + err = ext4_journal_get_write_access(handle, sb,
> + bh,
> + EXT4_JTR_NONE);
> + if (err)
> + goto out_bh;
> + }
> +
> + lock_buffer(bh);
> + memcpy(bh->b_data + offset, EXT4_SB(sb)->s_es,
> + sizeof(struct ext4_super_block));
> + set_buffer_uptodate(bh);
> + unlock_buffer(bh);
> +
> + if (err)
> + goto out_bh;
> +
> + if (handle) {
> + err = ext4_handle_dirty_metadata(handle, NULL, bh);
> + if (err)
> + goto out_bh;
> + } else {
> + BUFFER_TRACE(bh, "marking dirty");
> + mark_buffer_dirty(bh);
> + }
> + err = sync_dirty_buffer(bh);
> +
> +out_bh:
> + brelse(bh);
> + ext4_std_error(sb, err);
> + return (err) ? err : 1;
> +}
> +
> +/*
> + * Modify primary and backup superblocks using the provided function
> + * func and argument arg.
> + *
> + * Only the primary superblock and at most two backup superblock
> + * modifications are journalled; the rest is modified without journal.
> + * This is safe because e2fsck will re-write them if there is a problem,
> + * and we're very unlikely to ever need more than two backups.
> + */
> +int ext4_modify_superblocks_fn(struct super_block *sb,
> + ext4_modify_sb_callback func,
> + const void *arg)
> +{
> + handle_t *handle;
> + ext4_group_t ngroups;
> + unsigned int three = 1;
> + unsigned int five = 5;
> + unsigned int seven = 7;
> + int err = 0, ret, i;
> + ext4_group_t grp, primary_grp;
> + struct ext4_sb_info *sbi = EXT4_SB(sb);
> +
> + /*
> + * We can't modify superblocks while the online resize is running
> + */
> + if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING,
> + &sbi->s_ext4_flags)) {
> + ext4_msg(sb, KERN_ERR, "Can't modify superblock while"
> + "performing online resize");
> + return -EBUSY;
> + }
> +
> + /*
> + * We're only going to modify primary superblock and two
> + * backup superblocks in this transaction.
> + */
> + handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 3);
> + if (IS_ERR(handle)) {
> + err = PTR_ERR(handle);
> + goto out;
> + }
> +
> + /* Modify primary superblock */
> + err = ext4_modify_primary_sb(sb, handle, func, arg);
> + if (err) {
> + ext4_msg(sb, KERN_ERR, "Failed to modify primary "
> + "superblock");
> + goto out_journal;
> + }
> +
> + primary_grp = ext4_get_group_number(sb, sbi->s_sbh->b_blocknr);
> + ngroups = ext4_get_groups_count(sb);
> +
> + /*
> + * Update backup superblocks. We have to start from group 0
> + * because it might not be where the primary superblock is
> + * if the fs is mounted with -o sb=<backup_sb_block>
> + */
> + i = 0;
> + grp = 0;
> + while (grp < ngroups) {
> + /* Skip primary superblock */
> + if (grp == primary_grp)
> + goto next_grp;
> +
> + ret = ext4_update_backup_sb(sb, handle, grp);
> + if (ret < 0) {
> + err = ret;
> + goto out_journal;
> + }
> +
> + i += ret;
> + if (handle && i > 1) {
> + /*
> + * We're only journalling primary superblock and
> + * two backup superblocks; the rest is not
> + * journalled.
> + */
> + err = ext4_journal_stop(handle);
> + if (err)
> + goto out;
> + handle = NULL;
> + }
> +next_grp:
> + grp = ext4_list_backups(sb, &three, &five, &seven);
> + }
> +
> +out_journal:
> + if (handle) {
> + ret = ext4_journal_stop(handle);
> + if (ret && !err)
> + err = ret;
> + }
> +out:
> + clear_bit_unlock(EXT4_FLAGS_RESIZING, &sbi->s_ext4_flags);
> + smp_mb__after_atomic();
> + return err ? err : 0;
> +}
> +
> /**
> * Swap memory between @a and @b for @len bytes.
> *
> @@ -850,6 +1075,64 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
> return err;
> }
>
> +static int ext4_ioctl_setlabel(struct file *filp, const char __user *user_label)
> +{
> + size_t len;
> + int ret = 0;
> + char new_label[EXT4_LABEL_MAX + 1];
> + struct super_block *sb = file_inode(filp)->i_sb;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + /*
> + * Copy the maximum length allowed for ext4 label with one more to
> + * find the required terminating null byte in order to test the
> + * label length. The on disk label doesn't need to be null terminated.
> + */
> + if (copy_from_user(new_label, user_label, EXT4_LABEL_MAX + 1))
> + return -EFAULT;
> +
> + len = strnlen(new_label, EXT4_LABEL_MAX + 1);
> + if (len > EXT4_LABEL_MAX)
> + return -EINVAL;
> +
> + /*
> + * Clear the buffer after the new label
> + */
> + memset(new_label + len, 0, EXT4_LABEL_MAX - len);
> +
> + ret = mnt_want_write_file(filp);
> + if (ret)
> + return ret;
> +
> + ret = ext4_modify_superblocks_fn(sb, ext4_sb_setlabel, new_label);
> +
> + mnt_drop_write_file(filp);
> + return ret;
> +}
> +
> +static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label)
> +{
> + char label[EXT4_LABEL_MAX + 1];
> +
> + /*
> + * EXT4_LABEL_MAX must always be smaller than FSLABEL_MAX because
> + * FSLABEL_MAX must include terminating null byte, while s_volume_name
> + * does not have to.
> + */
> + BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX);
> +
> + memset(label, 0, sizeof(label));
> + lock_buffer(sbi->s_sbh);
> + strncpy(label, sbi->s_es->s_volume_name, EXT4_LABEL_MAX);
> + unlock_buffer(sbi->s_sbh);
> +
> + if (copy_to_user(user_label, label, sizeof(label)))
> + return -EFAULT;
> + return 0;
> +}
> +
> static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> {
> struct inode *inode = file_inode(filp);
> @@ -1266,6 +1549,13 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> case EXT4_IOC_CHECKPOINT:
> return ext4_ioctl_checkpoint(filp, arg);
>
> + case FS_IOC_GETFSLABEL:
> + return ext4_ioctl_getlabel(EXT4_SB(sb), (void __user *)arg);
> +
> + case FS_IOC_SETFSLABEL:
> + return ext4_ioctl_setlabel(filp,
> + (const void __user *)arg);
> +
> default:
> return -ENOTTY;
> }
> @@ -1347,6 +1637,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
> case EXT4_IOC_GETSTATE:
> case EXT4_IOC_GET_ES_CACHE:
> case EXT4_IOC_CHECKPOINT:
> + case FS_IOC_GETFSLABEL:
> + case FS_IOC_SETFSLABEL:
> break;
> default:
> return -ENOIOCTLCMD;
> diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
> index b63cb88ccdae..ee8f02f406cb 100644
> --- a/fs/ext4/resize.c
> +++ b/fs/ext4/resize.c
> @@ -717,12 +717,23 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
> * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
> * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
> */
> -static unsigned ext4_list_backups(struct super_block *sb, unsigned *three,
> - unsigned *five, unsigned *seven)
> +unsigned int ext4_list_backups(struct super_block *sb, unsigned int *three,
> + unsigned int *five, unsigned int *seven)
> {
> - unsigned *min = three;
> + struct ext4_super_block *es = EXT4_SB(sb)->s_es;
> + unsigned int *min = three;
> int mult = 3;
> - unsigned ret;
> + unsigned int ret;
> +
> + if (ext4_has_feature_sparse_super2(sb)) {
> + do {
> + if (*min > 2)
> + return UINT_MAX;
> + ret = le32_to_cpu(es->s_backup_bgs[*min - 1]);
> + *min += 1;
> + } while (!ret);
> + return ret;
> + }
>
> if (!ext4_has_feature_sparse_super(sb)) {
> ret = *min;
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 4e33b5eca694..d79182793675 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -260,8 +260,8 @@ static int ext4_verify_csum_type(struct super_block *sb,
> return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
> }
>
> -static __le32 ext4_superblock_csum(struct super_block *sb,
> - struct ext4_super_block *es)
> +__le32 ext4_superblock_csum(struct super_block *sb,
> + struct ext4_super_block *es)
> {
> struct ext4_sb_info *sbi = EXT4_SB(sb);
> int offset = offsetof(struct ext4_super_block, s_checksum);
> diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
> index 0ea36b2b0662..17d5a8d0fbfd 100644
> --- a/include/trace/events/ext4.h
> +++ b/include/trace/events/ext4.h
> @@ -2837,6 +2837,28 @@ TRACE_EVENT(ext4_fc_track_range,
> __entry->end)
> );
>
> +TRACE_EVENT(ext4_modify_sb,
> + TP_PROTO(struct super_block *sb, ext4_fsblk_t fsblk, unsigned flags),
> +
> + TP_ARGS(sb, fsblk, flags),
> +
> + TP_STRUCT__entry(
> + __field(dev_t, dev)
> + __field(ext4_fsblk_t, fsblk)
> + __field(unsigned int, flags)
> + ),
> +
> + TP_fast_assign(
> + __entry->dev = sb->s_dev;
> + __entry->fsblk = fsblk;
> + __entry->flags = flags;
> + ),
> +
> + TP_printk("dev %d,%d fsblk %llu flags %u",
> + MAJOR(__entry->dev), MINOR(__entry->dev),
> + __entry->fsblk, __entry->flags)
> +);
> +
> #endif /* _TRACE_EXT4_H */
>
> /* This part must be outside protection */
> --
> 2.31.1
>
Implement support for FS_IOC_GETFSLABEL and FS_IOC_SETFSLABEL ioctls for
online reading and setting of file system label.
ext4_ioctl_getlabel() is simple, just get the label from the primary
superblock bh. This might not be the first sb on the file system if
'sb=' mount option is used.
In ext4_ioctl_setlabel() we update what ext4 currently views as a
primary superblock and then proceed to update backup superblocks. There
are two caveats:
- the primary superblock might not be the first superblock and so it
might not be the one used by userspace tools if read directly
off the disk.
- because the primary superblock might not be the first superblock we
potentialy have to update it as part of backup superblock update.
However the first sb location is a bit more complicated than the rest
so we have to account for that.
The superblock modification is created generic enough so the infrastructure
can be used for other potential superblock modification operations
operations, such as chaning UUID.
Tested with generic/492 with various configurations. I also checked the
behavior with 'sb=' mount options, including very large file systems
with and without sparse_super/sparse_super2.
Signed-off-by: Lukas Czerner <[email protected]>
---
V2: Fix typo. Place constant in BUILD_BUG_ON comparison on the right side
V3: Setlabel completely reworked
V4: ext4_modify_primary_sb() and ext4_modify_superblocks_fn() should be
static
fs/ext4/ext4.h | 9 +-
fs/ext4/ioctl.c | 294 ++++++++++++++++++++++++++++++++++++
fs/ext4/resize.c | 19 ++-
fs/ext4/super.c | 4 +-
include/trace/events/ext4.h | 22 +++
5 files changed, 341 insertions(+), 7 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 404dd50856e5..f355deb619a2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1298,6 +1298,8 @@ extern void ext4_set_bits(void *bm, int cur, int len);
/* Metadata checksum algorithm codes */
#define EXT4_CRC32C_CHKSUM 1
+#define EXT4_LABEL_MAX 16
+
/*
* Structure of the super block
*/
@@ -1347,7 +1349,7 @@ struct ext4_super_block {
/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
__le32 s_feature_ro_compat; /* readonly-compatible feature set */
/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
-/*78*/ char s_volume_name[16]; /* volume name */
+/*78*/ char s_volume_name[EXT4_LABEL_MAX]; /* volume name */
/*88*/ char s_last_mounted[64] __nonstring; /* directory where last mounted */
/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
/*
@@ -3096,6 +3098,9 @@ extern int ext4_group_extend(struct super_block *sb,
struct ext4_super_block *es,
ext4_fsblk_t n_blocks_count);
extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
+extern unsigned int ext4_list_backups(struct super_block *sb,
+ unsigned int *three, unsigned int *five,
+ unsigned int *seven);
/* super.c */
extern struct buffer_head *ext4_sb_bread(struct super_block *sb,
@@ -3110,6 +3115,8 @@ extern int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait);
extern void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block);
extern int ext4_seq_options_show(struct seq_file *seq, void *offset);
extern int ext4_calculate_overhead(struct super_block *sb);
+extern __le32 ext4_superblock_csum(struct super_block *sb,
+ struct ext4_super_block *es);
extern void ext4_superblock_csum_set(struct super_block *sb);
extern int ext4_alloc_flex_bg_array(struct super_block *sb,
ext4_group_t ngroup);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 606dee9e08a3..ea938a41c861 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -27,6 +27,233 @@
#include "fsmap.h"
#include <trace/events/ext4.h>
+typedef void ext4_modify_sb_callback(struct ext4_super_block *es,
+ const void *arg);
+
+/*
+ * Superblock modification callback function for changing file system
+ * label
+ */
+static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg)
+{
+ /* Sanity check, this should never happen */
+ BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX);
+
+ memcpy(es->s_volume_name, (char *)arg, EXT4_LABEL_MAX);
+}
+
+static
+int ext4_modify_primary_sb(struct super_block *sb, handle_t *handle,
+ ext4_modify_sb_callback func,
+ const void *arg)
+{
+ int err = 0;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct buffer_head *bh = sbi->s_sbh;
+ struct ext4_super_block *es = sbi->s_es;
+
+ trace_ext4_modify_sb(sb, bh->b_blocknr, 1);
+
+ BUFFER_TRACE(bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, sb,
+ bh,
+ EXT4_JTR_NONE);
+ if (err)
+ goto out_err;
+
+ lock_buffer(bh);
+ func(es, arg);
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(bh);
+
+ if (buffer_write_io_error(bh) || !buffer_uptodate(bh)) {
+ ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
+ "superblock detected");
+ clear_buffer_write_io_error(bh);
+ set_buffer_uptodate(bh);
+ }
+
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (err)
+ goto out_err;
+ err = sync_dirty_buffer(bh);
+out_err:
+ ext4_std_error(sb, err);
+ return err;
+}
+
+/*
+ * Update one backup superblcok in the group 'grp' using the primary
+ * superblock data. If the handle is NULL the modification is not
+ * journalled.
+ *
+ * Returns: 0 when no modification was done (no superblock in the group)
+ * 1 when the modification was successful
+ * <0 on error
+ */
+static int ext4_update_backup_sb(struct super_block *sb, handle_t *handle,
+ ext4_group_t grp)
+{
+ int err = 0;
+ ext4_fsblk_t sb_block;
+ struct buffer_head *bh;
+ unsigned long offset = 0;
+
+ if (!ext4_bg_has_super(sb, grp))
+ return 0;
+
+ /*
+ * For the group 0 there is always 1k padding, so we have
+ * either adjust offset, or sb_block depending on blocksize
+ */
+ if (grp == 0) {
+ sb_block = 1 * EXT4_MIN_BLOCK_SIZE;
+ offset = do_div(sb_block, sb->s_blocksize);
+ } else {
+ sb_block = ext4_group_first_block_no(sb, grp);
+ offset = 0;
+ }
+
+ trace_ext4_modify_sb(sb, sb_block, handle ? 1 : 0);
+
+ bh = sb_getblk(sb, sb_block);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
+
+ if (handle) {
+ BUFFER_TRACE(bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, sb,
+ bh,
+ EXT4_JTR_NONE);
+ if (err)
+ goto out_bh;
+ }
+
+ lock_buffer(bh);
+ memcpy(bh->b_data + offset, EXT4_SB(sb)->s_es,
+ sizeof(struct ext4_super_block));
+ set_buffer_uptodate(bh);
+ unlock_buffer(bh);
+
+ if (err)
+ goto out_bh;
+
+ if (handle) {
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (err)
+ goto out_bh;
+ } else {
+ BUFFER_TRACE(bh, "marking dirty");
+ mark_buffer_dirty(bh);
+ }
+ err = sync_dirty_buffer(bh);
+
+out_bh:
+ brelse(bh);
+ ext4_std_error(sb, err);
+ return (err) ? err : 1;
+}
+
+/*
+ * Modify primary and backup superblocks using the provided function
+ * func and argument arg.
+ *
+ * Only the primary superblock and at most two backup superblock
+ * modifications are journalled; the rest is modified without journal.
+ * This is safe because e2fsck will re-write them if there is a problem,
+ * and we're very unlikely to ever need more than two backups.
+ */
+static
+int ext4_modify_superblocks_fn(struct super_block *sb,
+ ext4_modify_sb_callback func,
+ const void *arg)
+{
+ handle_t *handle;
+ ext4_group_t ngroups;
+ unsigned int three = 1;
+ unsigned int five = 5;
+ unsigned int seven = 7;
+ int err = 0, ret, i;
+ ext4_group_t grp, primary_grp;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ /*
+ * We can't modify superblocks while the online resize is running
+ */
+ if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING,
+ &sbi->s_ext4_flags)) {
+ ext4_msg(sb, KERN_ERR, "Can't modify superblock while"
+ "performing online resize");
+ return -EBUSY;
+ }
+
+ /*
+ * We're only going to modify primary superblock and two
+ * backup superblocks in this transaction.
+ */
+ handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 3);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto out;
+ }
+
+ /* Modify primary superblock */
+ err = ext4_modify_primary_sb(sb, handle, func, arg);
+ if (err) {
+ ext4_msg(sb, KERN_ERR, "Failed to modify primary "
+ "superblock");
+ goto out_journal;
+ }
+
+ primary_grp = ext4_get_group_number(sb, sbi->s_sbh->b_blocknr);
+ ngroups = ext4_get_groups_count(sb);
+
+ /*
+ * Update backup superblocks. We have to start from group 0
+ * because it might not be where the primary superblock is
+ * if the fs is mounted with -o sb=<backup_sb_block>
+ */
+ i = 0;
+ grp = 0;
+ while (grp < ngroups) {
+ /* Skip primary superblock */
+ if (grp == primary_grp)
+ goto next_grp;
+
+ ret = ext4_update_backup_sb(sb, handle, grp);
+ if (ret < 0) {
+ err = ret;
+ goto out_journal;
+ }
+
+ i += ret;
+ if (handle && i > 1) {
+ /*
+ * We're only journalling primary superblock and
+ * two backup superblocks; the rest is not
+ * journalled.
+ */
+ err = ext4_journal_stop(handle);
+ if (err)
+ goto out;
+ handle = NULL;
+ }
+next_grp:
+ grp = ext4_list_backups(sb, &three, &five, &seven);
+ }
+
+out_journal:
+ if (handle) {
+ ret = ext4_journal_stop(handle);
+ if (ret && !err)
+ err = ret;
+ }
+out:
+ clear_bit_unlock(EXT4_FLAGS_RESIZING, &sbi->s_ext4_flags);
+ smp_mb__after_atomic();
+ return err ? err : 0;
+}
+
/**
* Swap memory between @a and @b for @len bytes.
*
@@ -850,6 +1077,64 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
return err;
}
+static int ext4_ioctl_setlabel(struct file *filp, const char __user *user_label)
+{
+ size_t len;
+ int ret = 0;
+ char new_label[EXT4_LABEL_MAX + 1];
+ struct super_block *sb = file_inode(filp)->i_sb;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /*
+ * Copy the maximum length allowed for ext4 label with one more to
+ * find the required terminating null byte in order to test the
+ * label length. The on disk label doesn't need to be null terminated.
+ */
+ if (copy_from_user(new_label, user_label, EXT4_LABEL_MAX + 1))
+ return -EFAULT;
+
+ len = strnlen(new_label, EXT4_LABEL_MAX + 1);
+ if (len > EXT4_LABEL_MAX)
+ return -EINVAL;
+
+ /*
+ * Clear the buffer after the new label
+ */
+ memset(new_label + len, 0, EXT4_LABEL_MAX - len);
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ ret = ext4_modify_superblocks_fn(sb, ext4_sb_setlabel, new_label);
+
+ mnt_drop_write_file(filp);
+ return ret;
+}
+
+static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label)
+{
+ char label[EXT4_LABEL_MAX + 1];
+
+ /*
+ * EXT4_LABEL_MAX must always be smaller than FSLABEL_MAX because
+ * FSLABEL_MAX must include terminating null byte, while s_volume_name
+ * does not have to.
+ */
+ BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX);
+
+ memset(label, 0, sizeof(label));
+ lock_buffer(sbi->s_sbh);
+ strncpy(label, sbi->s_es->s_volume_name, EXT4_LABEL_MAX);
+ unlock_buffer(sbi->s_sbh);
+
+ if (copy_to_user(user_label, label, sizeof(label)))
+ return -EFAULT;
+ return 0;
+}
+
static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -1266,6 +1551,13 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case EXT4_IOC_CHECKPOINT:
return ext4_ioctl_checkpoint(filp, arg);
+ case FS_IOC_GETFSLABEL:
+ return ext4_ioctl_getlabel(EXT4_SB(sb), (void __user *)arg);
+
+ case FS_IOC_SETFSLABEL:
+ return ext4_ioctl_setlabel(filp,
+ (const void __user *)arg);
+
default:
return -ENOTTY;
}
@@ -1347,6 +1639,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_GETSTATE:
case EXT4_IOC_GET_ES_CACHE:
case EXT4_IOC_CHECKPOINT:
+ case FS_IOC_GETFSLABEL:
+ case FS_IOC_SETFSLABEL:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index b63cb88ccdae..ee8f02f406cb 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -717,12 +717,23 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
* sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
* For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
*/
-static unsigned ext4_list_backups(struct super_block *sb, unsigned *three,
- unsigned *five, unsigned *seven)
+unsigned int ext4_list_backups(struct super_block *sb, unsigned int *three,
+ unsigned int *five, unsigned int *seven)
{
- unsigned *min = three;
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ unsigned int *min = three;
int mult = 3;
- unsigned ret;
+ unsigned int ret;
+
+ if (ext4_has_feature_sparse_super2(sb)) {
+ do {
+ if (*min > 2)
+ return UINT_MAX;
+ ret = le32_to_cpu(es->s_backup_bgs[*min - 1]);
+ *min += 1;
+ } while (!ret);
+ return ret;
+ }
if (!ext4_has_feature_sparse_super(sb)) {
ret = *min;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4e33b5eca694..d79182793675 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -260,8 +260,8 @@ static int ext4_verify_csum_type(struct super_block *sb,
return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
}
-static __le32 ext4_superblock_csum(struct super_block *sb,
- struct ext4_super_block *es)
+__le32 ext4_superblock_csum(struct super_block *sb,
+ struct ext4_super_block *es)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
int offset = offsetof(struct ext4_super_block, s_checksum);
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 0ea36b2b0662..17d5a8d0fbfd 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2837,6 +2837,28 @@ TRACE_EVENT(ext4_fc_track_range,
__entry->end)
);
+TRACE_EVENT(ext4_modify_sb,
+ TP_PROTO(struct super_block *sb, ext4_fsblk_t fsblk, unsigned flags),
+
+ TP_ARGS(sb, fsblk, flags),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ext4_fsblk_t, fsblk)
+ __field(unsigned int, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->fsblk = fsblk;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("dev %d,%d fsblk %llu flags %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->fsblk, __entry->flags)
+);
+
#endif /* _TRACE_EXT4_H */
/* This part must be outside protection */
--
2.31.1
On Fri, Dec 10, 2021 at 04:22:20PM +0100, Lukas Czerner wrote:
>
> There are couple of places in ext4 where we change superblock using
> journal; set features, generate s_encrypt_pw_salt, modify s_last_orphan,
> s_last_mounted and there is also ext4_update_super() in
> flush_stashed_error_work(). Also all the wild things done in resize.c.
>
> I think we should consolidate all or most of those under a single helper
> and I was thiking about how to go about it cleanly.
There are some changes which I think need to be restricted to only the
primary superblock. This includes updates to s_last_orphan,
s_last_mounted, and flushing error information by
flush_stashed_error_work(). The last is because if we've found some
kind of file system corruption, the problem might have been in a
corrupted superblock. So we don't necessary want to mess with the
backup superblocks, since that might propagate the problem to all of
the backup superblocks. And s_last_orphan, s_last_mounted, are
updated all the time, and they should only be updating the primary
superblock because (a) the performance impacts if we need to update
multiple sueprblocks, and (b) one of the ways we can avoid backup
superblocks from being corrupted is to avoid updating them.
So we should only be updating backup superblocks when we *have* to,
because we're updating something fundamental about the file system
metadata --- such as the size of the file system, when we're doing an
online resize --- or we're changing the UUID, or the Label, etc. BTW,
updating features is something that we generally avoid in new kernel
code; we've done it in the past, but it's better for the system
adminsirator to explicitly needs to enable a feature, as opposed to
having the kernel updating the feature when we create a huge file, for
example.
> We could play games with modifying s_es directly, which just points
> into s_sbh. And rely on the fact that it's read only once so we
> maybe should be able to modify it and then do the journal dance
> afterwards? I don't know if that's even allowed, but it sounds weird
> to me.
Well, that's how we do things today when we update the primary
superblock, and I think that's the right thing to do thing. We need
to modify s_sbh->b_data in any case so that the journalling works
correctly in any case.
For the backup superblocks, for the ones which we are updating as part
of the transaction, we need to do it via a their bh using the jbd2
updating protocol. What I have in mind is to pass into the "update
superblock" function a callback function which actually modifies the
appropriate primary or backup superblock. So there would be a
callback function that updates s_uuid, or s_volume_name, etc.
So the updating_superblock function would do the following:
* get a handle that updates 3 blocks (the primary and two backups)
* call jbd2_journal_get_write_access() on s_bh
* call callback function to update primary superblock (s_bh)
* update the superblock checksum
* call ext4_handle_dirty_metadata on s_bh
* For the first two backup superblocks
- get a bh for the backup superblock
- if the bh is not buffer_verified, check the checksum on
the backup superblock, and if it is not valid, call
ext4_error() indicating that the backup superblock is invalid,
and skip updating it.
- get write access on the bh for the backup superblock
- call the callback funnction to update the backup superblock
- call ext4_handle_dirty_metadata
* call jbd2_journal_stop(handle)
Does this make sense to you?
> One disadvantage might be that on-disk modification from userspace on
> mounted file systems will not work anymore, since it will always be
> overwriten by the in-kernel in-memory representation.
Well, eventually I'd like to deprecate, and perhaps outright disallow
on-disk modification from userspace. But we need to create ioctls
that can do everything tune2fs can validly do on a mounted file
system, and then wait to make sure the newer version e2fsprogs has
been installed everywhere that where a user might try to install an
updated kernel before we can change the kernel to disallow direct
updates to the ext4 superblock.
Given that users may be installing random upstream kernel on a RHEL or
SLES system, and they may be doing that without updating e2fsprogs
first, anything which breaks current versions of e2fsprogs is going to
cause a huge amount of pain when a platinum customer calls either Red
Hat or Google Cloud's support personnel, and you and I won't want to
get dragged into a support call with an irate customer with a huge
cloud or RHEL spend and where the customer relationship exec is trying
very hard to keep the customer happy....
So out of sheer self defense, it's going to be a while before we can
deprecate direct modification of the superblock by programs like
tune2fs. As in, probably 8 to 10 years. :-/
- Ted
On Fri, Dec 10, 2021 at 06:05:13PM -0500, Theodore Y. Ts'o wrote:
> On Fri, Dec 10, 2021 at 04:22:20PM +0100, Lukas Czerner wrote:
> >
> > There are couple of places in ext4 where we change superblock using
> > journal; set features, generate s_encrypt_pw_salt, modify s_last_orphan,
> > s_last_mounted and there is also ext4_update_super() in
> > flush_stashed_error_work(). Also all the wild things done in resize.c.
> >
> > I think we should consolidate all or most of those under a single helper
> > and I was thiking about how to go about it cleanly.
>
> There are some changes which I think need to be restricted to only the
> primary superblock. This includes updates to s_last_orphan,
Yes. I agree. It was never my intention to suggest that we have to
update backup superblocks with every primary superblock update.
> s_last_mounted, and flushing error information by
> flush_stashed_error_work(). The last is because if we've found some
> kind of file system corruption, the problem might have been in a
> corrupted superblock. So we don't necessary want to mess with the
> backup superblocks, since that might propagate the problem to all of
> the backup superblocks. And s_last_orphan, s_last_mounted, are
> updated all the time, and they should only be updating the primary
> superblock because (a) the performance impacts if we need to update
> multiple sueprblocks, and (b) one of the ways we can avoid backup
> superblocks from being corrupted is to avoid updating them.
Hmm that's a good point. What't I've done in v3 is to just copy the data
from primary to secondary superblock location without even bothering to
read the backup superblock, but it really looks like it's not desirable.
>
> So we should only be updating backup superblocks when we *have* to,
> because we're updating something fundamental about the file system
> metadata --- such as the size of the file system, when we're doing an
> online resize --- or we're changing the UUID, or the Label, etc. BTW,
> updating features is something that we generally avoid in new kernel
> code; we've done it in the past, but it's better for the system
> adminsirator to explicitly needs to enable a feature, as opposed to
> having the kernel updating the feature when we create a huge file, for
> example.
Agreed.
>
> > We could play games with modifying s_es directly, which just points
> > into s_sbh. And rely on the fact that it's read only once so we
> > maybe should be able to modify it and then do the journal dance
> > afterwards? I don't know if that's even allowed, but it sounds weird
> > to me.
>
> Well, that's how we do things today when we update the primary
> superblock, and I think that's the right thing to do thing. We need
> to modify s_sbh->b_data in any case so that the journalling works
> correctly in any case.
What I had in mind was more along the lines of:
* update primary superblock
* call jbd2_journal_get_write_access() on s_sbh
* update the superblock checksum
* call ext4_handle_dirty_metadata on s_sbh
All that in order to disassociate the small piece of code changing the
superblock data from the journalling and commiting code that is always
the same. My idea was to replace the chunk of code in all places where
we change the primary superblock using journal (not talking about backup
superblock here at all, that's a separate issue) with
* update primary superblock, say for example:
sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
* call sync_primary_superblock() function to jourhal and commit s_sbh
And for that I though that we would need a separate sb structure from
s_es so that we avoid changing s_sbh before calling
jbd2_journal_get_write_access() on it. But maybe I am overthinking it.
>
> For the backup superblocks, for the ones which we are updating as part
> of the transaction, we need to do it via a their bh using the jbd2
> updating protocol. What I have in mind is to pass into the "update
> superblock" function a callback function which actually modifies the
> appropriate primary or backup superblock. So there would be a
> callback function that updates s_uuid, or s_volume_name, etc.
>
> So the updating_superblock function would do the following:
>
> * get a handle that updates 3 blocks (the primary and two backups)
> * call jbd2_journal_get_write_access() on s_bh
> * call callback function to update primary superblock (s_bh)
> * update the superblock checksum
> * call ext4_handle_dirty_metadata on s_bh
> * For the first two backup superblocks
> - get a bh for the backup superblock
> - if the bh is not buffer_verified, check the checksum on
> the backup superblock, and if it is not valid, call
> ext4_error() indicating that the backup superblock is invalid,
> and skip updating it.
> - get write access on the bh for the backup superblock
> - call the callback funnction to update the backup superblock
> - call ext4_handle_dirty_metadata
> * call jbd2_journal_stop(handle)
>
> Does this make sense to you?
That's pretty much what is done in v3 of the patch, except I just copy
the data from primary superblock to backup superblock which I now agree
is not ideal for reasons you already highlighted above. I'll change that
so that ext4_update_backup_sb() also accepts the callback function.
>
> > One disadvantage might be that on-disk modification from userspace on
> > mounted file systems will not work anymore, since it will always be
> > overwriten by the in-kernel in-memory representation.
>
> Well, eventually I'd like to deprecate, and perhaps outright disallow
> on-disk modification from userspace. But we need to create ioctls
> that can do everything tune2fs can validly do on a mounted file
> system, and then wait to make sure the newer version e2fsprogs has
> been installed everywhere that where a user might try to install an
> updated kernel before we can change the kernel to disallow direct
> updates to the ext4 superblock.
>
> Given that users may be installing random upstream kernel on a RHEL or
> SLES system, and they may be doing that without updating e2fsprogs
> first, anything which breaks current versions of e2fsprogs is going to
> cause a huge amount of pain when a platinum customer calls either Red
> Hat or Google Cloud's support personnel, and you and I won't want to
> get dragged into a support call with an irate customer with a huge
> cloud or RHEL spend and where the customer relationship exec is trying
> very hard to keep the customer happy....
>
> So out of sheer self defense, it's going to be a while before we can
> deprecate direct modification of the superblock by programs like
> tune2fs. As in, probably 8 to 10 years. :-/
Sigh... Yeah, we're stuck with having to keep in mind that userspace could
be changing the superblock right under our hands.
Thanks!
-Lukas
>
> - Ted
>
Implement support for FS_IOC_GETFSLABEL and FS_IOC_SETFSLABEL ioctls for
online reading and setting of file system label.
ext4_ioctl_getlabel() is simple, just get the label from the primary
superblock. This might not be the first sb on the file system if
'sb=' mount option is used.
In ext4_ioctl_setlabel() we update what ext4 currently views as a
primary superblock and then proceed to update backup superblocks. There
are two caveats:
- the primary superblock might not be the first superblock and so it
might not be the one used by userspace tools if read directly
off the disk.
- because the primary superblock might not be the first superblock we
potentialy have to update it as part of backup superblock update.
However the first sb location is a bit more complicated than the rest
so we have to account for that.
The superblock modification is created generic enough so the
infrastructure can be used for other potential superblock modification
operations, such as chaning UUID.
Tested with generic/492 with various configurations. I also checked the
behavior with 'sb=' mount options, including very large file systems
with and without sparse_super/sparse_super2.
Signed-off-by: Lukas Czerner <[email protected]>
---
V2: Fix typo. Place constant in BUILD_BUG_ON comparison on the right side
V3: Setlabel completely reworked
V4: ext4_modify_primary_sb() and ext4_modify_superblocks_fn() should be
static
V5: Make ext4_update_backup_sb() to modify backup superblock using
callback function instead of copying data from primary sb
fs/ext4/ext4.h | 9 +-
fs/ext4/ioctl.c | 309 ++++++++++++++++++++++++++++++++++++
fs/ext4/resize.c | 19 ++-
fs/ext4/super.c | 4 +-
include/trace/events/ext4.h | 23 +++
5 files changed, 357 insertions(+), 7 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 404dd50856e5..f355deb619a2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1298,6 +1298,8 @@ extern void ext4_set_bits(void *bm, int cur, int len);
/* Metadata checksum algorithm codes */
#define EXT4_CRC32C_CHKSUM 1
+#define EXT4_LABEL_MAX 16
+
/*
* Structure of the super block
*/
@@ -1347,7 +1349,7 @@ struct ext4_super_block {
/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
__le32 s_feature_ro_compat; /* readonly-compatible feature set */
/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
-/*78*/ char s_volume_name[16]; /* volume name */
+/*78*/ char s_volume_name[EXT4_LABEL_MAX]; /* volume name */
/*88*/ char s_last_mounted[64] __nonstring; /* directory where last mounted */
/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
/*
@@ -3096,6 +3098,9 @@ extern int ext4_group_extend(struct super_block *sb,
struct ext4_super_block *es,
ext4_fsblk_t n_blocks_count);
extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
+extern unsigned int ext4_list_backups(struct super_block *sb,
+ unsigned int *three, unsigned int *five,
+ unsigned int *seven);
/* super.c */
extern struct buffer_head *ext4_sb_bread(struct super_block *sb,
@@ -3110,6 +3115,8 @@ extern int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait);
extern void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block);
extern int ext4_seq_options_show(struct seq_file *seq, void *offset);
extern int ext4_calculate_overhead(struct super_block *sb);
+extern __le32 ext4_superblock_csum(struct super_block *sb,
+ struct ext4_super_block *es);
extern void ext4_superblock_csum_set(struct super_block *sb);
extern int ext4_alloc_flex_bg_array(struct super_block *sb,
ext4_group_t ngroup);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 606dee9e08a3..e9b3332edf11 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -27,6 +27,248 @@
#include "fsmap.h"
#include <trace/events/ext4.h>
+typedef void ext4_update_sb_callback(struct ext4_super_block *es,
+ const void *arg);
+
+/*
+ * Superblock modification callback function for changing file system
+ * label
+ */
+static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg)
+{
+ /* Sanity check, this should never happen */
+ BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX);
+
+ memcpy(es->s_volume_name, (char *)arg, EXT4_LABEL_MAX);
+}
+
+static
+int ext4_update_primary_sb(struct super_block *sb, handle_t *handle,
+ ext4_update_sb_callback func,
+ const void *arg)
+{
+ int err = 0;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct buffer_head *bh = sbi->s_sbh;
+ struct ext4_super_block *es = sbi->s_es;
+
+ trace_ext4_update_sb(sb, bh->b_blocknr, 1);
+
+ BUFFER_TRACE(bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, sb,
+ bh,
+ EXT4_JTR_NONE);
+ if (err)
+ goto out_err;
+
+ lock_buffer(bh);
+ func(es, arg);
+ ext4_superblock_csum_set(sb);
+ unlock_buffer(bh);
+
+ if (buffer_write_io_error(bh) || !buffer_uptodate(bh)) {
+ ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
+ "superblock detected");
+ clear_buffer_write_io_error(bh);
+ set_buffer_uptodate(bh);
+ }
+
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (err)
+ goto out_err;
+ err = sync_dirty_buffer(bh);
+out_err:
+ ext4_std_error(sb, err);
+ return err;
+}
+
+/*
+ * Update one backup superblock in the group 'grp' using the callback
+ * function 'func' and argument 'arg'. If the handle is NULL the
+ * modification is not journalled.
+ *
+ * Returns: 0 when no modification was done (no superblock in the group)
+ * 1 when the modification was successful
+ * <0 on error
+ */
+static int ext4_update_backup_sb(struct super_block *sb,
+ handle_t *handle, ext4_group_t grp,
+ ext4_update_sb_callback func, const void *arg)
+{
+ int err = 0;
+ ext4_fsblk_t sb_block;
+ struct buffer_head *bh;
+ unsigned long offset = 0;
+ struct ext4_super_block *es;
+
+ if (!ext4_bg_has_super(sb, grp))
+ return 0;
+
+ /*
+ * For the group 0 there is always 1k padding, so we have
+ * either adjust offset, or sb_block depending on blocksize
+ */
+ if (grp == 0) {
+ sb_block = 1 * EXT4_MIN_BLOCK_SIZE;
+ offset = do_div(sb_block, sb->s_blocksize);
+ } else {
+ sb_block = ext4_group_first_block_no(sb, grp);
+ offset = 0;
+ }
+
+ trace_ext4_update_sb(sb, sb_block, handle ? 1 : 0);
+
+ bh = ext4_sb_bread(sb, sb_block, 0);
+ if (IS_ERR(bh))
+ return PTR_ERR(bh);
+
+ if (handle) {
+ BUFFER_TRACE(bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, sb,
+ bh,
+ EXT4_JTR_NONE);
+ if (err)
+ goto out_bh;
+ }
+
+ es = (struct ext4_super_block *) (bh->b_data + offset);
+ lock_buffer(bh);
+ if (ext4_has_metadata_csum(sb) &&
+ es->s_checksum != ext4_superblock_csum(sb, es)) {
+ ext4_msg(sb, KERN_ERR, "Invalid checksum for backup "
+ "superblock %llu\n", sb_block);
+ unlock_buffer(bh);
+ err = -EFSBADCRC;
+ goto out_bh;
+ }
+ func(es, arg);
+ if (ext4_has_metadata_csum(sb))
+ es->s_checksum = ext4_superblock_csum(sb, es);
+ set_buffer_uptodate(bh);
+ unlock_buffer(bh);
+
+ if (err)
+ goto out_bh;
+
+ if (handle) {
+ err = ext4_handle_dirty_metadata(handle, NULL, bh);
+ if (err)
+ goto out_bh;
+ } else {
+ BUFFER_TRACE(bh, "marking dirty");
+ mark_buffer_dirty(bh);
+ }
+ err = sync_dirty_buffer(bh);
+
+out_bh:
+ brelse(bh);
+ ext4_std_error(sb, err);
+ return (err) ? err : 1;
+}
+
+/*
+ * Update primary and backup superblocks using the provided function
+ * func and argument arg.
+ *
+ * Only the primary superblock and at most two backup superblock
+ * modifications are journalled; the rest is modified without journal.
+ * This is safe because e2fsck will re-write them if there is a problem,
+ * and we're very unlikely to ever need more than two backups.
+ */
+static
+int ext4_update_superblocks_fn(struct super_block *sb,
+ ext4_update_sb_callback func,
+ const void *arg)
+{
+ handle_t *handle;
+ ext4_group_t ngroups;
+ unsigned int three = 1;
+ unsigned int five = 5;
+ unsigned int seven = 7;
+ int err = 0, ret, i;
+ ext4_group_t grp, primary_grp;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ /*
+ * We can't update superblocks while the online resize is running
+ */
+ if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING,
+ &sbi->s_ext4_flags)) {
+ ext4_msg(sb, KERN_ERR, "Can't modify superblock while"
+ "performing online resize");
+ return -EBUSY;
+ }
+
+ /*
+ * We're only going to update primary superblock and two
+ * backup superblocks in this transaction.
+ */
+ handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 3);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto out;
+ }
+
+ /* Update primary superblock */
+ err = ext4_update_primary_sb(sb, handle, func, arg);
+ if (err) {
+ ext4_msg(sb, KERN_ERR, "Failed to update primary "
+ "superblock");
+ goto out_journal;
+ }
+
+ primary_grp = ext4_get_group_number(sb, sbi->s_sbh->b_blocknr);
+ ngroups = ext4_get_groups_count(sb);
+
+ /*
+ * Update backup superblocks. We have to start from group 0
+ * because it might not be where the primary superblock is
+ * if the fs is mounted with -o sb=<backup_sb_block>
+ */
+ i = 0;
+ grp = 0;
+ while (grp < ngroups) {
+ /* Skip primary superblock */
+ if (grp == primary_grp)
+ goto next_grp;
+
+ ret = ext4_update_backup_sb(sb, handle, grp, func, arg);
+ if (ret < 0) {
+ /* Ignore bad checksum; try to update next sb */
+ if (ret == -EFSBADCRC)
+ goto next_grp;
+ err = ret;
+ goto out_journal;
+ }
+
+ i += ret;
+ if (handle && i > 1) {
+ /*
+ * We're only journalling primary superblock and
+ * two backup superblocks; the rest is not
+ * journalled.
+ */
+ err = ext4_journal_stop(handle);
+ if (err)
+ goto out;
+ handle = NULL;
+ }
+next_grp:
+ grp = ext4_list_backups(sb, &three, &five, &seven);
+ }
+
+out_journal:
+ if (handle) {
+ ret = ext4_journal_stop(handle);
+ if (ret && !err)
+ err = ret;
+ }
+out:
+ clear_bit_unlock(EXT4_FLAGS_RESIZING, &sbi->s_ext4_flags);
+ smp_mb__after_atomic();
+ return err ? err : 0;
+}
+
/**
* Swap memory between @a and @b for @len bytes.
*
@@ -850,6 +1092,64 @@ static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
return err;
}
+static int ext4_ioctl_setlabel(struct file *filp, const char __user *user_label)
+{
+ size_t len;
+ int ret = 0;
+ char new_label[EXT4_LABEL_MAX + 1];
+ struct super_block *sb = file_inode(filp)->i_sb;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /*
+ * Copy the maximum length allowed for ext4 label with one more to
+ * find the required terminating null byte in order to test the
+ * label length. The on disk label doesn't need to be null terminated.
+ */
+ if (copy_from_user(new_label, user_label, EXT4_LABEL_MAX + 1))
+ return -EFAULT;
+
+ len = strnlen(new_label, EXT4_LABEL_MAX + 1);
+ if (len > EXT4_LABEL_MAX)
+ return -EINVAL;
+
+ /*
+ * Clear the buffer after the new label
+ */
+ memset(new_label + len, 0, EXT4_LABEL_MAX - len);
+
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
+
+ ret = ext4_update_superblocks_fn(sb, ext4_sb_setlabel, new_label);
+
+ mnt_drop_write_file(filp);
+ return ret;
+}
+
+static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label)
+{
+ char label[EXT4_LABEL_MAX + 1];
+
+ /*
+ * EXT4_LABEL_MAX must always be smaller than FSLABEL_MAX because
+ * FSLABEL_MAX must include terminating null byte, while s_volume_name
+ * does not have to.
+ */
+ BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX);
+
+ memset(label, 0, sizeof(label));
+ lock_buffer(sbi->s_sbh);
+ strncpy(label, sbi->s_es->s_volume_name, EXT4_LABEL_MAX);
+ unlock_buffer(sbi->s_sbh);
+
+ if (copy_to_user(user_label, label, sizeof(label)))
+ return -EFAULT;
+ return 0;
+}
+
static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -1266,6 +1566,13 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case EXT4_IOC_CHECKPOINT:
return ext4_ioctl_checkpoint(filp, arg);
+ case FS_IOC_GETFSLABEL:
+ return ext4_ioctl_getlabel(EXT4_SB(sb), (void __user *)arg);
+
+ case FS_IOC_SETFSLABEL:
+ return ext4_ioctl_setlabel(filp,
+ (const void __user *)arg);
+
default:
return -ENOTTY;
}
@@ -1347,6 +1654,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_GETSTATE:
case EXT4_IOC_GET_ES_CACHE:
case EXT4_IOC_CHECKPOINT:
+ case FS_IOC_GETFSLABEL:
+ case FS_IOC_SETFSLABEL:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index b63cb88ccdae..ee8f02f406cb 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -717,12 +717,23 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
* sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ...
* For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ...
*/
-static unsigned ext4_list_backups(struct super_block *sb, unsigned *three,
- unsigned *five, unsigned *seven)
+unsigned int ext4_list_backups(struct super_block *sb, unsigned int *three,
+ unsigned int *five, unsigned int *seven)
{
- unsigned *min = three;
+ struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ unsigned int *min = three;
int mult = 3;
- unsigned ret;
+ unsigned int ret;
+
+ if (ext4_has_feature_sparse_super2(sb)) {
+ do {
+ if (*min > 2)
+ return UINT_MAX;
+ ret = le32_to_cpu(es->s_backup_bgs[*min - 1]);
+ *min += 1;
+ } while (!ret);
+ return ret;
+ }
if (!ext4_has_feature_sparse_super(sb)) {
ret = *min;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4e33b5eca694..d79182793675 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -260,8 +260,8 @@ static int ext4_verify_csum_type(struct super_block *sb,
return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
}
-static __le32 ext4_superblock_csum(struct super_block *sb,
- struct ext4_super_block *es)
+__le32 ext4_superblock_csum(struct super_block *sb,
+ struct ext4_super_block *es)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
int offset = offsetof(struct ext4_super_block, s_checksum);
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 0ea36b2b0662..19e957b7f941 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2837,6 +2837,29 @@ TRACE_EVENT(ext4_fc_track_range,
__entry->end)
);
+TRACE_EVENT(ext4_update_sb,
+ TP_PROTO(struct super_block *sb, ext4_fsblk_t fsblk,
+ unsigned int flags),
+
+ TP_ARGS(sb, fsblk, flags),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(ext4_fsblk_t, fsblk)
+ __field(unsigned int, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->fsblk = fsblk;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("dev %d,%d fsblk %llu flags %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->fsblk, __entry->flags)
+);
+
#endif /* _TRACE_EXT4_H */
/* This part must be outside protection */
--
2.31.1
On Mon, 13 Dec 2021 14:56:18 +0100, Lukas Czerner wrote:
> Implement support for FS_IOC_GETFSLABEL and FS_IOC_SETFSLABEL ioctls for
> online reading and setting of file system label.
>
> ext4_ioctl_getlabel() is simple, just get the label from the primary
> superblock. This might not be the first sb on the file system if
> 'sb=' mount option is used.
>
> [...]
Applied, thanks!
[1/1] ext4: implement support for get/set fs label
commit: 37d1c2c49c13f7a34900a7d3e479326a7bb32364
Best regards,
--
Theodore Ts'o <[email protected]>