2016-11-16 17:52:57

by Eric Biggers

[permalink] [raw]
Subject: [PATCH] ext4: fix reading new encrypted symlinks on no-journal filesystems

On a filesystem with no journal, a symlink longer than about 32
characters (exact length depending on padding for encryption) could not
be followed or read immediately after being created in an encrypted
directory. This happened because when the symlink data went through the
delayed allocation path instead of the journaling path, the symlink was
incorrectly detected as a "fast" symlink rather than a "slow" symlink
until its data was written out.

To fix this, use different inode operations for fast and slow encrypted
symlinks. This parallels how fast and slow unencrypted symlinks use
different inode operations.

I did not fix this by updating ext4_inode_is_fast_symlink() to account
for delayed allocation because there didn't seem to be a good way to do
that in a race-free way. i_data_sem would need to be acquired to avoid
racing with ext4_da_update_reserve_space() updating
i_reserved_data_blocks and i_blocks, but i_data_sem did not seem
appropriate to take from the context of ext4_inode_is_fast_symlink().

Signed-off-by: Eric Biggers <[email protected]>
---
fs/ext4/ext4.h | 1 +
fs/ext4/inode.c | 28 +++++++++++++++++++---------
fs/ext4/namei.c | 10 +++++++---
fs/ext4/symlink.c | 28 +++++++++++++++++++++++++---
4 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 53d6d46..a0096af 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3087,6 +3087,7 @@ extern int ext4_mpage_readpages(struct address_space *mapping,

/* symlink.c */
extern const struct inode_operations ext4_encrypted_symlink_inode_operations;
+extern const struct inode_operations ext4_encrypted_fast_symlink_inode_operations;
extern const struct inode_operations ext4_symlink_inode_operations;
extern const struct inode_operations ext4_fast_symlink_inode_operations;

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b1b4c85..be9e369 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -144,6 +144,9 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,

/*
* Test whether an inode is a fast symlink.
+ *
+ * Careful: the result may be incorrect if the symlink was just created and is
+ * pending delayed allocation (only possible on no-journal filesystems).
*/
int ext4_inode_is_fast_symlink(struct inode *inode)
{
@@ -4646,16 +4649,23 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
inode->i_op = &ext4_dir_inode_operations;
inode->i_fop = &ext4_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
- if (ext4_encrypted_inode(inode)) {
- inode->i_op = &ext4_encrypted_symlink_inode_operations;
- ext4_set_aops(inode);
- } else if (ext4_inode_is_fast_symlink(inode)) {
- inode->i_link = (char *)ei->i_data;
- inode->i_op = &ext4_fast_symlink_inode_operations;
- nd_terminate_link(ei->i_data, inode->i_size,
- sizeof(ei->i_data) - 1);
+ if (ext4_inode_is_fast_symlink(inode)) {
+ if (ext4_encrypted_inode(inode)) {
+ inode->i_op =
+ &ext4_encrypted_fast_symlink_inode_operations;
+ } else {
+ inode->i_op =
+ &ext4_fast_symlink_inode_operations;
+ inode->i_link = (char *)ei->i_data;
+ nd_terminate_link(ei->i_data, inode->i_size,
+ sizeof(ei->i_data) - 1);
+ }
} else {
- inode->i_op = &ext4_symlink_inode_operations;
+ if (ext4_encrypted_inode(inode))
+ inode->i_op =
+ &ext4_encrypted_symlink_inode_operations;
+ else
+ inode->i_op = &ext4_symlink_inode_operations;
ext4_set_aops(inode);
}
inode_nohighmem(inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index eadba91..acf1786 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3148,11 +3148,12 @@ static int ext4_symlink(struct inode *dir,
goto err_drop_inode;
sd->len = cpu_to_le16(ostr.len);
disk_link.name = (char *) sd;
- inode->i_op = &ext4_encrypted_symlink_inode_operations;
}

if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
- if (!encryption_required)
+ if (encryption_required)
+ inode->i_op = &ext4_encrypted_symlink_inode_operations;
+ else
inode->i_op = &ext4_symlink_inode_operations;
inode_nohighmem(inode);
ext4_set_aops(inode);
@@ -3194,7 +3195,10 @@ static int ext4_symlink(struct inode *dir,
} else {
/* clear the extent format for fast symlink */
ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
- if (!encryption_required) {
+ if (encryption_required) {
+ inode->i_op =
+ &ext4_encrypted_fast_symlink_inode_operations;
+ } else {
inode->i_op = &ext4_fast_symlink_inode_operations;
inode->i_link = (char *)&EXT4_I(inode)->i_data;
}
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 557b3b0..684bb78 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -24,7 +24,8 @@

static const char *ext4_encrypted_get_link(struct dentry *dentry,
struct inode *inode,
- struct delayed_call *done)
+ struct delayed_call *done,
+ bool fast)
{
struct page *cpage = NULL;
char *caddr, *paddr = NULL;
@@ -40,7 +41,7 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
if (res)
return ERR_PTR(res);

- if (ext4_inode_is_fast_symlink(inode)) {
+ if (fast) {
caddr = (char *) EXT4_I(inode)->i_data;
max_size = sizeof(EXT4_I(inode)->i_data);
} else {
@@ -82,9 +83,30 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
return ERR_PTR(res);
}

+static const char *ext4_encrypted_get_link_slow(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
+{
+ return ext4_encrypted_get_link(dentry, inode, done, false);
+}
+
+static const char *ext4_encrypted_get_link_fast(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
+{
+ return ext4_encrypted_get_link(dentry, inode, done, true);
+}
+
const struct inode_operations ext4_encrypted_symlink_inode_operations = {
.readlink = generic_readlink,
- .get_link = ext4_encrypted_get_link,
+ .get_link = ext4_encrypted_get_link_slow,
+ .setattr = ext4_setattr,
+ .listxattr = ext4_listxattr,
+};
+
+const struct inode_operations ext4_encrypted_fast_symlink_inode_operations = {
+ .readlink = generic_readlink,
+ .get_link = ext4_encrypted_get_link_fast,
.setattr = ext4_setattr,
.listxattr = ext4_listxattr,
};
--
2.8.0.rc3.226.g39d4020



2016-11-18 02:20:05

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix reading new encrypted symlinks on no-journal filesystems

On Nov 16, 2016, at 10:50 AM, Eric Biggers <[email protected]> wrote:
>
> On a filesystem with no journal, a symlink longer than about 32
> characters (exact length depending on padding for encryption) could not
> be followed or read immediately after being created in an encrypted
> directory. This happened because when the symlink data went through the
> delayed allocation path instead of the journaling path, the symlink was
> incorrectly detected as a "fast" symlink rather than a "slow" symlink
> until its data was written out.

IMHO, this again exposes an issue that we've seen with "fast" vs. "slow"
symlink detection several times in the past whenever there is a data block
allocated for a fast symlink (e.g. when xattrs were allowed on symlinks).

int ext4_inode_is_fast_symlink(struct inode *inode)
{
int ea_blocks = EXT4_I(inode)->i_file_acl ?
EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;

if (ext4_has_inline_data(inode))
return 0;

return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
}

Instead of depending on the i_blocks count to detect slow symlinks, we
should just check the i_size < EXT4_N_BLOCKS * 4 (or <=, need to verify).
I believe this has always been true for fast symlinks, so it should be
OK to make this change. That will isolate us from future changes that
may add block allocations to symlinks.

Cheers, Andreas

> To fix this, use different inode operations for fast and slow encrypted
> symlinks. This parallels how fast and slow unencrypted symlinks use
> different inode operations.
>
> I did not fix this by updating ext4_inode_is_fast_symlink() to account
> for delayed allocation because there didn't seem to be a good way to do
> that in a race-free way. i_data_sem would need to be acquired to avoid
> racing with ext4_da_update_reserve_space() updating
> i_reserved_data_blocks and i_blocks, but i_data_sem did not seem
> appropriate to take from the context of ext4_inode_is_fast_symlink().
>
> Signed-off-by: Eric Biggers <[email protected]>
> ---
> fs/ext4/ext4.h | 1 +
> fs/ext4/inode.c | 28 +++++++++++++++++++---------
> fs/ext4/namei.c | 10 +++++++---
> fs/ext4/symlink.c | 28 +++++++++++++++++++++++++---
> 4 files changed, 52 insertions(+), 15 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 53d6d46..a0096af 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -3087,6 +3087,7 @@ extern int ext4_mpage_readpages(struct address_space *mapping,
>
> /* symlink.c */
> extern const struct inode_operations ext4_encrypted_symlink_inode_operations;
> +extern const struct inode_operations ext4_encrypted_fast_symlink_inode_operations;
> extern const struct inode_operations ext4_symlink_inode_operations;
> extern const struct inode_operations ext4_fast_symlink_inode_operations;
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index b1b4c85..be9e369 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -144,6 +144,9 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
>
> /*
> * Test whether an inode is a fast symlink.
> + *
> + * Careful: the result may be incorrect if the symlink was just created and is
> + * pending delayed allocation (only possible on no-journal filesystems).
> */
> int ext4_inode_is_fast_symlink(struct inode *inode)
> {
> @@ -4646,16 +4649,23 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
> inode->i_op = &ext4_dir_inode_operations;
> inode->i_fop = &ext4_dir_operations;
> } else if (S_ISLNK(inode->i_mode)) {
> - if (ext4_encrypted_inode(inode)) {
> - inode->i_op = &ext4_encrypted_symlink_inode_operations;
> - ext4_set_aops(inode);
> - } else if (ext4_inode_is_fast_symlink(inode)) {
> - inode->i_link = (char *)ei->i_data;
> - inode->i_op = &ext4_fast_symlink_inode_operations;
> - nd_terminate_link(ei->i_data, inode->i_size,
> - sizeof(ei->i_data) - 1);
> + if (ext4_inode_is_fast_symlink(inode)) {
> + if (ext4_encrypted_inode(inode)) {
> + inode->i_op =
> + &ext4_encrypted_fast_symlink_inode_operations;
> + } else {
> + inode->i_op =
> + &ext4_fast_symlink_inode_operations;
> + inode->i_link = (char *)ei->i_data;
> + nd_terminate_link(ei->i_data, inode->i_size,
> + sizeof(ei->i_data) - 1);
> + }
> } else {
> - inode->i_op = &ext4_symlink_inode_operations;
> + if (ext4_encrypted_inode(inode))
> + inode->i_op =
> + &ext4_encrypted_symlink_inode_operations;
> + else
> + inode->i_op = &ext4_symlink_inode_operations;
> ext4_set_aops(inode);
> }
> inode_nohighmem(inode);
> diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> index eadba91..acf1786 100644
> --- a/fs/ext4/namei.c
> +++ b/fs/ext4/namei.c
> @@ -3148,11 +3148,12 @@ static int ext4_symlink(struct inode *dir,
> goto err_drop_inode;
> sd->len = cpu_to_le16(ostr.len);
> disk_link.name = (char *) sd;
> - inode->i_op = &ext4_encrypted_symlink_inode_operations;
> }
>
> if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
> - if (!encryption_required)
> + if (encryption_required)
> + inode->i_op = &ext4_encrypted_symlink_inode_operations;
> + else
> inode->i_op = &ext4_symlink_inode_operations;
> inode_nohighmem(inode);
> ext4_set_aops(inode);
> @@ -3194,7 +3195,10 @@ static int ext4_symlink(struct inode *dir,
> } else {
> /* clear the extent format for fast symlink */
> ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
> - if (!encryption_required) {
> + if (encryption_required) {
> + inode->i_op =
> + &ext4_encrypted_fast_symlink_inode_operations;
> + } else {
> inode->i_op = &ext4_fast_symlink_inode_operations;
> inode->i_link = (char *)&EXT4_I(inode)->i_data;
> }
> diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
> index 557b3b0..684bb78 100644
> --- a/fs/ext4/symlink.c
> +++ b/fs/ext4/symlink.c
> @@ -24,7 +24,8 @@
>
> static const char *ext4_encrypted_get_link(struct dentry *dentry,
> struct inode *inode,
> - struct delayed_call *done)
> + struct delayed_call *done,
> + bool fast)
> {
> struct page *cpage = NULL;
> char *caddr, *paddr = NULL;
> @@ -40,7 +41,7 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
> if (res)
> return ERR_PTR(res);
>
> - if (ext4_inode_is_fast_symlink(inode)) {
> + if (fast) {
> caddr = (char *) EXT4_I(inode)->i_data;
> max_size = sizeof(EXT4_I(inode)->i_data);
> } else {
> @@ -82,9 +83,30 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
> return ERR_PTR(res);
> }
>
> +static const char *ext4_encrypted_get_link_slow(struct dentry *dentry,
> + struct inode *inode,
> + struct delayed_call *done)
> +{
> + return ext4_encrypted_get_link(dentry, inode, done, false);
> +}
> +
> +static const char *ext4_encrypted_get_link_fast(struct dentry *dentry,
> + struct inode *inode,
> + struct delayed_call *done)
> +{
> + return ext4_encrypted_get_link(dentry, inode, done, true);
> +}
> +
> const struct inode_operations ext4_encrypted_symlink_inode_operations = {
> .readlink = generic_readlink,
> - .get_link = ext4_encrypted_get_link,
> + .get_link = ext4_encrypted_get_link_slow,
> + .setattr = ext4_setattr,
> + .listxattr = ext4_listxattr,
> +};
> +
> +const struct inode_operations ext4_encrypted_fast_symlink_inode_operations = {
> + .readlink = generic_readlink,
> + .get_link = ext4_encrypted_get_link_fast,
> .setattr = ext4_setattr,
> .listxattr = ext4_listxattr,
> };
> --
> 2.8.0.rc3.226.g39d4020
>


Cheers, Andreas






Attachments:
signature.asc (833.00 B)
Message signed with OpenPGP using GPGMail

2016-11-18 18:47:07

by Eric Biggers

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix reading new encrypted symlinks on no-journal filesystems

On Thu, Nov 17, 2016 at 07:20:24PM -0700, Andreas Dilger wrote:
> On Nov 16, 2016, at 10:50 AM, Eric Biggers <[email protected]> wrote:
> >
> > On a filesystem with no journal, a symlink longer than about 32
> > characters (exact length depending on padding for encryption) could not
> > be followed or read immediately after being created in an encrypted
> > directory. This happened because when the symlink data went through the
> > delayed allocation path instead of the journaling path, the symlink was
> > incorrectly detected as a "fast" symlink rather than a "slow" symlink
> > until its data was written out.
>
> IMHO, this again exposes an issue that we've seen with "fast" vs. "slow"
> symlink detection several times in the past whenever there is a data block
> allocated for a fast symlink (e.g. when xattrs were allowed on symlinks).
>
> int ext4_inode_is_fast_symlink(struct inode *inode)
> {
> int ea_blocks = EXT4_I(inode)->i_file_acl ?
> EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
>
> if (ext4_has_inline_data(inode))
> return 0;
>
> return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
> }
>
> Instead of depending on the i_blocks count to detect slow symlinks, we
> should just check the i_size < EXT4_N_BLOCKS * 4 (or <=, need to verify).
> I believe this has always been true for fast symlinks, so it should be
> OK to make this change. That will isolate us from future changes that
> may add block allocations to symlinks.
>

Yes, this would be a much nicer way to detect fast symlinks.

The only thing I'd be concerned about is the possibility of pre-existing "slow"
symlinks that actually have targets short enough to be "fast" symlinks, perhaps
in filesystems created by old drivers or by external tools. If such links
happened to work before, then a change to check i_size would break them.

This may not be an issue in practice. I checked some old ext4 versions, ext2
from Linux 0.99.7, e2fsprogs, Android's ext4_utils, and FreeBSD's ext2 driver.
They all create "fast" symlinks if the length of the symlink target length
excluding the terminating null (i_size) is < 60.

Eric

2016-11-18 21:52:24

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix reading new encrypted symlinks on no-journal filesystems


> On Nov 18, 2016, at 11:47 AM, Eric Biggers <[email protected]> wrote:
>
> On Thu, Nov 17, 2016 at 07:20:24PM -0700, Andreas Dilger wrote:
>> On Nov 16, 2016, at 10:50 AM, Eric Biggers <[email protected]> wrote:
>>>
>>> On a filesystem with no journal, a symlink longer than about 32
>>> characters (exact length depending on padding for encryption) could not
>>> be followed or read immediately after being created in an encrypted
>>> directory. This happened because when the symlink data went through the
>>> delayed allocation path instead of the journaling path, the symlink was
>>> incorrectly detected as a "fast" symlink rather than a "slow" symlink
>>> until its data was written out.
>>
>> IMHO, this again exposes an issue that we've seen with "fast" vs. "slow"
>> symlink detection several times in the past whenever there is a data block
>> allocated for a fast symlink (e.g. when xattrs were allowed on symlinks).
>>
>> int ext4_inode_is_fast_symlink(struct inode *inode)
>> {
>> int ea_blocks = EXT4_I(inode)->i_file_acl ?
>> EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
>>
>> if (ext4_has_inline_data(inode))
>> return 0;
>>
>> return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
>> }
>>
>> Instead of depending on the i_blocks count to detect slow symlinks, we
>> should just check the i_size < EXT4_N_BLOCKS * 4 (or <=, need to verify).
>> I believe this has always been true for fast symlinks, so it should be
>> OK to make this change. That will isolate us from future changes that
>> may add block allocations to symlinks.
>>
>
> Yes, this would be a much nicer way to detect fast symlinks.
>
> The only thing I'd be concerned about is the possibility of pre-existing
> "slow" symlinks that actually have targets short enough to be "fast"
> symlinks, perhaps in filesystems created by old drivers or by external
> tools. If such links happened to work before, then a change to check
> i_size would break them.
>
> This may not be an issue in practice. I checked some old ext4 versions,
> ext2 from Linux 0.99.7, e2fsprogs, Android's ext4_utils, and FreeBSD's
> ext2 driver.
> They all create "fast" symlinks if the length of the symlink target length
> excluding the terminating null (i_size) is < 60.

I did a similar analysis with similar results.


Cheers, Andreas






Attachments:
signature.asc (833.00 B)
Message signed with OpenPGP using GPGMail

2016-11-21 23:19:28

by Eric Biggers

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix reading new encrypted symlinks on no-journal filesystems

On Fri, Nov 18, 2016 at 02:52:22PM -0700, Andreas Dilger wrote:
>
> > Yes, this would be a much nicer way to detect fast symlinks.
> >
> > The only thing I'd be concerned about is the possibility of pre-existing
> > "slow" symlinks that actually have targets short enough to be "fast"
> > symlinks, perhaps in filesystems created by old drivers or by external
> > tools. If such links happened to work before, then a change to check
> > i_size would break them.
> >
> > This may not be an issue in practice. I checked some old ext4 versions,
> > ext2 from Linux 0.99.7, e2fsprogs, Android's ext4_utils, and FreeBSD's
> > ext2 driver.
> > They all create "fast" symlinks if the length of the symlink target length
> > excluding the terminating null (i_size) is < 60.
>
> I did a similar analysis with similar results.
>

Ted, what would you say about Andreas' suggestion to use i_size to distinguish
fast symlinks from slow symlinks?

It looks like this was discussed some years ago but the discussion died out and
no change was made: see https://www.spinics.net/lists/linux-ext4/msg05693.html

Given the investigation I did it seems it would very likely be safe, but we can
never be 100% sure it won't break some obscure tool or (version of a tool) to
create symlinks on ext2/ext3/ext4 filesystems we don't know about.

Eric

2016-11-22 22:49:57

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix reading new encrypted symlinks on no-journal filesystems

On Nov 21, 2016, at 4:19 PM, Eric Biggers <[email protected]> wrote:
>
> On Fri, Nov 18, 2016 at 02:52:22PM -0700, Andreas Dilger wrote:
>>
>>> Yes, this would be a much nicer way to detect fast symlinks.
>>>
>>> The only thing I'd be concerned about is the possibility of pre-existing
>>> "slow" symlinks that actually have targets short enough to be "fast"
>>> symlinks, perhaps in filesystems created by old drivers or by external
>>> tools. If such links happened to work before, then a change to check
>>> i_size would break them.
>>>
>>> This may not be an issue in practice. I checked some old ext4 versions,
>>> ext2 from Linux 0.99.7, e2fsprogs, Android's ext4_utils, and FreeBSD's
>>> ext2 driver. They all create "fast" symlinks if the length of the
>>> symlink target length excluding the terminating null (i_size) is < 60.
>>
>> I did a similar analysis with similar results.
>>
>
> Ted, what would you say about Andreas' suggestion to use i_size to
> distinguish fast symlinks from slow symlinks?
>
> It looks like this was discussed some years ago but the discussion died
> out and no change was made: see https://www.spinics.net/lists/linux-ext4/msg05693.html
>
> Given the investigation I did it seems it would very likely be safe, but
> we can never be 100% sure it won't break some obscure tool or (version of
> a tool) to create symlinks on ext2/ext3/ext4 filesystems we don't know
> about.

Conversely, we've had fast symlinks break a few times due to additional
blocks being stored with the fast symlink (external xattr blocks for SELinux,
now encryption), possibly others in the future, and such breakages are
backwards incompatible (i.e. if there is some new way to add a block to
a symlink it will break all older kernels).

Cheers, Andreas






Attachments:
signature.asc (833.00 B)
Message signed with OpenPGP using GPGMail

2016-12-01 19:27:07

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix reading new encrypted symlinks on no-journal filesystems

So in the long term I think we can move to using i_size to determine
fast symlinks, but I think there's a bigger issue hiding here, which
is that we shouldn't be using delayed allocation for symlinks in the
first place. In the first place, symlinks will never be more than a
block, so there's no advantage in using delalloc. In the second
place, it means that on a crash the symlink could invalid (zero
length) --- and on a commit the symlink should be commited to disk.

Eric, do you have a test case which verifies this? Normally I would
think this rarely happens because the dentry cache should hide this
particular issue. I think a simpler fix up, which also avoids the
"symlink could be lost on a crash" problem, is this:


diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b48ca0392b9c..4ffb680780e5 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2902,7 +2902,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,

index = pos >> PAGE_SHIFT;

- if (ext4_nonda_switch(inode->i_sb)) {
+ if (ext4_nonda_switch(inode->i_sb) ||
+ S_ISLNK(inode->i_mode)) {
*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
return ext4_write_begin(file, mapping, pos,
len, flags, pagep, fsdata);


- Ted


2016-12-01 19:57:35

by Eric Biggers

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix reading new encrypted symlinks on no-journal filesystems

On Thu, Dec 01, 2016 at 02:27:05PM -0500, Theodore Ts'o wrote:
> So in the long term I think we can move to using i_size to determine
> fast symlinks, but I think there's a bigger issue hiding here, which
> is that we shouldn't be using delayed allocation for symlinks in the
> first place. In the first place, symlinks will never be more than a
> block, so there's no advantage in using delalloc. In the second
> place, it means that on a crash the symlink could invalid (zero
> length) --- and on a commit the symlink should be commited to disk.
>
> Eric, do you have a test case which verifies this? Normally I would
> think this rarely happens because the dentry cache should hide this
> particular issue. I think a simpler fix up, which also avoids the
> "symlink could be lost on a crash" problem, is this:
>
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index b48ca0392b9c..4ffb680780e5 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -2902,7 +2902,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
>
> index = pos >> PAGE_SHIFT;
>
> - if (ext4_nonda_switch(inode->i_sb)) {
> + if (ext4_nonda_switch(inode->i_sb) ||
> + S_ISLNK(inode->i_mode)) {
> *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
> return ext4_write_begin(file, mapping, pos,
> len, flags, pagep, fsdata);
>
>
> - Ted
>

Hi Ted,

The problem of a slow encrypted symlink being misinterpreted as a fast one can
be reproduced by generic/360 if you run it just right:

kvm-xfstests -c nojournal -m test_dummy_encryption generic/360

It can also be reproduced by generic/402 from v2 of my encryption xfstests
patchset with 'kvm-xfstests -c nojournal generic/402'. But running that one
requires applying xfstests and xfsprogs patches (until they get upstream).

The problem can be reliably reproduced because the symlink target is not cached
by the VFS. ext4_encrypted_get_link() gets called whenever the symlink is
followed or whenever someone does sys_readlink.

I agree that delayed allocation doesn't make sense for symlinks so your proposed
fix is better. I verified that it passes both of the xfstests mentioned above.

Eric

2016-12-02 17:14:25

by Theodore Ts'o

[permalink] [raw]
Subject: [PATCH] ext4: fix reading new encrypted symlinks on no-journal file systems

On a filesystem with no journal, a symlink longer than about 32
characters (exact length depending on padding for encryption) could not
be followed or read immediately after being created in an encrypted
directory. This happened because when the symlink data went through the
delayed allocation path instead of the journaling path, the symlink was
incorrectly detected as a "fast" symlink rather than a "slow" symlink
until its data was written out.

To fix this, disable delayed allocation for symlinks, since there is
no benefit for delayed allocation anyway.

Reported-by: Eric Biggers <[email protected]>
Signed-off-by: Theodore Ts'o <[email protected]>
---
fs/ext4/inode.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 59a518ad6bb2..a1eac0054203 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2902,7 +2902,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,

index = pos >> PAGE_SHIFT;

- if (ext4_nonda_switch(inode->i_sb)) {
+ if (ext4_nonda_switch(inode->i_sb) ||
+ S_ISLNK(inode->i_mode)) {
*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
return ext4_write_begin(file, mapping, pos,
len, flags, pagep, fsdata);
--
2.11.0.rc0.7.gbe5a750


2016-12-02 18:05:34

by Eric Biggers

[permalink] [raw]
Subject: Re: [PATCH] ext4: fix reading new encrypted symlinks on no-journal file systems

On Fri, Dec 02, 2016 at 12:14:22PM -0500, Theodore Ts'o wrote:
> On a filesystem with no journal, a symlink longer than about 32
> characters (exact length depending on padding for encryption) could not
> be followed or read immediately after being created in an encrypted
> directory. This happened because when the symlink data went through the
> delayed allocation path instead of the journaling path, the symlink was
> incorrectly detected as a "fast" symlink rather than a "slow" symlink
> until its data was written out.
>
> To fix this, disable delayed allocation for symlinks, since there is
> no benefit for delayed allocation anyway.
>

Looks good to me.

Eric