Instead of using ext4_bread/ext4_getblk.
When I was trying out Linux 5.19-rc3 some symlinks became inaccessible to
me, with the error "Structure needs cleaning" and the following printed in
the kernel message log:
EXT4-fs error (device nvme0n1p1): ext4_map_blocks:599: inode #7351350:
block 774843950: comm readlink: lblock 0 mapped to illegal pblock
774843950 (length 1)
It looks like the ext4_get_link function introduced in commit 6493792d3299
("ext4: convert symlink external data block mapping to bdev") does not
handle links with inline data correctly. I added explicit handling for this
case using ext4_readpage_inline. This fixes the bug and the affected
symlinks become accessible again.
Fixes: 6493792d3299 ("ext4: convert symlink external data block mapping to bdev")
Signed-off-by: Torge Matthies <[email protected]>
---
fs/ext4/symlink.c | 37 +++++++++++++++++++++++++++++++++++++
1 file changed, 37 insertions(+)
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index d281f5bcc526..ec4fc2d23efc 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -19,7 +19,10 @@
*/
#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
#include <linux/namei.h>
+#include <linux/pagemap.h>
#include "ext4.h"
#include "xattr.h"
@@ -65,6 +68,37 @@ static int ext4_encrypted_symlink_getattr(struct user_namespace *mnt_userns,
return fscrypt_symlink_getattr(path, stat);
}
+static void ext4_free_link_inline(void *folio)
+{
+ folio_unlock(folio);
+ folio_put(folio);
+}
+
+static const char *ext4_get_link_inline(struct inode *inode,
+ struct delayed_call *callback)
+{
+ struct folio *folio;
+ char *ret;
+ int err;
+
+ folio = folio_alloc(GFP_NOFS, 0);
+ if (!folio)
+ return ERR_PTR(-ENOMEM);
+ folio_lock(folio);
+ folio->index = 0;
+
+ err = ext4_readpage_inline(inode, &folio->page);
+ if (err) {
+ folio_put(folio);
+ return ERR_PTR(err);
+ }
+
+ set_delayed_call(callback, ext4_free_link_inline, folio);
+ ret = folio_address(folio);
+ nd_terminate_link(ret, inode->i_size, inode->i_sb->s_blocksize - 1);
+ return ret;
+}
+
static void ext4_free_link(void *bh)
{
brelse(bh);
@@ -75,6 +109,9 @@ static const char *ext4_get_link(struct dentry *dentry, struct inode *inode,
{
struct buffer_head *bh;
+ if (ext4_has_inline_data(inode))
+ return ext4_get_link_inline(inode, callback);
+
if (!dentry) {
bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT);
if (IS_ERR(bh))
--
2.36.1
Hi, Torge.
On 2022/6/28 11:34, Torge Matthies wrote:
> Instead of using ext4_bread/ext4_getblk.
>
> When I was trying out Linux 5.19-rc3 some symlinks became inaccessible to
> me, with the error "Structure needs cleaning" and the following printed in
> the kernel message log:
>
> EXT4-fs error (device nvme0n1p1): ext4_map_blocks:599: inode #7351350:
> block 774843950: comm readlink: lblock 0 mapped to illegal pblock
> 774843950 (length 1)
>
> It looks like the ext4_get_link function introduced in commit 6493792d3299
> ("ext4: convert symlink external data block mapping to bdev") does not
> handle links with inline data correctly. I added explicit handling for this
> case using ext4_readpage_inline. This fixes the bug and the affected
> symlinks become accessible again.
>
> Fixes: 6493792d3299 ("ext4: convert symlink external data block mapping to bdev")
> Signed-off-by: Torge Matthies <[email protected]>
Thanks for the fix patch! I missed the inline_data case for the symlink inode
in commit 6493792d3299.
> ---
> fs/ext4/symlink.c | 37 +++++++++++++++++++++++++++++++++++++
> 1 file changed, 37 insertions(+)
>
> diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
> index d281f5bcc526..ec4fc2d23efc 100644
> --- a/fs/ext4/symlink.c
> +++ b/fs/ext4/symlink.c
> @@ -19,7 +19,10 @@
> */
>
> #include <linux/fs.h>
> +#include <linux/gfp.h>
> +#include <linux/mm.h>
> #include <linux/namei.h>
> +#include <linux/pagemap.h>
> #include "ext4.h"
> #include "xattr.h"
>
> @@ -65,6 +68,37 @@ static int ext4_encrypted_symlink_getattr(struct user_namespace *mnt_userns,
> return fscrypt_symlink_getattr(path, stat);
> }
>
> +static void ext4_free_link_inline(void *folio)
> +{
> + folio_unlock(folio);
> + folio_put(folio);
> +}
> +
> +static const char *ext4_get_link_inline(struct inode *inode,
> + struct delayed_call *callback)
> +{
> + struct folio *folio;
> + char *ret;
> + int err;
> +
> + folio = folio_alloc(GFP_NOFS, 0)> + if (!folio)
> + return ERR_PTR(-ENOMEM);
> + folio_lock(folio);
> + folio->index = 0;
> +
> + err = ext4_readpage_inline(inode, &folio->page);
> + if (err) {
> + folio_put(folio);
> + return ERR_PTR(err);
> + }
> +
We need to handle the case of RCU walk in pick_link(), almost all above
functions could sleep. The inline_data is a left over case, we cannot create
new inline symlink now, maybe we can just disable the RCU walk for simple?
or else we have to introduce some other no sleep helpers to get raw inode's
cached buffer_head.
BTW, why not just open code by calling ext4_read_inline_data()? The folio
conversion seems unnecessary.
Thanks,
Yi.
> + set_delayed_call(callback, ext4_free_link_inline, folio);
> + ret = folio_address(folio);
> + nd_terminate_link(ret, inode->i_size, inode->i_sb->s_blocksize - 1);
> + return ret;
> +}
> +
> static void ext4_free_link(void *bh)
> {
> brelse(bh);
> @@ -75,6 +109,9 @@ static const char *ext4_get_link(struct dentry *dentry, struct inode *inode,
> {
> struct buffer_head *bh;
>
> + if (ext4_has_inline_data(inode))
> + return ext4_get_link_inline(inode, callback);
> +
> if (!dentry) {
> bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT);
> if (IS_ERR(bh))
>
Hello Yi,
On Tue, 28 Jun 2022 at 10:40, Zhang Yi <[email protected]> wrote:
>
> Hi, Torge.
>
> On 2022/6/28 11:34, Torge Matthies wrote:
> > Instead of using ext4_bread/ext4_getblk.
> >
> > When I was trying out Linux 5.19-rc3 some symlinks became inaccessible to
> > me, with the error "Structure needs cleaning" and the following printed in
> > the kernel message log:
> >
> > EXT4-fs error (device nvme0n1p1): ext4_map_blocks:599: inode #7351350:
> > block 774843950: comm readlink: lblock 0 mapped to illegal pblock
> > 774843950 (length 1)
> >
> > It looks like the ext4_get_link function introduced in commit 6493792d3299
> > ("ext4: convert symlink external data block mapping to bdev") does not
> > handle links with inline data correctly. I added explicit handling for this
> > case using ext4_readpage_inline. This fixes the bug and the affected
> > symlinks become accessible again.
> >
> > Fixes: 6493792d3299 ("ext4: convert symlink external data block mapping to bdev")
> > Signed-off-by: Torge Matthies <[email protected]>
>
> Thanks for the fix patch! I missed the inline_data case for the symlink inode
> in commit 6493792d3299.
>
> > ---
> > fs/ext4/symlink.c | 37 +++++++++++++++++++++++++++++++++++++
> > 1 file changed, 37 insertions(+)
> >
> > diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
> > index d281f5bcc526..ec4fc2d23efc 100644
> > --- a/fs/ext4/symlink.c
> > +++ b/fs/ext4/symlink.c
> > @@ -19,7 +19,10 @@
> > */
> >
> > #include <linux/fs.h>
> > +#include <linux/gfp.h>
> > +#include <linux/mm.h>
> > #include <linux/namei.h>
> > +#include <linux/pagemap.h>
> > #include "ext4.h"
> > #include "xattr.h"
> >
> > @@ -65,6 +68,37 @@ static int ext4_encrypted_symlink_getattr(struct user_namespace *mnt_userns,
> > return fscrypt_symlink_getattr(path, stat);
> > }
> >
> > +static void ext4_free_link_inline(void *folio)
> > +{
> > + folio_unlock(folio);
> > + folio_put(folio);
> > +}
> > +
> > +static const char *ext4_get_link_inline(struct inode *inode,
> > + struct delayed_call *callback)
> > +{
> > + struct folio *folio;
> > + char *ret;
> > + int err;
> > +
> > + folio = folio_alloc(GFP_NOFS, 0)> + if (!folio)
> > + return ERR_PTR(-ENOMEM);
> > + folio_lock(folio);
> > + folio->index = 0;
> > +
> > + err = ext4_readpage_inline(inode, &folio->page);
> > + if (err) {
> > + folio_put(folio);
> > + return ERR_PTR(err);
> > + }
> > +
>
> We need to handle the case of RCU walk in pick_link(), almost all above
> functions could sleep. The inline_data is a left over case, we cannot create
> new inline symlink now, maybe we can just disable the RCU walk for simple?
> or else we have to introduce some other no sleep helpers to get raw inode's
> cached buffer_head.
I'mma be honest, I don't know what most of that means, I barely managed
to scrape together this patch with the limited kernel knowledge I have.
If you know how to fix these things I'd prefer if you (or someone else)
could send a proper fix in. Consider my first mail as just a bug
report, I was prepared to fix simple problems with my patch, but this
is out of my league.
> BTW, why not just open code by calling ext4_read_inline_data()? The folio
> conversion seems unnecessary.
This way I didn't have to export ext4_read_inline_data from its file.
Feel free to improve this.
-Torge
> Thanks,
> Yi.
>
> > + set_delayed_call(callback, ext4_free_link_inline, folio);
> > + ret = folio_address(folio);
> > + nd_terminate_link(ret, inode->i_size, inode->i_sb->s_blocksize - 1);
> > + return ret;
> > +}
> > +
> > static void ext4_free_link(void *bh)
> > {
> > brelse(bh);
> > @@ -75,6 +109,9 @@ static const char *ext4_get_link(struct dentry *dentry, struct inode *inode,
> > {
> > struct buffer_head *bh;
> >
> > + if (ext4_has_inline_data(inode))
> > + return ext4_get_link_inline(inode, callback);
> > +
> > if (!dentry) {
> > bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT);
> > if (IS_ERR(bh))
> >
On 2022/6/28 17:53, Torge Matthies wrote:
> Hello Yi,
>
> On Tue, 28 Jun 2022 at 10:40, Zhang Yi <[email protected]> wrote:
>>
>> Hi, Torge.
>>
>> On 2022/6/28 11:34, Torge Matthies wrote:
>>> Instead of using ext4_bread/ext4_getblk.
>>>
>>> When I was trying out Linux 5.19-rc3 some symlinks became inaccessible to
>>> me, with the error "Structure needs cleaning" and the following printed in
>>> the kernel message log:
>>>
>>> EXT4-fs error (device nvme0n1p1): ext4_map_blocks:599: inode #7351350:
>>> block 774843950: comm readlink: lblock 0 mapped to illegal pblock
>>> 774843950 (length 1)
>>>
>>> It looks like the ext4_get_link function introduced in commit 6493792d3299
>>> ("ext4: convert symlink external data block mapping to bdev") does not
>>> handle links with inline data correctly. I added explicit handling for this
>>> case using ext4_readpage_inline. This fixes the bug and the affected
>>> symlinks become accessible again.
>>>
>>> Fixes: 6493792d3299 ("ext4: convert symlink external data block mapping to bdev")
>>> Signed-off-by: Torge Matthies <[email protected]>
>>
>> Thanks for the fix patch! I missed the inline_data case for the symlink inode
>> in commit 6493792d3299.
>>
>>> ---
>>> fs/ext4/symlink.c | 37 +++++++++++++++++++++++++++++++++++++
>>> 1 file changed, 37 insertions(+)
>>>
>>> diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
>>> index d281f5bcc526..ec4fc2d23efc 100644
>>> --- a/fs/ext4/symlink.c
>>> +++ b/fs/ext4/symlink.c
>>> @@ -19,7 +19,10 @@
>>> */
>>>
>>> #include <linux/fs.h>
>>> +#include <linux/gfp.h>
>>> +#include <linux/mm.h>
>>> #include <linux/namei.h>
>>> +#include <linux/pagemap.h>
>>> #include "ext4.h"
>>> #include "xattr.h"
>>>
>>> @@ -65,6 +68,37 @@ static int ext4_encrypted_symlink_getattr(struct user_namespace *mnt_userns,
>>> return fscrypt_symlink_getattr(path, stat);
>>> }
>>>
>>> +static void ext4_free_link_inline(void *folio)
>>> +{
>>> + folio_unlock(folio);
>>> + folio_put(folio);
>>> +}
>>> +
>>> +static const char *ext4_get_link_inline(struct inode *inode,
>>> + struct delayed_call *callback)
>>> +{
>>> + struct folio *folio;
>>> + char *ret;
>>> + int err;
>>> +
>>> + folio = folio_alloc(GFP_NOFS, 0)> + if (!folio)
>>> + return ERR_PTR(-ENOMEM);
>>> + folio_lock(folio);
>>> + folio->index = 0;
>>> +
>>> + err = ext4_readpage_inline(inode, &folio->page);
>>> + if (err) {
>>> + folio_put(folio);
>>> + return ERR_PTR(err);
>>> + }
>>> +
>>
>> We need to handle the case of RCU walk in pick_link(), almost all above
>> functions could sleep. The inline_data is a left over case, we cannot create
>> new inline symlink now, maybe we can just disable the RCU walk for simple?
>> or else we have to introduce some other no sleep helpers to get raw inode's
>> cached buffer_head.
>
> I'mma be honest, I don't know what most of that means, I barely managed
> to scrape together this patch with the limited kernel knowledge I have.
> If you know how to fix these things I'd prefer if you (or someone else)
> could send a proper fix in. Consider my first mail as just a bug
> report, I was prepared to fix simple problems with my patch, but this
> is out of my league.
>
The RCU walk means walk_component() in LOOKUP_RCU mode, we cannot call functions that
would sleep. And it looks that ext4_encrypted_get_link() has the same problem, I will
send a fix later.
Thanks,
Yi.