The rec_len field in the directory entry has to be a multiple of 4. A
corrupted filesystem image can be used to hit a BUG_ON() in
ext4_rec_len_to_disk(), called from make_indexed_dir().
------------[ cut here ]------------
kernel BUG at fs/ext4/ext4.h:2413!
...
RIP: 0010:make_indexed_dir+0x53f/0x5f0
...
Call Trace:
<TASK>
? add_dirent_to_buf+0x1b2/0x200
ext4_add_entry+0x36e/0x480
ext4_add_nondir+0x2b/0xc0
ext4_create+0x163/0x200
path_openat+0x635/0xe90
do_filp_open+0xb4/0x160
? __create_object.isra.0+0x1de/0x3b0
? _raw_spin_unlock+0x12/0x30
do_sys_openat2+0x91/0x150
__x64_sys_open+0x6c/0xa0
do_syscall_64+0x3c/0x80
entry_SYSCALL_64_after_hwframe+0x46/0xb0
This fix simply returns -EFSCORRUPTED when this happens.
CC: [email protected]
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216540
Signed-off-by: Luís Henriques <[email protected]>
---
fs/ext4/namei.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 3a31b662f661..06803292e394 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2254,8 +2254,18 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
memset(de, 0, len); /* wipe old data */
de = (struct ext4_dir_entry_2 *) data2;
top = data2 + len;
- while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
+ while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) {
+ if (de->rec_len & 3) {
+ EXT4_ERROR_INODE(
+ dir,
+ "rec_len for '..' not multiple of 4 (%u)",
+ de->rec_len);
+ brelse(bh2);
+ brelse(bh);
+ return -EFSCORRUPTED;
+ }
de = de2;
+ }
de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
(char *) de, blocksize);
Hi Lu?s,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on tytso-ext4/dev]
[also build test WARNING on linus/master v6.0 next-20221011]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Lu-s-Henriques/ext4-fix-BUG_ON-when-a-directory-entry-has-an-invalid-rec_len/20221011-235817
base: https://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git dev
config: ia64-randconfig-s042-20221010
compiler: ia64-linux-gcc (GCC) 12.1.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.4-39-gce1a6720-dirty
# https://github.com/intel-lab-lkp/linux/commit/6c417ef44372b75303c036c6026bdc455117ca94
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Lu-s-Henriques/ext4-fix-BUG_ON-when-a-directory-entry-has-an-invalid-rec_len/20221011-235817
git checkout 6c417ef44372b75303c036c6026bdc455117ca94
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=ia64 SHELL=/bin/bash fs/ext4/
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <[email protected]>
sparse warnings: (new ones prefixed by >>)
>> fs/ext4/namei.c:2263:23: sparse: sparse: restricted __le16 degrades to integer
vim +2263 fs/ext4/namei.c
2201
2202 /*
2203 * This converts a one block unindexed directory to a 3 block indexed
2204 * directory, and adds the dentry to the indexed directory.
2205 */
2206 static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
2207 struct inode *dir,
2208 struct inode *inode, struct buffer_head *bh)
2209 {
2210 struct buffer_head *bh2;
2211 struct dx_root *root;
2212 struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
2213 struct dx_entry *entries;
2214 struct ext4_dir_entry_2 *de, *de2;
2215 char *data2, *top;
2216 unsigned len;
2217 int retval;
2218 unsigned blocksize;
2219 ext4_lblk_t block;
2220 struct fake_dirent *fde;
2221 int csum_size = 0;
2222
2223 if (ext4_has_metadata_csum(inode->i_sb))
2224 csum_size = sizeof(struct ext4_dir_entry_tail);
2225
2226 blocksize = dir->i_sb->s_blocksize;
2227 dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
2228 BUFFER_TRACE(bh, "get_write_access");
2229 retval = ext4_journal_get_write_access(handle, dir->i_sb, bh,
2230 EXT4_JTR_NONE);
2231 if (retval) {
2232 ext4_std_error(dir->i_sb, retval);
2233 brelse(bh);
2234 return retval;
2235 }
2236 root = (struct dx_root *) bh->b_data;
2237
2238 /* The 0th block becomes the root, move the dirents out */
2239 fde = &root->dotdot;
2240 de = (struct ext4_dir_entry_2 *)((char *)fde +
2241 ext4_rec_len_from_disk(fde->rec_len, blocksize));
2242 if ((char *) de >= (((char *) root) + blocksize)) {
2243 EXT4_ERROR_INODE(dir, "invalid rec_len for '..'");
2244 brelse(bh);
2245 return -EFSCORRUPTED;
2246 }
2247 len = ((char *) root) + (blocksize - csum_size) - (char *) de;
2248
2249 /* Allocate new block for the 0th block's dirents */
2250 bh2 = ext4_append(handle, dir, &block);
2251 if (IS_ERR(bh2)) {
2252 brelse(bh);
2253 return PTR_ERR(bh2);
2254 }
2255 ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
2256 data2 = bh2->b_data;
2257
2258 memcpy(data2, de, len);
2259 memset(de, 0, len); /* wipe old data */
2260 de = (struct ext4_dir_entry_2 *) data2;
2261 top = data2 + len;
2262 while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) {
> 2263 if (de->rec_len & 3) {
2264 EXT4_ERROR_INODE(
2265 dir,
2266 "rec_len for '..' not multiple of 4 (%u)",
2267 de->rec_len);
2268 brelse(bh2);
2269 brelse(bh);
2270 return -EFSCORRUPTED;
2271 }
2272 de = de2;
2273 }
2274 de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
2275 (char *) de, blocksize);
2276
2277 if (csum_size)
2278 ext4_initialize_dirent_tail(bh2, blocksize);
2279
2280 /* Initialize the root; the dot dirents already exist */
2281 de = (struct ext4_dir_entry_2 *) (&root->dotdot);
2282 de->rec_len = ext4_rec_len_to_disk(
2283 blocksize - ext4_dir_rec_len(2, NULL), blocksize);
2284 memset (&root->info, 0, sizeof(root->info));
2285 root->info.info_length = sizeof(root->info);
2286 if (ext4_hash_in_dirent(dir))
2287 root->info.hash_version = DX_HASH_SIPHASH;
2288 else
2289 root->info.hash_version =
2290 EXT4_SB(dir->i_sb)->s_def_hash_version;
2291
2292 entries = root->entries;
2293 dx_set_block(entries, 1);
2294 dx_set_count(entries, 1);
2295 dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
2296
2297 /* Initialize as for dx_probe */
2298 fname->hinfo.hash_version = root->info.hash_version;
2299 if (fname->hinfo.hash_version <= DX_HASH_TEA)
2300 fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
2301 fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
2302
2303 /* casefolded encrypted hashes are computed on fname setup */
2304 if (!ext4_hash_in_dirent(dir))
2305 ext4fs_dirhash(dir, fname_name(fname),
2306 fname_len(fname), &fname->hinfo);
2307
2308 memset(frames, 0, sizeof(frames));
2309 frame = frames;
2310 frame->entries = entries;
2311 frame->at = entries;
2312 frame->bh = bh;
2313
2314 retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
2315 if (retval)
2316 goto out_frames;
2317 retval = ext4_handle_dirty_dirblock(handle, dir, bh2);
2318 if (retval)
2319 goto out_frames;
2320
2321 de = do_split(handle,dir, &bh2, frame, &fname->hinfo);
2322 if (IS_ERR(de)) {
2323 retval = PTR_ERR(de);
2324 goto out_frames;
2325 }
2326
2327 retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2);
2328 out_frames:
2329 /*
2330 * Even if the block split failed, we have to properly write
2331 * out all the changes we did so far. Otherwise we can end up
2332 * with corrupted filesystem.
2333 */
2334 if (retval)
2335 ext4_mark_inode_dirty(handle, dir);
2336 dx_release(frames);
2337 brelse(bh2);
2338 return retval;
2339 }
2340
--
0-DAY CI Kernel Test Service
https://01.org/lkp
On Tue, Oct 11, 2022 at 08:57:07PM -0400, Theodore Ts'o wrote:
> On Tue, Oct 11, 2022 at 04:57:45PM +0100, Lu?s Henriques wrote:
> > diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> > index 3a31b662f661..06803292e394 100644
> > --- a/fs/ext4/namei.c
> > +++ b/fs/ext4/namei.c
> > @@ -2254,8 +2254,18 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
> > memset(de, 0, len); /* wipe old data */
> > de = (struct ext4_dir_entry_2 *) data2;
> > top = data2 + len;
> > - while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
> > + while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) {
> > + if (de->rec_len & 3) {
>
> As the kernel test bot as flaged, de->rec_len needs to be byte swapped
> on big endian machines. Also, for block sizes larger than 64k the low
> 2 bits are used to encode rec_len sizes 256k-4. All of this is
> encoded in ext4_rec_len_from_disk().
>
> However, I think a better thing to do is instead of doing this one
> check on rec len, that instead we call ext4_check_dir_entry(), which
> will do this check, and many more besides. It will also avoid some
> code duplication, since it will take care of calling EXT4_ERROR_INODE
> with the appropriate explanatory message.
Awesome, thanks for the explanation, Ted. I'll work on a v2 of the patch
that'll use ext4_check_dir_entry() and send it after running some tests
with it. Thanks for the suggestion!
Cheers,
--
Lu?s