2006-08-10 01:20:38

by Mingming Cao

[permalink] [raw]
Subject: [PATCH 1/9] extents for ext4


Add extent map support to ext4. Patch from Alex Tomas.

On disk extents format:
/*
* this is extent on-disk structure
* it's used at the bottom of the tree
*/
struct ext3_extent {
__le32 ee_block; /* first logical block extent covers */
__le16 ee_len; /* number of blocks covered by extent */
__le16 ee_start_hi; /* high 16 bits of physical block */
__le32 ee_start; /* low 32 bigs of physical block */
};

Signed-Off-By: Alex Tomas <[email protected]>
Signed-Off-By: Mingming Cao <[email protected]>



---

linux-2.6.18-rc4-ming/fs/ext4/Makefile | 2
linux-2.6.18-rc4-ming/fs/ext4/dir.c | 3
linux-2.6.18-rc4-ming/fs/ext4/extents.c | 2075 ++++++++++++++++++
linux-2.6.18-rc4-ming/fs/ext4/ialloc.c | 11
linux-2.6.18-rc4-ming/fs/ext4/inode.c | 17
linux-2.6.18-rc4-ming/fs/ext4/ioctl.c | 1
linux-2.6.18-rc4-ming/fs/ext4/super.c | 10
linux-2.6.18-rc4-ming/include/linux/ext4_fs.h | 31
linux-2.6.18-rc4-ming/include/linux/ext4_fs_extents.h | 196 +
linux-2.6.18-rc4-ming/include/linux/ext4_fs_i.h | 13
linux-2.6.18-rc4-ming/include/linux/ext4_fs_sb.h | 10
linux-2.6.18-rc4-ming/include/linux/ext4_jbd2.h | 15
12 files changed, 2366 insertions(+), 18 deletions(-)

diff -puN fs/ext4/dir.c~ext4-extents fs/ext4/dir.c
--- linux-2.6.18-rc4/fs/ext4/dir.c~ext4-extents 2006-08-09 15:41:44.192226487 -0700
+++ linux-2.6.18-rc4-ming/fs/ext4/dir.c 2006-08-09 15:41:44.296227329 -0700
@@ -131,8 +131,7 @@ static int ext4_readdir(struct file * fi
struct buffer_head *bh = NULL;

map_bh.b_state = 0;
- err = ext4_get_blocks_handle(NULL, inode, blk, 1,
- &map_bh, 0, 0);
+ err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0);
if (err > 0) {
page_cache_readahead(sb->s_bdev->bd_inode->i_mapping,
&filp->f_ra,
diff -puN /dev/null fs/ext4/extents.c
--- /dev/null 2006-08-08 14:57:22.983223272 -0700
+++ linux-2.6.18-rc4-ming/fs/ext4/extents.c 2006-08-09 15:41:44.303227386 -0700
@@ -0,0 +1,2075 @@
+/*
+ * Copyright (c) 2003-2006, Cluster File Systems, Inc, [email protected]
+ * Written by Alex Tomas <[email protected]>
+ *
+ * Architecture independence:
+ * Copyright (c) 2005, Bull S.A.
+ * Written by Pierre Peiffer <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public Licens
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
+ */
+
+/*
+ * Extents support for EXT4
+ *
+ * TODO:
+ * - ext4*_error() should be used in some situations
+ * - analyze all BUG()/BUG_ON(), use -EIO where appropriate
+ * - smart tree reduction
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/ext4_jbd2.h>
+#include <linux/jbd.h>
+#include <linux/smp_lock.h>
+#include <linux/highuid.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/ext4_fs_extents.h>
+#include <asm/uaccess.h>
+
+
+static int ext4_ext_check_header(const char *function, struct inode *inode,
+ struct ext4_extent_header *eh)
+{
+ const char *error_msg = NULL;
+
+ if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
+ error_msg = "invalid magic";
+ goto corrupted;
+ }
+ if (unlikely(eh->eh_max == 0)) {
+ error_msg = "invalid eh_max";
+ goto corrupted;
+ }
+ if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
+ error_msg = "invalid eh_entries";
+ goto corrupted;
+ }
+ return 0;
+
+corrupted:
+ ext4_error(inode->i_sb, function,
+ "bad header in inode #%lu: %s - magic %x, "
+ "entries %u, max %u, depth %u",
+ inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
+ le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
+ le16_to_cpu(eh->eh_depth));
+
+ return -EIO;
+}
+
+static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed)
+{
+ int err;
+
+ if (handle->h_buffer_credits > needed)
+ return handle;
+ if (!ext4_journal_extend(handle, needed))
+ return handle;
+ err = ext4_journal_restart(handle, needed);
+
+ return handle;
+}
+
+/*
+ * could return:
+ * - EROFS
+ * - ENOMEM
+ */
+static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path)
+{
+ if (path->p_bh) {
+ /* path points to block */
+ return ext4_journal_get_write_access(handle, path->p_bh);
+ }
+ /* path points to leaf/index in inode body */
+ /* we use in-core data, no need to protect them */
+ return 0;
+}
+
+/*
+ * could return:
+ * - EROFS
+ * - ENOMEM
+ * - EIO
+ */
+static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path)
+{
+ int err;
+ if (path->p_bh) {
+ /* path points to block */
+ err = ext4_journal_dirty_metadata(handle, path->p_bh);
+ } else {
+ /* path points to leaf/index in inode body */
+ err = ext4_mark_inode_dirty(handle, inode);
+ }
+ return err;
+}
+
+static int ext4_ext_find_goal(struct inode *inode,
+ struct ext4_ext_path *path,
+ unsigned long block)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ unsigned long bg_start;
+ unsigned long colour;
+ int depth;
+
+ if (path) {
+ struct ext4_extent *ex;
+ depth = path->p_depth;
+
+ /* try to predict block placement */
+ if ((ex = path[depth].p_ext))
+ return le32_to_cpu(ex->ee_start)
+ + (block - le32_to_cpu(ex->ee_block));
+
+ /* it looks index is empty
+ * try to find starting from index itself */
+ if (path[depth].p_bh)
+ return path[depth].p_bh->b_blocknr;
+ }
+
+ /* OK. use inode's group */
+ bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
+ le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
+ colour = (current->pid % 16) *
+ (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
+ return bg_start + colour + block;
+}
+
+static int
+ext4_ext_new_block(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *ex, int *err)
+{
+ int goal, newblock;
+
+ goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
+ newblock = ext4_new_block(handle, inode, goal, err);
+ return newblock;
+}
+
+static inline int ext4_ext_space_block(struct inode *inode)
+{
+ int size;
+
+ size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
+ / sizeof(struct ext4_extent);
+#ifdef AGRESSIVE_TEST
+ if (size > 6)
+ size = 6;
+#endif
+ return size;
+}
+
+static inline int ext4_ext_space_block_idx(struct inode *inode)
+{
+ int size;
+
+ size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
+ / sizeof(struct ext4_extent_idx);
+#ifdef AGRESSIVE_TEST
+ if (size > 5)
+ size = 5;
+#endif
+ return size;
+}
+
+static inline int ext4_ext_space_root(struct inode *inode)
+{
+ int size;
+
+ size = sizeof(EXT4_I(inode)->i_data);
+ size -= sizeof(struct ext4_extent_header);
+ size /= sizeof(struct ext4_extent);
+#ifdef AGRESSIVE_TEST
+ if (size > 3)
+ size = 3;
+#endif
+ return size;
+}
+
+static inline int ext4_ext_space_root_idx(struct inode *inode)
+{
+ int size;
+
+ size = sizeof(EXT4_I(inode)->i_data);
+ size -= sizeof(struct ext4_extent_header);
+ size /= sizeof(struct ext4_extent_idx);
+#ifdef AGRESSIVE_TEST
+ if (size > 4)
+ size = 4;
+#endif
+ return size;
+}
+
+#ifdef EXT_DEBUG
+static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
+{
+ int k, l = path->p_depth;
+
+ ext_debug("path:");
+ for (k = 0; k <= l; k++, path++) {
+ if (path->p_idx) {
+ ext_debug(" %d->%d", le32_to_cpu(path->p_idx->ei_block),
+ le32_to_cpu(path->p_idx->ei_leaf));
+ } else if (path->p_ext) {
+ ext_debug(" %d:%d:%d",
+ le32_to_cpu(path->p_ext->ee_block),
+ le16_to_cpu(path->p_ext->ee_len),
+ le32_to_cpu(path->p_ext->ee_start));
+ } else
+ ext_debug(" []");
+ }
+ ext_debug("\n");
+}
+
+static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
+{
+ int depth = ext_depth(inode);
+ struct ext4_extent_header *eh;
+ struct ext4_extent *ex;
+ int i;
+
+ if (!path)
+ return;
+
+ eh = path[depth].p_hdr;
+ ex = EXT_FIRST_EXTENT(eh);
+
+ for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
+ ext_debug("%d:%d:%d ", le32_to_cpu(ex->ee_block),
+ le16_to_cpu(ex->ee_len),
+ le32_to_cpu(ex->ee_start));
+ }
+ ext_debug("\n");
+}
+#else
+#define ext4_ext_show_path(inode,path)
+#define ext4_ext_show_leaf(inode,path)
+#endif
+
+static void ext4_ext_drop_refs(struct ext4_ext_path *path)
+{
+ int depth = path->p_depth;
+ int i;
+
+ for (i = 0; i <= depth; i++, path++)
+ if (path->p_bh) {
+ brelse(path->p_bh);
+ path->p_bh = NULL;
+ }
+}
+
+/*
+ * binary search for closest index by given block
+ */
+static void
+ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int block)
+{
+ struct ext4_extent_header *eh = path->p_hdr;
+ struct ext4_extent_idx *r, *l, *m;
+
+ BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC);
+ BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max));
+ BUG_ON(le16_to_cpu(eh->eh_entries) <= 0);
+
+ ext_debug("binsearch for %d(idx): ", block);
+
+ l = EXT_FIRST_INDEX(eh) + 1;
+ r = EXT_FIRST_INDEX(eh) + le16_to_cpu(eh->eh_entries) - 1;
+ while (l <= r) {
+ m = l + (r - l) / 2;
+ if (block < le32_to_cpu(m->ei_block))
+ r = m - 1;
+ else
+ l = m + 1;
+ ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ei_block,
+ m, m->ei_block, r, r->ei_block);
+ }
+
+ path->p_idx = l - 1;
+ ext_debug(" -> %d->%d ", le32_to_cpu(path->p_idx->ei_block),
+ le32_to_cpu(path->p_idx->ei_leaf));
+
+#ifdef CHECK_BINSEARCH
+ {
+ struct ext4_extent_idx *chix, *ix;
+ int k;
+
+ chix = ix = EXT_FIRST_INDEX(eh);
+ for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
+ if (k != 0 &&
+ le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
+ printk("k=%d, ix=0x%p, first=0x%p\n", k,
+ ix, EXT_FIRST_INDEX(eh));
+ printk("%u <= %u\n",
+ le32_to_cpu(ix->ei_block),
+ le32_to_cpu(ix[-1].ei_block));
+ }
+ BUG_ON(k && le32_to_cpu(ix->ei_block)
+ <= le32_to_cpu(ix[-1].ei_block));
+ if (block < le32_to_cpu(ix->ei_block))
+ break;
+ chix = ix;
+ }
+ BUG_ON(chix != path->p_idx);
+ }
+#endif
+
+}
+
+/*
+ * binary search for closest extent by given block
+ */
+static void
+ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
+{
+ struct ext4_extent_header *eh = path->p_hdr;
+ struct ext4_extent *r, *l, *m;
+
+ BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC);
+ BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max));
+
+ if (eh->eh_entries == 0) {
+ /*
+ * this leaf is empty yet:
+ * we get such a leaf in split/add case
+ */
+ return;
+ }
+
+ ext_debug("binsearch for %d: ", block);
+
+ l = EXT_FIRST_EXTENT(eh) + 1;
+ r = EXT_FIRST_EXTENT(eh) + le16_to_cpu(eh->eh_entries) - 1;
+
+ while (l <= r) {
+ m = l + (r - l) / 2;
+ if (block < le32_to_cpu(m->ee_block))
+ r = m - 1;
+ else
+ l = m + 1;
+ ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ee_block,
+ m, m->ee_block, r, r->ee_block);
+ }
+
+ path->p_ext = l - 1;
+ ext_debug(" -> %d:%d:%d ",
+ le32_to_cpu(path->p_ext->ee_block),
+ le32_to_cpu(path->p_ext->ee_start),
+ le16_to_cpu(path->p_ext->ee_len));
+
+#ifdef CHECK_BINSEARCH
+ {
+ struct ext4_extent *chex, *ex;
+ int k;
+
+ chex = ex = EXT_FIRST_EXTENT(eh);
+ for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
+ BUG_ON(k && le32_to_cpu(ex->ee_block)
+ <= le32_to_cpu(ex[-1].ee_block));
+ if (block < le32_to_cpu(ex->ee_block))
+ break;
+ chex = ex;
+ }
+ BUG_ON(chex != path->p_ext);
+ }
+#endif
+
+}
+
+int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
+{
+ struct ext4_extent_header *eh;
+
+ eh = ext_inode_hdr(inode);
+ eh->eh_depth = 0;
+ eh->eh_entries = 0;
+ eh->eh_magic = EXT4_EXT_MAGIC;
+ eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode));
+ ext4_mark_inode_dirty(handle, inode);
+ ext4_ext_invalidate_cache(inode);
+ return 0;
+}
+
+struct ext4_ext_path *
+ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path)
+{
+ struct ext4_extent_header *eh;
+ struct buffer_head *bh;
+ short int depth, i, ppos = 0, alloc = 0;
+
+ eh = ext_inode_hdr(inode);
+ BUG_ON(eh == NULL);
+ if (ext4_ext_check_header(__FUNCTION__, inode, eh))
+ return ERR_PTR(-EIO);
+
+ i = depth = ext_depth(inode);
+
+ /* account possible depth increase */
+ if (!path) {
+ path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 2),
+ GFP_NOFS);
+ if (!path)
+ return ERR_PTR(-ENOMEM);
+ alloc = 1;
+ }
+ memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1));
+ path[0].p_hdr = eh;
+
+ /* walk through the tree */
+ while (i) {
+ ext_debug("depth %d: num %d, max %d\n",
+ ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
+ ext4_ext_binsearch_idx(inode, path + ppos, block);
+ path[ppos].p_block = le32_to_cpu(path[ppos].p_idx->ei_leaf);
+ path[ppos].p_depth = i;
+ path[ppos].p_ext = NULL;
+
+ bh = sb_bread(inode->i_sb, path[ppos].p_block);
+ if (!bh)
+ goto err;
+
+ eh = ext_block_hdr(bh);
+ ppos++;
+ BUG_ON(ppos > depth);
+ path[ppos].p_bh = bh;
+ path[ppos].p_hdr = eh;
+ i--;
+
+ if (ext4_ext_check_header(__FUNCTION__, inode, eh))
+ goto err;
+ }
+
+ path[ppos].p_depth = i;
+ path[ppos].p_hdr = eh;
+ path[ppos].p_ext = NULL;
+ path[ppos].p_idx = NULL;
+
+ if (ext4_ext_check_header(__FUNCTION__, inode, eh))
+ goto err;
+
+ /* find extent */
+ ext4_ext_binsearch(inode, path + ppos, block);
+
+ ext4_ext_show_path(inode, path);
+
+ return path;
+
+err:
+ ext4_ext_drop_refs(path);
+ if (alloc)
+ kfree(path);
+ return ERR_PTR(-EIO);
+}
+
+/*
+ * insert new index [logical;ptr] into the block at cupr
+ * it check where to insert: before curp or after curp
+ */
+static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *curp,
+ int logical, int ptr)
+{
+ struct ext4_extent_idx *ix;
+ int len, err;
+
+ if ((err = ext4_ext_get_access(handle, inode, curp)))
+ return err;
+
+ BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block));
+ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
+ if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
+ /* insert after */
+ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) {
+ len = (len - 1) * sizeof(struct ext4_extent_idx);
+ len = len < 0 ? 0 : len;
+ ext_debug("insert new index %d after: %d. "
+ "move %d from 0x%p to 0x%p\n",
+ logical, ptr, len,
+ (curp->p_idx + 1), (curp->p_idx + 2));
+ memmove(curp->p_idx + 2, curp->p_idx + 1, len);
+ }
+ ix = curp->p_idx + 1;
+ } else {
+ /* insert before */
+ len = len * sizeof(struct ext4_extent_idx);
+ len = len < 0 ? 0 : len;
+ ext_debug("insert new index %d before: %d. "
+ "move %d from 0x%p to 0x%p\n",
+ logical, ptr, len,
+ curp->p_idx, (curp->p_idx + 1));
+ memmove(curp->p_idx + 1, curp->p_idx, len);
+ ix = curp->p_idx;
+ }
+
+ ix->ei_block = cpu_to_le32(logical);
+ ix->ei_leaf = cpu_to_le32(ptr);
+ curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1);
+
+ BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
+ > le16_to_cpu(curp->p_hdr->eh_max));
+ BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr));
+
+ err = ext4_ext_dirty(handle, inode, curp);
+ ext4_std_error(inode->i_sb, err);
+
+ return err;
+}
+
+/*
+ * routine inserts new subtree into the path, using free index entry
+ * at depth 'at:
+ * - allocates all needed blocks (new leaf and all intermediate index blocks)
+ * - makes decision where to split
+ * - moves remaining extens and index entries (right to the split point)
+ * into the newly allocated blocks
+ * - initialize subtree
+ */
+static int ext4_ext_split(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *newext, int at)
+{
+ struct buffer_head *bh = NULL;
+ int depth = ext_depth(inode);
+ struct ext4_extent_header *neh;
+ struct ext4_extent_idx *fidx;
+ struct ext4_extent *ex;
+ int i = at, k, m, a;
+ unsigned long newblock, oldblock;
+ __le32 border;
+ int *ablocks = NULL; /* array of allocated blocks */
+ int err = 0;
+
+ /* make decision: where to split? */
+ /* FIXME: now desicion is simplest: at current extent */
+
+ /* if current leaf will be splitted, then we should use
+ * border from split point */
+ BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr));
+ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
+ border = path[depth].p_ext[1].ee_block;
+ ext_debug("leaf will be splitted."
+ " next leaf starts at %d\n",
+ le32_to_cpu(border));
+ } else {
+ border = newext->ee_block;
+ ext_debug("leaf will be added."
+ " next leaf starts at %d\n",
+ le32_to_cpu(border));
+ }
+
+ /*
+ * if error occurs, then we break processing
+ * and turn filesystem read-only. so, index won't
+ * be inserted and tree will be in consistent
+ * state. next mount will repair buffers too
+ */
+
+ /*
+ * get array to track all allocated blocks
+ * we need this to handle errors and free blocks
+ * upon them
+ */
+ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS);
+ if (!ablocks)
+ return -ENOMEM;
+ memset(ablocks, 0, sizeof(unsigned long) * depth);
+
+ /* allocate all needed blocks */
+ ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
+ for (a = 0; a < depth - at; a++) {
+ newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+ if (newblock == 0)
+ goto cleanup;
+ ablocks[a] = newblock;
+ }
+
+ /* initialize new leaf */
+ newblock = ablocks[--a];
+ BUG_ON(newblock == 0);
+ bh = sb_getblk(inode->i_sb, newblock);
+ if (!bh) {
+ err = -EIO;
+ goto cleanup;
+ }
+ lock_buffer(bh);
+
+ if ((err = ext4_journal_get_create_access(handle, bh)))
+ goto cleanup;
+
+ neh = ext_block_hdr(bh);
+ neh->eh_entries = 0;
+ neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode));
+ neh->eh_magic = EXT4_EXT_MAGIC;
+ neh->eh_depth = 0;
+ ex = EXT_FIRST_EXTENT(neh);
+
+ /* move remain of path[depth] to the new leaf */
+ BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max);
+ /* start copy from next extent */
+ /* TODO: we could do it by single memmove */
+ m = 0;
+ path[depth].p_ext++;
+ while (path[depth].p_ext <=
+ EXT_MAX_EXTENT(path[depth].p_hdr)) {
+ ext_debug("move %d:%d:%d in new leaf %lu\n",
+ le32_to_cpu(path[depth].p_ext->ee_block),
+ le32_to_cpu(path[depth].p_ext->ee_start),
+ le16_to_cpu(path[depth].p_ext->ee_len),
+ newblock);
+ /*memmove(ex++, path[depth].p_ext++,
+ sizeof(struct ext4_extent));
+ neh->eh_entries++;*/
+ path[depth].p_ext++;
+ m++;
+ }
+ if (m) {
+ memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
+ neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m);
+ }
+
+ set_buffer_uptodate(bh);
+ unlock_buffer(bh);
+
+ if ((err = ext4_journal_dirty_metadata(handle, bh)))
+ goto cleanup;
+ brelse(bh);
+ bh = NULL;
+
+ /* correct old leaf */
+ if (m) {
+ if ((err = ext4_ext_get_access(handle, inode, path + depth)))
+ goto cleanup;
+ path[depth].p_hdr->eh_entries =
+ cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m);
+ if ((err = ext4_ext_dirty(handle, inode, path + depth)))
+ goto cleanup;
+
+ }
+
+ /* create intermediate indexes */
+ k = depth - at - 1;
+ BUG_ON(k < 0);
+ if (k)
+ ext_debug("create %d intermediate indices\n", k);
+ /* insert new index into current index block */
+ /* current depth stored in i var */
+ i = depth - 1;
+ while (k--) {
+ oldblock = newblock;
+ newblock = ablocks[--a];
+ bh = sb_getblk(inode->i_sb, newblock);
+ if (!bh) {
+ err = -EIO;
+ goto cleanup;
+ }
+ lock_buffer(bh);
+
+ if ((err = ext4_journal_get_create_access(handle, bh)))
+ goto cleanup;
+
+ neh = ext_block_hdr(bh);
+ neh->eh_entries = cpu_to_le16(1);
+ neh->eh_magic = EXT4_EXT_MAGIC;
+ neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode));
+ neh->eh_depth = cpu_to_le16(depth - i);
+ fidx = EXT_FIRST_INDEX(neh);
+ fidx->ei_block = border;
+ fidx->ei_leaf = cpu_to_le32(oldblock);
+
+ ext_debug("int.index at %d (block %lu): %lu -> %lu\n", i,
+ newblock, (unsigned long) le32_to_cpu(border),
+ oldblock);
+ /* copy indexes */
+ m = 0;
+ path[i].p_idx++;
+
+ ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
+ EXT_MAX_INDEX(path[i].p_hdr));
+ BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) !=
+ EXT_LAST_INDEX(path[i].p_hdr));
+ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
+ ext_debug("%d: move %d:%d in new index %lu\n", i,
+ le32_to_cpu(path[i].p_idx->ei_block),
+ le32_to_cpu(path[i].p_idx->ei_leaf),
+ newblock);
+ /*memmove(++fidx, path[i].p_idx++,
+ sizeof(struct ext4_extent_idx));
+ neh->eh_entries++;
+ BUG_ON(neh->eh_entries > neh->eh_max);*/
+ path[i].p_idx++;
+ m++;
+ }
+ if (m) {
+ memmove(++fidx, path[i].p_idx - m,
+ sizeof(struct ext4_extent_idx) * m);
+ neh->eh_entries =
+ cpu_to_le16(le16_to_cpu(neh->eh_entries) + m);
+ }
+ set_buffer_uptodate(bh);
+ unlock_buffer(bh);
+
+ if ((err = ext4_journal_dirty_metadata(handle, bh)))
+ goto cleanup;
+ brelse(bh);
+ bh = NULL;
+
+ /* correct old index */
+ if (m) {
+ err = ext4_ext_get_access(handle, inode, path + i);
+ if (err)
+ goto cleanup;
+ path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m);
+ err = ext4_ext_dirty(handle, inode, path + i);
+ if (err)
+ goto cleanup;
+ }
+
+ i--;
+ }
+
+ /* insert new index */
+ if (err)
+ goto cleanup;
+
+ err = ext4_ext_insert_index(handle, inode, path + at,
+ le32_to_cpu(border), newblock);
+
+cleanup:
+ if (bh) {
+ if (buffer_locked(bh))
+ unlock_buffer(bh);
+ brelse(bh);
+ }
+
+ if (err) {
+ /* free all allocated blocks in error case */
+ for (i = 0; i < depth; i++) {
+ if (!ablocks[i])
+ continue;
+ ext4_free_blocks(handle, inode, ablocks[i], 1);
+ }
+ }
+ kfree(ablocks);
+
+ return err;
+}
+
+/*
+ * routine implements tree growing procedure:
+ * - allocates new block
+ * - moves top-level data (index block or leaf) into the new block
+ * - initialize new top-level, creating index that points to the
+ * just created block
+ */
+static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *newext)
+{
+ struct ext4_ext_path *curp = path;
+ struct ext4_extent_header *neh;
+ struct ext4_extent_idx *fidx;
+ struct buffer_head *bh;
+ unsigned long newblock;
+ int err = 0;
+
+ newblock = ext4_ext_new_block(handle, inode, path, newext, &err);
+ if (newblock == 0)
+ return err;
+
+ bh = sb_getblk(inode->i_sb, newblock);
+ if (!bh) {
+ err = -EIO;
+ ext4_std_error(inode->i_sb, err);
+ return err;
+ }
+ lock_buffer(bh);
+
+ if ((err = ext4_journal_get_create_access(handle, bh))) {
+ unlock_buffer(bh);
+ goto out;
+ }
+
+ /* move top-level index/leaf into new block */
+ memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data));
+
+ /* set size of new block */
+ neh = ext_block_hdr(bh);
+ /* old root could have indexes or leaves
+ * so calculate e_max right way */
+ if (ext_depth(inode))
+ neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode));
+ else
+ neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode));
+ neh->eh_magic = EXT4_EXT_MAGIC;
+ set_buffer_uptodate(bh);
+ unlock_buffer(bh);
+
+ if ((err = ext4_journal_dirty_metadata(handle, bh)))
+ goto out;
+
+ /* create index in new top-level index: num,max,pointer */
+ if ((err = ext4_ext_get_access(handle, inode, curp)))
+ goto out;
+
+ curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
+ curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode));
+ curp->p_hdr->eh_entries = cpu_to_le16(1);
+ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
+ /* FIXME: it works, but actually path[0] can be index */
+ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
+ curp->p_idx->ei_leaf = cpu_to_le32(newblock);
+
+ neh = ext_inode_hdr(inode);
+ fidx = EXT_FIRST_INDEX(neh);
+ ext_debug("new root: num %d(%d), lblock %d, ptr %d\n",
+ le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
+ le32_to_cpu(fidx->ei_block), le32_to_cpu(fidx->ei_leaf));
+
+ neh->eh_depth = cpu_to_le16(path->p_depth + 1);
+ err = ext4_ext_dirty(handle, inode, curp);
+out:
+ brelse(bh);
+
+ return err;
+}
+
+/*
+ * routine finds empty index and adds new leaf. if no free index found
+ * then it requests in-depth growing
+ */
+static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *newext)
+{
+ struct ext4_ext_path *curp;
+ int depth, i, err = 0;
+
+repeat:
+ i = depth = ext_depth(inode);
+
+ /* walk up to the tree and look for free index entry */
+ curp = path + depth;
+ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
+ i--;
+ curp--;
+ }
+
+ /* we use already allocated block for index block
+ * so, subsequent data blocks should be contigoues */
+ if (EXT_HAS_FREE_INDEX(curp)) {
+ /* if we found index with free entry, then use that
+ * entry: create all needed subtree and add new leaf */
+ err = ext4_ext_split(handle, inode, path, newext, i);
+
+ /* refill path */
+ ext4_ext_drop_refs(path);
+ path = ext4_ext_find_extent(inode,
+ le32_to_cpu(newext->ee_block),
+ path);
+ if (IS_ERR(path))
+ err = PTR_ERR(path);
+ } else {
+ /* tree is full, time to grow in depth */
+ err = ext4_ext_grow_indepth(handle, inode, path, newext);
+ if (err)
+ goto out;
+
+ /* refill path */
+ ext4_ext_drop_refs(path);
+ path = ext4_ext_find_extent(inode,
+ le32_to_cpu(newext->ee_block),
+ path);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ goto out;
+ }
+
+ /*
+ * only first (depth 0 -> 1) produces free space
+ * in all other cases we have to split growed tree
+ */
+ depth = ext_depth(inode);
+ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
+ /* now we need split */
+ goto repeat;
+ }
+ }
+
+out:
+ return err;
+}
+
+/*
+ * returns allocated block in subsequent extent or EXT_MAX_BLOCK
+ * NOTE: it consider block number from index entry as
+ * allocated block. thus, index entries have to be consistent
+ * with leafs
+ */
+static unsigned long
+ext4_ext_next_allocated_block(struct ext4_ext_path *path)
+{
+ int depth;
+
+ BUG_ON(path == NULL);
+ depth = path->p_depth;
+
+ if (depth == 0 && path->p_ext == NULL)
+ return EXT_MAX_BLOCK;
+
+ while (depth >= 0) {
+ if (depth == path->p_depth) {
+ /* leaf */
+ if (path[depth].p_ext !=
+ EXT_LAST_EXTENT(path[depth].p_hdr))
+ return le32_to_cpu(path[depth].p_ext[1].ee_block);
+ } else {
+ /* index */
+ if (path[depth].p_idx !=
+ EXT_LAST_INDEX(path[depth].p_hdr))
+ return le32_to_cpu(path[depth].p_idx[1].ei_block);
+ }
+ depth--;
+ }
+
+ return EXT_MAX_BLOCK;
+}
+
+/*
+ * returns first allocated block from next leaf or EXT_MAX_BLOCK
+ */
+static unsigned ext4_ext_next_leaf_block(struct inode *inode,
+ struct ext4_ext_path *path)
+{
+ int depth;
+
+ BUG_ON(path == NULL);
+ depth = path->p_depth;
+
+ /* zero-tree has no leaf blocks at all */
+ if (depth == 0)
+ return EXT_MAX_BLOCK;
+
+ /* go to index block */
+ depth--;
+
+ while (depth >= 0) {
+ if (path[depth].p_idx !=
+ EXT_LAST_INDEX(path[depth].p_hdr))
+ return le32_to_cpu(path[depth].p_idx[1].ei_block);
+ depth--;
+ }
+
+ return EXT_MAX_BLOCK;
+}
+
+/*
+ * if leaf gets modified and modified extent is first in the leaf
+ * then we have to correct all indexes above
+ * TODO: do we need to correct tree in all cases?
+ */
+int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path)
+{
+ struct ext4_extent_header *eh;
+ int depth = ext_depth(inode);
+ struct ext4_extent *ex;
+ __le32 border;
+ int k, err = 0;
+
+ eh = path[depth].p_hdr;
+ ex = path[depth].p_ext;
+ BUG_ON(ex == NULL);
+ BUG_ON(eh == NULL);
+
+ if (depth == 0) {
+ /* there is no tree at all */
+ return 0;
+ }
+
+ if (ex != EXT_FIRST_EXTENT(eh)) {
+ /* we correct tree if first leaf got modified only */
+ return 0;
+ }
+
+ /*
+ * TODO: we need correction if border is smaller then current one
+ */
+ k = depth - 1;
+ border = path[depth].p_ext->ee_block;
+ if ((err = ext4_ext_get_access(handle, inode, path + k)))
+ return err;
+ path[k].p_idx->ei_block = border;
+ if ((err = ext4_ext_dirty(handle, inode, path + k)))
+ return err;
+
+ while (k--) {
+ /* change all left-side indexes */
+ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
+ break;
+ if ((err = ext4_ext_get_access(handle, inode, path + k)))
+ break;
+ path[k].p_idx->ei_block = border;
+ if ((err = ext4_ext_dirty(handle, inode, path + k)))
+ break;
+ }
+
+ return err;
+}
+
+static int inline
+ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
+ struct ext4_extent *ex2)
+{
+ /* FIXME: 48bit support */
+ if (le32_to_cpu(ex1->ee_block) + le16_to_cpu(ex1->ee_len)
+ != le32_to_cpu(ex2->ee_block))
+ return 0;
+
+#ifdef AGRESSIVE_TEST
+ if (le16_to_cpu(ex1->ee_len) >= 4)
+ return 0;
+#endif
+
+ if (le32_to_cpu(ex1->ee_start) + le16_to_cpu(ex1->ee_len)
+ == le32_to_cpu(ex2->ee_start))
+ return 1;
+ return 0;
+}
+
+/*
+ * this routine tries to merge requsted extent into the existing
+ * extent or inserts requested extent as new one into the tree,
+ * creating new leaf in no-space case
+ */
+int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *newext)
+{
+ struct ext4_extent_header * eh;
+ struct ext4_extent *ex, *fex;
+ struct ext4_extent *nearex; /* nearest extent */
+ struct ext4_ext_path *npath = NULL;
+ int depth, len, err, next;
+
+ BUG_ON(newext->ee_len == 0);
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ BUG_ON(path[depth].p_hdr == NULL);
+
+ /* try to insert block into found extent and return */
+ if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
+ ext_debug("append %d block to %d:%d (from %d)\n",
+ le16_to_cpu(newext->ee_len),
+ le32_to_cpu(ex->ee_block),
+ le16_to_cpu(ex->ee_len),
+ le32_to_cpu(ex->ee_start));
+ if ((err = ext4_ext_get_access(handle, inode, path + depth)))
+ return err;
+ ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len)
+ + le16_to_cpu(newext->ee_len));
+ eh = path[depth].p_hdr;
+ nearex = ex;
+ goto merge;
+ }
+
+repeat:
+ depth = ext_depth(inode);
+ eh = path[depth].p_hdr;
+ if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
+ goto has_space;
+
+ /* probably next leaf has space for us? */
+ fex = EXT_LAST_EXTENT(eh);
+ next = ext4_ext_next_leaf_block(inode, path);
+ if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)
+ && next != EXT_MAX_BLOCK) {
+ ext_debug("next leaf block - %d\n", next);
+ BUG_ON(npath != NULL);
+ npath = ext4_ext_find_extent(inode, next, NULL);
+ if (IS_ERR(npath))
+ return PTR_ERR(npath);
+ BUG_ON(npath->p_depth != path->p_depth);
+ eh = npath[depth].p_hdr;
+ if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
+ ext_debug("next leaf isnt full(%d)\n",
+ le16_to_cpu(eh->eh_entries));
+ path = npath;
+ goto repeat;
+ }
+ ext_debug("next leaf has no free space(%d,%d)\n",
+ le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
+ }
+
+ /*
+ * there is no free space in found leaf
+ * we're gonna add new leaf in the tree
+ */
+ err = ext4_ext_create_new_leaf(handle, inode, path, newext);
+ if (err)
+ goto cleanup;
+ depth = ext_depth(inode);
+ eh = path[depth].p_hdr;
+
+has_space:
+ nearex = path[depth].p_ext;
+
+ if ((err = ext4_ext_get_access(handle, inode, path + depth)))
+ goto cleanup;
+
+ if (!nearex) {
+ /* there is no extent in this leaf, create first one */
+ ext_debug("first extent in the leaf: %d:%d:%d\n",
+ le32_to_cpu(newext->ee_block),
+ le32_to_cpu(newext->ee_start),
+ le16_to_cpu(newext->ee_len));
+ path[depth].p_ext = EXT_FIRST_EXTENT(eh);
+ } else if (le32_to_cpu(newext->ee_block)
+ > le32_to_cpu(nearex->ee_block)) {
+/* BUG_ON(newext->ee_block == nearex->ee_block); */
+ if (nearex != EXT_LAST_EXTENT(eh)) {
+ len = EXT_MAX_EXTENT(eh) - nearex;
+ len = (len - 1) * sizeof(struct ext4_extent);
+ len = len < 0 ? 0 : len;
+ ext_debug("insert %d:%d:%d after: nearest 0x%p, "
+ "move %d from 0x%p to 0x%p\n",
+ le32_to_cpu(newext->ee_block),
+ le32_to_cpu(newext->ee_start),
+ le16_to_cpu(newext->ee_len),
+ nearex, len, nearex + 1, nearex + 2);
+ memmove(nearex + 2, nearex + 1, len);
+ }
+ path[depth].p_ext = nearex + 1;
+ } else {
+ BUG_ON(newext->ee_block == nearex->ee_block);
+ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
+ len = len < 0 ? 0 : len;
+ ext_debug("insert %d:%d:%d before: nearest 0x%p, "
+ "move %d from 0x%p to 0x%p\n",
+ le32_to_cpu(newext->ee_block),
+ le32_to_cpu(newext->ee_start),
+ le16_to_cpu(newext->ee_len),
+ nearex, len, nearex + 1, nearex + 2);
+ memmove(nearex + 1, nearex, len);
+ path[depth].p_ext = nearex;
+ }
+
+ eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1);
+ nearex = path[depth].p_ext;
+ nearex->ee_block = newext->ee_block;
+ nearex->ee_start = newext->ee_start;
+ nearex->ee_len = newext->ee_len;
+ /* FIXME: support for large fs */
+ nearex->ee_start_hi = 0;
+
+merge:
+ /* try to merge extents to the right */
+ while (nearex < EXT_LAST_EXTENT(eh)) {
+ if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1))
+ break;
+ /* merge with next extent! */
+ nearex->ee_len = cpu_to_le16(le16_to_cpu(nearex->ee_len)
+ + le16_to_cpu(nearex[1].ee_len));
+ if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
+ len = (EXT_LAST_EXTENT(eh) - nearex - 1)
+ * sizeof(struct ext4_extent);
+ memmove(nearex + 1, nearex + 2, len);
+ }
+ eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
+ BUG_ON(eh->eh_entries == 0);
+ }
+
+ /* try to merge extents to the left */
+
+ /* time to correct all indexes above */
+ err = ext4_ext_correct_indexes(handle, inode, path);
+ if (err)
+ goto cleanup;
+
+ err = ext4_ext_dirty(handle, inode, path + depth);
+
+cleanup:
+ if (npath) {
+ ext4_ext_drop_refs(npath);
+ kfree(npath);
+ }
+ ext4_ext_tree_changed(inode);
+ ext4_ext_invalidate_cache(inode);
+ return err;
+}
+
+int ext4_ext_walk_space(struct inode *inode, unsigned long block,
+ unsigned long num, ext_prepare_callback func,
+ void *cbdata)
+{
+ struct ext4_ext_path *path = NULL;
+ struct ext4_ext_cache cbex;
+ struct ext4_extent *ex;
+ unsigned long next, start = 0, end = 0;
+ unsigned long last = block + num;
+ int depth, exists, err = 0;
+
+ BUG_ON(func == NULL);
+ BUG_ON(inode == NULL);
+
+ while (block < last && block != EXT_MAX_BLOCK) {
+ num = last - block;
+ /* find extent for this block */
+ path = ext4_ext_find_extent(inode, block, path);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ path = NULL;
+ break;
+ }
+
+ depth = ext_depth(inode);
+ BUG_ON(path[depth].p_hdr == NULL);
+ ex = path[depth].p_ext;
+ next = ext4_ext_next_allocated_block(path);
+
+ exists = 0;
+ if (!ex) {
+ /* there is no extent yet, so try to allocate
+ * all requested space */
+ start = block;
+ end = block + num;
+ } else if (le32_to_cpu(ex->ee_block) > block) {
+ /* need to allocate space before found extent */
+ start = block;
+ end = le32_to_cpu(ex->ee_block);
+ if (block + num < end)
+ end = block + num;
+ } else if (block >=
+ le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len)) {
+ /* need to allocate space after found extent */
+ start = block;
+ end = block + num;
+ if (end >= next)
+ end = next;
+ } else if (block >= le32_to_cpu(ex->ee_block)) {
+ /*
+ * some part of requested space is covered
+ * by found extent
+ */
+ start = block;
+ end = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len);
+ if (block + num < end)
+ end = block + num;
+ exists = 1;
+ } else {
+ BUG();
+ }
+ BUG_ON(end <= start);
+
+ if (!exists) {
+ cbex.ec_block = start;
+ cbex.ec_len = end - start;
+ cbex.ec_start = 0;
+ cbex.ec_type = EXT4_EXT_CACHE_GAP;
+ } else {
+ cbex.ec_block = le32_to_cpu(ex->ee_block);
+ cbex.ec_len = le16_to_cpu(ex->ee_len);
+ cbex.ec_start = le32_to_cpu(ex->ee_start);
+ cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
+ }
+
+ BUG_ON(cbex.ec_len == 0);
+ err = func(inode, path, &cbex, cbdata);
+ ext4_ext_drop_refs(path);
+
+ if (err < 0)
+ break;
+ if (err == EXT_REPEAT)
+ continue;
+ else if (err == EXT_BREAK) {
+ err = 0;
+ break;
+ }
+
+ if (ext_depth(inode) != depth) {
+ /* depth was changed. we have to realloc path */
+ kfree(path);
+ path = NULL;
+ }
+
+ block = cbex.ec_block + cbex.ec_len;
+ }
+
+ if (path) {
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ }
+
+ return err;
+}
+
+static inline void
+ext4_ext_put_in_cache(struct inode *inode, __u32 block,
+ __u32 len, __u32 start, int type)
+{
+ struct ext4_ext_cache *cex;
+ BUG_ON(len == 0);
+ cex = &EXT4_I(inode)->i_cached_extent;
+ cex->ec_type = type;
+ cex->ec_block = block;
+ cex->ec_len = len;
+ cex->ec_start = start;
+}
+
+/*
+ * this routine calculate boundaries of the gap requested block fits into
+ * and cache this gap
+ */
+static inline void
+ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
+ unsigned long block)
+{
+ int depth = ext_depth(inode);
+ unsigned long lblock, len;
+ struct ext4_extent *ex;
+
+ ex = path[depth].p_ext;
+ if (ex == NULL) {
+ /* there is no extent yet, so gap is [0;-] */
+ lblock = 0;
+ len = EXT_MAX_BLOCK;
+ ext_debug("cache gap(whole file):");
+ } else if (block < le32_to_cpu(ex->ee_block)) {
+ lblock = block;
+ len = le32_to_cpu(ex->ee_block) - block;
+ ext_debug("cache gap(before): %lu [%lu:%lu]",
+ (unsigned long) block,
+ (unsigned long) le32_to_cpu(ex->ee_block),
+ (unsigned long) le16_to_cpu(ex->ee_len));
+ } else if (block >= le32_to_cpu(ex->ee_block)
+ + le16_to_cpu(ex->ee_len)) {
+ lblock = le32_to_cpu(ex->ee_block)
+ + le16_to_cpu(ex->ee_len);
+ len = ext4_ext_next_allocated_block(path);
+ ext_debug("cache gap(after): [%lu:%lu] %lu",
+ (unsigned long) le32_to_cpu(ex->ee_block),
+ (unsigned long) le16_to_cpu(ex->ee_len),
+ (unsigned long) block);
+ BUG_ON(len == lblock);
+ len = len - lblock;
+ } else {
+ lblock = len = 0;
+ BUG();
+ }
+
+ ext_debug(" -> %lu:%lu\n", (unsigned long) lblock, len);
+ ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP);
+}
+
+static inline int
+ext4_ext_in_cache(struct inode *inode, unsigned long block,
+ struct ext4_extent *ex)
+{
+ struct ext4_ext_cache *cex;
+
+ cex = &EXT4_I(inode)->i_cached_extent;
+
+ /* has cache valid data? */
+ if (cex->ec_type == EXT4_EXT_CACHE_NO)
+ return EXT4_EXT_CACHE_NO;
+
+ BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
+ cex->ec_type != EXT4_EXT_CACHE_EXTENT);
+ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) {
+ ex->ee_block = cpu_to_le32(cex->ec_block);
+ ex->ee_start = cpu_to_le32(cex->ec_start);
+ ex->ee_len = cpu_to_le16(cex->ec_len);
+ ext_debug("%lu cached by %lu:%lu:%lu\n",
+ (unsigned long) block,
+ (unsigned long) cex->ec_block,
+ (unsigned long) cex->ec_len,
+ (unsigned long) cex->ec_start);
+ return cex->ec_type;
+ }
+
+ /* not in cache */
+ return EXT4_EXT_CACHE_NO;
+}
+
+/*
+ * routine removes index from the index block
+ * it's used in truncate case only. thus all requests are for
+ * last index in the block only
+ */
+int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path)
+{
+ struct buffer_head *bh;
+ int err;
+ unsigned long leaf;
+
+ /* free index block */
+ path--;
+ leaf = le32_to_cpu(path->p_idx->ei_leaf);
+ BUG_ON(path->p_hdr->eh_entries == 0);
+ if ((err = ext4_ext_get_access(handle, inode, path)))
+ return err;
+ path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1);
+ if ((err = ext4_ext_dirty(handle, inode, path)))
+ return err;
+ ext_debug("index is empty, remove it, free block %lu\n", leaf);
+ bh = sb_find_get_block(inode->i_sb, leaf);
+ ext4_forget(handle, 1, inode, bh, leaf);
+ ext4_free_blocks(handle, inode, leaf, 1);
+ return err;
+}
+
+/*
+ * This routine returns max. credits extent tree can consume.
+ * It should be OK for low-performance paths like ->writepage()
+ * To allow many writing process to fit a single transaction,
+ * caller should calculate credits under truncate_mutex and
+ * pass actual path.
+ */
+int inline ext4_ext_calc_credits_for_insert(struct inode *inode,
+ struct ext4_ext_path *path)
+{
+ int depth, needed;
+
+ if (path) {
+ /* probably there is space in leaf? */
+ depth = ext_depth(inode);
+ if (le16_to_cpu(path[depth].p_hdr->eh_entries)
+ < le16_to_cpu(path[depth].p_hdr->eh_max))
+ return 1;
+ }
+
+ /*
+ * given 32bit logical block (4294967296 blocks), max. tree
+ * can be 4 levels in depth -- 4 * 340^4 == 53453440000.
+ * let's also add one more level for imbalance.
+ */
+ depth = 5;
+
+ /* allocation of new data block(s) */
+ needed = 2;
+
+ /*
+ * tree can be full, so it'd need to grow in depth:
+ * allocation + old root + new root
+ */
+ needed += 2 + 1 + 1;
+
+ /*
+ * Index split can happen, we'd need:
+ * allocate intermediate indexes (bitmap + group)
+ * + change two blocks at each level, but root (already included)
+ */
+ needed = (depth * 2) + (depth * 2);
+
+ /* any allocation modifies superblock */
+ needed += 1;
+
+ return needed;
+}
+
+static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
+ struct ext4_extent *ex,
+ unsigned long from, unsigned long to)
+{
+ struct buffer_head *bh;
+ int i;
+
+#ifdef EXTENTS_STATS
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ unsigned short ee_len = le16_to_cpu(ex->ee_len);
+ spin_lock(&sbi->s_ext_stats_lock);
+ sbi->s_ext_blocks += ee_len;
+ sbi->s_ext_extents++;
+ if (ee_len < sbi->s_ext_min)
+ sbi->s_ext_min = ee_len;
+ if (ee_len > sbi->s_ext_max)
+ sbi->s_ext_max = ee_len;
+ if (ext_depth(inode) > sbi->s_depth_max)
+ sbi->s_depth_max = ext_depth(inode);
+ spin_unlock(&sbi->s_ext_stats_lock);
+ }
+#endif
+ if (from >= le32_to_cpu(ex->ee_block)
+ && to == le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) {
+ /* tail removal */
+ unsigned long num, start;
+ num = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - from;
+ start = le32_to_cpu(ex->ee_start) + le16_to_cpu(ex->ee_len) - num;
+ ext_debug("free last %lu blocks starting %lu\n", num, start);
+ for (i = 0; i < num; i++) {
+ bh = sb_find_get_block(inode->i_sb, start + i);
+ ext4_forget(handle, 0, inode, bh, start + i);
+ }
+ ext4_free_blocks(handle, inode, start, num);
+ } else if (from == le32_to_cpu(ex->ee_block)
+ && to <= le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) {
+ printk("strange request: removal %lu-%lu from %u:%u\n",
+ from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len));
+ } else {
+ printk("strange request: removal(2) %lu-%lu from %u:%u\n",
+ from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len));
+ }
+ return 0;
+}
+
+static int
+ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path, unsigned long start)
+{
+ int err = 0, correct_index = 0;
+ int depth = ext_depth(inode), credits;
+ struct ext4_extent_header *eh;
+ unsigned a, b, block, num;
+ unsigned long ex_ee_block;
+ unsigned short ex_ee_len;
+ struct ext4_extent *ex;
+
+ ext_debug("truncate since %lu in leaf\n", start);
+ if (!path[depth].p_hdr)
+ path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
+ eh = path[depth].p_hdr;
+ BUG_ON(eh == NULL);
+ BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max));
+ BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC);
+
+ /* find where to start removing */
+ ex = EXT_LAST_EXTENT(eh);
+
+ ex_ee_block = le32_to_cpu(ex->ee_block);
+ ex_ee_len = le16_to_cpu(ex->ee_len);
+
+ while (ex >= EXT_FIRST_EXTENT(eh) &&
+ ex_ee_block + ex_ee_len > start) {
+ ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len);
+ path[depth].p_ext = ex;
+
+ a = ex_ee_block > start ? ex_ee_block : start;
+ b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ?
+ ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK;
+
+ ext_debug(" border %u:%u\n", a, b);
+
+ if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) {
+ block = 0;
+ num = 0;
+ BUG();
+ } else if (a != ex_ee_block) {
+ /* remove tail of the extent */
+ block = ex_ee_block;
+ num = a - block;
+ } else if (b != ex_ee_block + ex_ee_len - 1) {
+ /* remove head of the extent */
+ block = a;
+ num = b - a;
+ /* there is no "make a hole" API yet */
+ BUG();
+ } else {
+ /* remove whole extent: excellent! */
+ block = ex_ee_block;
+ num = 0;
+ BUG_ON(a != ex_ee_block);
+ BUG_ON(b != ex_ee_block + ex_ee_len - 1);
+ }
+
+ /* at present, extent can't cross block group */
+ /* leaf + bitmap + group desc + sb + inode */
+ credits = 5;
+ if (ex == EXT_FIRST_EXTENT(eh)) {
+ correct_index = 1;
+ credits += (ext_depth(inode)) + 1;
+ }
+#ifdef CONFIG_QUOTA
+ credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
+#endif
+
+ handle = ext4_ext_journal_restart(handle, credits);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto out;
+ }
+
+ err = ext4_ext_get_access(handle, inode, path + depth);
+ if (err)
+ goto out;
+
+ err = ext4_remove_blocks(handle, inode, ex, a, b);
+ if (err)
+ goto out;
+
+ if (num == 0) {
+ /* this extent is removed entirely mark slot unused */
+ ex->ee_start = 0;
+ eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
+ }
+
+ ex->ee_block = cpu_to_le32(block);
+ ex->ee_len = cpu_to_le16(num);
+
+ err = ext4_ext_dirty(handle, inode, path + depth);
+ if (err)
+ goto out;
+
+ ext_debug("new extent: %u:%u:%u\n", block, num,
+ le32_to_cpu(ex->ee_start));
+ ex--;
+ ex_ee_block = le32_to_cpu(ex->ee_block);
+ ex_ee_len = le16_to_cpu(ex->ee_len);
+ }
+
+ if (correct_index && eh->eh_entries)
+ err = ext4_ext_correct_indexes(handle, inode, path);
+
+ /* if this leaf is free, then we should
+ * remove it from index block above */
+ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
+ err = ext4_ext_rm_idx(handle, inode, path + depth);
+
+out:
+ return err;
+}
+
+/*
+ * returns 1 if current index have to be freed (even partial)
+ */
+static int inline
+ext4_ext_more_to_rm(struct ext4_ext_path *path)
+{
+ BUG_ON(path->p_idx == NULL);
+
+ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
+ return 0;
+
+ /*
+ * if truncate on deeper level happened it it wasn't partial
+ * so we have to consider current index for truncation
+ */
+ if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
+ return 0;
+ return 1;
+}
+
+int ext4_ext_remove_space(struct inode *inode, unsigned long start)
+{
+ struct super_block *sb = inode->i_sb;
+ int depth = ext_depth(inode);
+ struct ext4_ext_path *path;
+ handle_t *handle;
+ int i = 0, err = 0;
+
+ ext_debug("truncate since %lu\n", start);
+
+ /* probably first extent we're gonna free will be last in block */
+ handle = ext4_journal_start(inode, depth + 1);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ ext4_ext_invalidate_cache(inode);
+
+ /*
+ * we start scanning from right side freeing all the blocks
+ * after i_size and walking into the deep
+ */
+ path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL);
+ if (path == NULL) {
+ ext4_journal_stop(handle);
+ return -ENOMEM;
+ }
+ memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1));
+ path[0].p_hdr = ext_inode_hdr(inode);
+ if (ext4_ext_check_header(__FUNCTION__, inode, path[0].p_hdr)) {
+ err = -EIO;
+ goto out;
+ }
+ path[0].p_depth = depth;
+
+ while (i >= 0 && err == 0) {
+ if (i == depth) {
+ /* this is leaf block */
+ err = ext4_ext_rm_leaf(handle, inode, path, start);
+ /* root level have p_bh == NULL, brelse() eats this */
+ brelse(path[i].p_bh);
+ path[i].p_bh = NULL;
+ i--;
+ continue;
+ }
+
+ /* this is index block */
+ if (!path[i].p_hdr) {
+ ext_debug("initialize header\n");
+ path[i].p_hdr = ext_block_hdr(path[i].p_bh);
+ if (ext4_ext_check_header(__FUNCTION__, inode,
+ path[i].p_hdr)) {
+ err = -EIO;
+ goto out;
+ }
+ }
+
+ BUG_ON(le16_to_cpu(path[i].p_hdr->eh_entries)
+ > le16_to_cpu(path[i].p_hdr->eh_max));
+ BUG_ON(path[i].p_hdr->eh_magic != EXT4_EXT_MAGIC);
+
+ if (!path[i].p_idx) {
+ /* this level hasn't touched yet */
+ path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
+ path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
+ ext_debug("init index ptr: hdr 0x%p, num %d\n",
+ path[i].p_hdr,
+ le16_to_cpu(path[i].p_hdr->eh_entries));
+ } else {
+ /* we've already was here, see at next index */
+ path[i].p_idx--;
+ }
+
+ ext_debug("level %d - index, first 0x%p, cur 0x%p\n",
+ i, EXT_FIRST_INDEX(path[i].p_hdr),
+ path[i].p_idx);
+ if (ext4_ext_more_to_rm(path + i)) {
+ /* go to the next level */
+ ext_debug("move to level %d (block %d)\n",
+ i + 1, le32_to_cpu(path[i].p_idx->ei_leaf));
+ memset(path + i + 1, 0, sizeof(*path));
+ path[i+1].p_bh =
+ sb_bread(sb, le32_to_cpu(path[i].p_idx->ei_leaf));
+ if (!path[i+1].p_bh) {
+ /* should we reset i_size? */
+ err = -EIO;
+ break;
+ }
+
+ /* put actual number of indexes to know is this
+ * number got changed at the next iteration */
+ path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
+ i++;
+ } else {
+ /* we finish processing this index, go up */
+ if (path[i].p_hdr->eh_entries == 0 && i > 0) {
+ /* index is empty, remove it
+ * handle must be already prepared by the
+ * truncatei_leaf() */
+ err = ext4_ext_rm_idx(handle, inode, path + i);
+ }
+ /* root level have p_bh == NULL, brelse() eats this */
+ brelse(path[i].p_bh);
+ path[i].p_bh = NULL;
+ i--;
+ ext_debug("return to level %d\n", i);
+ }
+ }
+
+ /* TODO: flexible tree reduction should be here */
+ if (path->p_hdr->eh_entries == 0) {
+ /*
+ * truncate to zero freed all the tree
+ * so, we need to correct eh_depth
+ */
+ err = ext4_ext_get_access(handle, inode, path);
+ if (err == 0) {
+ ext_inode_hdr(inode)->eh_depth = 0;
+ ext_inode_hdr(inode)->eh_max =
+ cpu_to_le16(ext4_ext_space_root(inode));
+ err = ext4_ext_dirty(handle, inode, path);
+ }
+ }
+out:
+ ext4_ext_tree_changed(inode);
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ ext4_journal_stop(handle);
+
+ return err;
+}
+
+/*
+ * called at mount time
+ */
+void ext4_ext_init(struct super_block *sb)
+{
+ /*
+ * possible initialization would be here
+ */
+
+ if (test_opt(sb, EXTENTS)) {
+ printk("EXT4-fs: file extents enabled");
+#ifdef AGRESSIVE_TEST
+ printk(", agressive tests");
+#endif
+#ifdef CHECK_BINSEARCH
+ printk(", check binsearch");
+#endif
+#ifdef EXTENTS_STATS
+ printk(", stats");
+#endif
+ printk("\n");
+#ifdef EXTENTS_STATS
+ spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
+ EXT4_SB(sb)->s_ext_min = 1 << 30;
+ EXT4_SB(sb)->s_ext_max = 0;
+#endif
+ }
+}
+
+/*
+ * called at umount time
+ */
+void ext4_ext_release(struct super_block *sb)
+{
+ if (!test_opt(sb, EXTENTS))
+ return;
+
+#ifdef EXTENTS_STATS
+ if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n",
+ sbi->s_ext_blocks, sbi->s_ext_extents,
+ sbi->s_ext_blocks / sbi->s_ext_extents);
+ printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n",
+ sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max);
+ }
+#endif
+}
+
+int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, sector_t iblock,
+ unsigned long max_blocks, struct buffer_head *bh_result,
+ int create, int extend_disksize)
+{
+ struct ext4_ext_path *path = NULL;
+ struct ext4_extent newex, *ex;
+ int goal, newblock, err = 0, depth;
+ unsigned long allocated = 0;
+
+ __clear_bit(BH_New, &bh_result->b_state);
+ ext_debug("blocks %d/%lu requested for inode %u\n", (int) iblock,
+ max_blocks, (unsigned) inode->i_ino);
+ mutex_lock(&EXT4_I(inode)->truncate_mutex);
+
+ /* check in cache */
+ if ((goal = ext4_ext_in_cache(inode, iblock, &newex))) {
+ if (goal == EXT4_EXT_CACHE_GAP) {
+ if (!create) {
+ /* block isn't allocated yet and
+ * user don't want to allocate it */
+ goto out2;
+ }
+ /* we should allocate requested block */
+ } else if (goal == EXT4_EXT_CACHE_EXTENT) {
+ /* block is already allocated */
+ newblock = iblock
+ - le32_to_cpu(newex.ee_block)
+ + le32_to_cpu(newex.ee_start);
+ /* number of remain blocks in the extent */
+ allocated = le16_to_cpu(newex.ee_len) -
+ (iblock - le32_to_cpu(newex.ee_block));
+ goto out;
+ } else {
+ BUG();
+ }
+ }
+
+ /* find extent for this block */
+ path = ext4_ext_find_extent(inode, iblock, NULL);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ path = NULL;
+ goto out2;
+ }
+
+ depth = ext_depth(inode);
+
+ /*
+ * consistent leaf must not be empty
+ * this situations is possible, though, _during_ tree modification
+ * this is why assert can't be put in ext4_ext_find_extent()
+ */
+ BUG_ON(path[depth].p_ext == NULL && depth != 0);
+
+ if ((ex = path[depth].p_ext)) {
+ unsigned long ee_block = le32_to_cpu(ex->ee_block);
+ unsigned long ee_start = le32_to_cpu(ex->ee_start);
+ unsigned short ee_len = le16_to_cpu(ex->ee_len);
+ /* if found exent covers block, simple return it */
+ if (iblock >= ee_block && iblock < ee_block + ee_len) {
+ newblock = iblock - ee_block + ee_start;
+ /* number of remain blocks in the extent */
+ allocated = ee_len - (iblock - ee_block);
+ ext_debug("%d fit into %lu:%d -> %d\n", (int) iblock,
+ ee_block, ee_len, newblock);
+ ext4_ext_put_in_cache(inode, ee_block, ee_len,
+ ee_start, EXT4_EXT_CACHE_EXTENT);
+ goto out;
+ }
+ }
+
+ /*
+ * requested block isn't allocated yet
+ * we couldn't try to create block if create flag is zero
+ */
+ if (!create) {
+ /* put just found gap into cache to speedup subsequest reqs */
+ ext4_ext_put_gap_in_cache(inode, path, iblock);
+ goto out2;
+ }
+ /*
+ * Okay, we need to do block allocation. Lazily initialize the block
+ * allocation info here if necessary
+ */
+ if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
+ ext4_init_block_alloc_info(inode);
+
+ /* allocate new block */
+ goal = ext4_ext_find_goal(inode, path, iblock);
+ allocated = max_blocks;
+ newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
+ if (!newblock)
+ goto out2;
+ ext_debug("allocate new block: goal %d, found %d/%lu\n",
+ goal, newblock, allocated);
+
+ /* try to insert new extent into found leaf and return */
+ newex.ee_block = cpu_to_le32(iblock);
+ newex.ee_start = cpu_to_le32(newblock);
+ newex.ee_len = cpu_to_le16(allocated);
+ err = ext4_ext_insert_extent(handle, inode, path, &newex);
+ if (err)
+ goto out2;
+
+ if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize)
+ EXT4_I(inode)->i_disksize = inode->i_size;
+
+ /* previous routine could use block we allocated */
+ newblock = le32_to_cpu(newex.ee_start);
+ __set_bit(BH_New, &bh_result->b_state);
+
+ ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
+ EXT4_EXT_CACHE_EXTENT);
+out:
+ if (allocated > max_blocks)
+ allocated = max_blocks;
+ ext4_ext_show_leaf(inode, path);
+ __set_bit(BH_Mapped, &bh_result->b_state);
+ bh_result->b_bdev = inode->i_sb->s_bdev;
+ bh_result->b_blocknr = newblock;
+out2:
+ if (path) {
+ ext4_ext_drop_refs(path);
+ kfree(path);
+ }
+ mutex_unlock(&EXT4_I(inode)->truncate_mutex);
+
+ return err ? err : allocated;
+}
+
+void ext4_ext_truncate(struct inode * inode, struct page *page)
+{
+ struct address_space *mapping = inode->i_mapping;
+ struct super_block *sb = inode->i_sb;
+ unsigned long last_block;
+ handle_t *handle;
+ int err = 0;
+
+ /*
+ * probably first extent we're gonna free will be last in block
+ */
+ err = ext4_writepage_trans_blocks(inode) + 3;
+ handle = ext4_journal_start(inode, err);
+ if (IS_ERR(handle)) {
+ if (page) {
+ clear_highpage(page);
+ flush_dcache_page(page);
+ unlock_page(page);
+ page_cache_release(page);
+ }
+ return;
+ }
+
+ if (page)
+ ext4_block_truncate_page(handle, page, mapping, inode->i_size);
+
+ mutex_lock(&EXT4_I(inode)->truncate_mutex);
+ ext4_ext_invalidate_cache(inode);
+
+ /*
+ * TODO: optimization is possible here
+ * probably we need not scaning at all,
+ * because page truncation is enough
+ */
+ if (ext4_orphan_add(handle, inode))
+ goto out_stop;
+
+ /* we have to know where to truncate from in crash case */
+ EXT4_I(inode)->i_disksize = inode->i_size;
+ ext4_mark_inode_dirty(handle, inode);
+
+ last_block = (inode->i_size + sb->s_blocksize - 1)
+ >> EXT4_BLOCK_SIZE_BITS(sb);
+ err = ext4_ext_remove_space(inode, last_block);
+
+ /* In a multi-transaction truncate, we only make the final
+ * transaction synchronous */
+ if (IS_SYNC(inode))
+ handle->h_sync = 1;
+
+out_stop:
+ /*
+ * If this was a simple ftruncate(), and the file will remain alive
+ * then we need to clear up the orphan record which we created above.
+ * However, if this was a real unlink then we were called by
+ * ext4_delete_inode(), and we allow that function to clean up the
+ * orphan info for us.
+ */
+ if (inode->i_nlink)
+ ext4_orphan_del(handle, inode);
+
+ mutex_unlock(&EXT4_I(inode)->truncate_mutex);
+ ext4_journal_stop(handle);
+}
+
+/*
+ * this routine calculate max number of blocks we could modify
+ * in order to allocate new block for an inode
+ */
+int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
+{
+ int needed;
+
+ needed = ext4_ext_calc_credits_for_insert(inode, NULL);
+
+ /* caller want to allocate num blocks, but note it includes sb */
+ needed = needed * num - (num - 1);
+
+#ifdef CONFIG_QUOTA
+ needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
+#endif
+
+ return needed;
+}
+
+EXPORT_SYMBOL(ext4_mark_inode_dirty);
+EXPORT_SYMBOL(ext4_ext_invalidate_cache);
+EXPORT_SYMBOL(ext4_ext_insert_extent);
+EXPORT_SYMBOL(ext4_ext_walk_space);
+EXPORT_SYMBOL(ext4_ext_find_goal);
+EXPORT_SYMBOL(ext4_ext_calc_credits_for_insert);
+
diff -puN fs/ext4/ialloc.c~ext4-extents fs/ext4/ialloc.c
--- linux-2.6.18-rc4/fs/ext4/ialloc.c~ext4-extents 2006-08-09 15:41:44.196226520 -0700
+++ linux-2.6.18-rc4-ming/fs/ext4/ialloc.c 2006-08-09 15:41:44.305227402 -0700
@@ -616,6 +616,17 @@ got:
ext4_std_error(sb, err);
goto fail_free_drop;
}
+ if (test_opt(sb, EXTENTS)) {
+ EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
+ ext4_ext_tree_init(handle, inode);
+ if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
+ err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
+ if (err) goto fail;
+ EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS);
+ BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "call ext4_journal_dirty_metadata");
+ err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
+ }
+ }

ext4_debug("allocating inode %lu\n", inode->i_ino);
goto really_out;
diff -puN fs/ext4/inode.c~ext4-extents fs/ext4/inode.c
--- linux-2.6.18-rc4/fs/ext4/inode.c~ext4-extents 2006-08-09 15:41:44.200226552 -0700
+++ linux-2.6.18-rc4-ming/fs/ext4/inode.c 2006-08-09 15:41:44.310227443 -0700
@@ -39,8 +39,6 @@
#include "xattr.h"
#include "acl.h"

-static int ext4_writepage_trans_blocks(struct inode *inode);
-
/*
* Test whether an inode is a fast symlink.
*/
@@ -803,6 +801,7 @@ int ext4_get_blocks_handle(handle_t *han
ext4_fsblk_t first_block = 0;


+ J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
J_ASSERT(handle != NULL || create == 0);
depth = ext4_block_to_path(inode,iblock,offsets,&blocks_to_boundary);

@@ -983,7 +982,7 @@ static int ext4_get_block(struct inode *

get_block:
if (ret == 0) {
- ret = ext4_get_blocks_handle(handle, inode, iblock,
+ ret = ext4_get_blocks_wrap(handle, inode, iblock,
max_blocks, bh_result, create, 0);
if (ret > 0) {
bh_result->b_size = (ret << inode->i_blkbits);
@@ -1007,7 +1006,7 @@ struct buffer_head *ext4_getblk(handle_t
dummy.b_state = 0;
dummy.b_blocknr = -1000;
buffer_trace_init(&dummy.b_history);
- err = ext4_get_blocks_handle(handle, inode, block, 1,
+ err = ext4_get_blocks_wrap(handle, inode, block, 1,
&dummy, create, 1);
if (err == 1) {
err = 0;
@@ -1755,7 +1754,7 @@ void ext4_set_aops(struct inode *inode)
* This required during truncate. We need to physically zero the tail end
* of that block so it doesn't yield old data if the file is later grown.
*/
-static int ext4_block_truncate_page(handle_t *handle, struct page *page,
+int ext4_block_truncate_page(handle_t *handle, struct page *page,
struct address_space *mapping, loff_t from)
{
ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
@@ -2259,6 +2258,9 @@ void ext4_truncate(struct inode *inode)
return;
}

+ if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+ return ext4_ext_truncate(inode, page);
+
handle = start_transaction(inode);
if (IS_ERR(handle)) {
if (page) {
@@ -3001,12 +3003,15 @@ err_out:
* block and work out the exact number of indirects which are touched. Pah.
*/

-static int ext4_writepage_trans_blocks(struct inode *inode)
+int ext4_writepage_trans_blocks(struct inode *inode)
{
int bpp = ext4_journal_blocks_per_page(inode);
int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3;
int ret;

+ if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+ return ext4_ext_writepage_trans_blocks(inode, bpp);
+
if (ext4_should_journal_data(inode))
ret = 3 * (bpp + indirects) + 2;
else
diff -puN fs/ext4/ioctl.c~ext4-extents fs/ext4/ioctl.c
--- linux-2.6.18-rc4/fs/ext4/ioctl.c~ext4-extents 2006-08-09 15:41:44.203226576 -0700
+++ linux-2.6.18-rc4-ming/fs/ext4/ioctl.c 2006-08-09 15:41:44.311227451 -0700
@@ -247,7 +247,6 @@ flags_err:
return err;
}

-
default:
return -ENOTTY;
}
diff -puN fs/ext4/Makefile~ext4-extents fs/ext4/Makefile
--- linux-2.6.18-rc4/fs/ext4/Makefile~ext4-extents 2006-08-09 15:41:44.247226932 -0700
+++ linux-2.6.18-rc4-ming/fs/ext4/Makefile 2006-08-09 15:41:44.311227451 -0700
@@ -5,7 +5,7 @@
obj-$(CONFIG_EXT3DEV_FS) += ext3dev.o

ext3dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
- ioctl.o namei.o super.o symlink.o hash.o resize.o
+ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o

ext3dev-$(CONFIG_EXT3DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
ext3dev-$(CONFIG_EXT3DEV_FS_POSIX_ACL) += acl.o
diff -puN fs/ext4/super.c~ext4-extents fs/ext4/super.c
--- linux-2.6.18-rc4/fs/ext4/super.c~ext4-extents 2006-08-09 15:41:44.250226957 -0700
+++ linux-2.6.18-rc4-ming/fs/ext4/super.c 2006-08-09 15:41:44.316227491 -0700
@@ -389,6 +389,7 @@ static void ext4_put_super (struct super
struct ext4_super_block *es = sbi->s_es;
int i;

+ ext4_ext_release(sb);
ext4_xattr_put_super(sb);
jbd2_journal_destroy(sbi->s_journal);
if (!(sb->s_flags & MS_RDONLY)) {
@@ -453,6 +454,7 @@ static struct inode *ext4_alloc_inode(st
#endif
ei->i_block_alloc_info = NULL;
ei->vfs_inode.i_version = 1;
+ memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
return &ei->vfs_inode;
}

@@ -635,7 +637,7 @@ enum {
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
- Opt_grpquota
+ Opt_grpquota, Opt_extents,
};

static match_table_t tokens = {
@@ -685,6 +687,7 @@ static match_table_t tokens = {
{Opt_quota, "quota"},
{Opt_usrquota, "usrquota"},
{Opt_barrier, "barrier=%u"},
+ {Opt_extents, "extents"},
{Opt_err, NULL},
{Opt_resize, "resize"},
};
@@ -1017,6 +1020,9 @@ clear_qf_name:
case Opt_bh:
clear_opt(sbi->s_mount_opt, NOBH);
break;
+ case Opt_extents:
+ set_opt (sbi->s_mount_opt, EXTENTS);
+ break;
default:
printk (KERN_ERR
"EXT4-fs: Unrecognized mount option \"%s\" "
@@ -1742,6 +1748,8 @@ static int ext4_fill_super (struct super
test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
"writeback");

+ ext4_ext_init(sb);
+
lock_kernel();
return 0;

diff -puN /dev/null include/linux/ext4_fs_extents.h
--- /dev/null 2006-08-08 14:57:22.983223272 -0700
+++ linux-2.6.18-rc4-ming/include/linux/ext4_fs_extents.h 2006-08-09 15:41:44.317227499 -0700
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2003-2006, Cluster File Systems, Inc, [email protected]
+ * Written by Alex Tomas <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public Licens
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
+ */
+
+#ifndef _LINUX_EXT4_EXTENTS
+#define _LINUX_EXT4_EXTENTS
+
+#include <linux/ext4_fs.h>
+
+/*
+ * with AGRESSIVE_TEST defined capacity of index/leaf blocks
+ * become very little, so index split, in-depth growing and
+ * other hard changes happens much more often
+ * this is for debug purposes only
+ */
+#define AGRESSIVE_TEST_
+
+/*
+ * with EXTENTS_STATS defined number of blocks and extents
+ * are collected in truncate path. they'll be showed at
+ * umount time
+ */
+#define EXTENTS_STATS__
+
+/*
+ * if CHECK_BINSEARCH defined, then results of binary search
+ * will be checked by linear search
+ */
+#define CHECK_BINSEARCH__
+
+/*
+ * if EXT_DEBUG is defined you can use 'extdebug' mount option
+ * to get lots of info what's going on
+ */
+#define EXT_DEBUG__
+#ifdef EXT_DEBUG
+#define ext_debug(a...) printk(a)
+#else
+#define ext_debug(a...)
+#endif
+
+/*
+ * if EXT_STATS is defined then stats numbers are collected
+ * these number will be displayed at umount time
+ */
+#define EXT_STATS_
+
+
+/*
+ * ext4_inode has i_block array (60 bytes total)
+ * first 12 bytes store ext4_extent_header
+ * the remain stores array of ext4_extent
+ */
+
+/*
+ * this is extent on-disk structure
+ * it's used at the bottom of the tree
+ */
+struct ext4_extent {
+ __le32 ee_block; /* first logical block extent covers */
+ __le16 ee_len; /* number of blocks covered by extent */
+ __le16 ee_start_hi; /* high 16 bits of physical block */
+ __le32 ee_start; /* low 32 bigs of physical block */
+};
+
+/*
+ * this is index on-disk structure
+ * it's used at all the levels, but the bottom
+ */
+struct ext4_extent_idx {
+ __le32 ei_block; /* index covers logical blocks from 'block' */
+ __le32 ei_leaf; /* pointer to the physical block of the next *
+ * level. leaf or next index could bet here */
+ __le16 ei_leaf_hi; /* high 16 bits of physical block */
+ __u16 ei_unused;
+};
+
+/*
+ * each block (leaves and indexes), even inode-stored has header
+ */
+struct ext4_extent_header {
+ __le16 eh_magic; /* probably will support different formats */
+ __le16 eh_entries; /* number of valid entries */
+ __le16 eh_max; /* capacity of store in entries */
+ __le16 eh_depth; /* has tree real underlaying blocks? */
+ __le32 eh_generation; /* generation of the tree */
+};
+
+#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a)
+
+/*
+ * array of ext4_ext_path contains path to some extent
+ * creation/lookup routines use it for traversal/splitting/etc
+ * truncate uses it to simulate recursive walking
+ */
+struct ext4_ext_path {
+ __u32 p_block;
+ __u16 p_depth;
+ struct ext4_extent *p_ext;
+ struct ext4_extent_idx *p_idx;
+ struct ext4_extent_header *p_hdr;
+ struct buffer_head *p_bh;
+};
+
+/*
+ * structure for external API
+ */
+
+#define EXT4_EXT_CACHE_NO 0
+#define EXT4_EXT_CACHE_GAP 1
+#define EXT4_EXT_CACHE_EXTENT 2
+
+/*
+ * to be called by ext4_ext_walk_space()
+ * negative retcode - error
+ * positive retcode - signal for ext4_ext_walk_space(), see below
+ * callback must return valid extent (passed or newly created)
+ */
+typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
+ struct ext4_ext_cache *,
+ void *);
+
+#define EXT_CONTINUE 0
+#define EXT_BREAK 1
+#define EXT_REPEAT 2
+
+
+#define EXT_MAX_BLOCK 0xffffffff
+
+
+#define EXT_FIRST_EXTENT(__hdr__) \
+ ((struct ext4_extent *) (((char *) (__hdr__)) + \
+ sizeof(struct ext4_extent_header)))
+#define EXT_FIRST_INDEX(__hdr__) \
+ ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \
+ sizeof(struct ext4_extent_header)))
+#define EXT_HAS_FREE_INDEX(__path__) \
+ (le16_to_cpu((__path__)->p_hdr->eh_entries) \
+ < le16_to_cpu((__path__)->p_hdr->eh_max))
+#define EXT_LAST_EXTENT(__hdr__) \
+ (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
+#define EXT_LAST_INDEX(__hdr__) \
+ (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
+#define EXT_MAX_EXTENT(__hdr__) \
+ (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
+#define EXT_MAX_INDEX(__hdr__) \
+ (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
+
+static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode)
+{
+ return (struct ext4_extent_header *) EXT4_I(inode)->i_data;
+}
+
+static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh)
+{
+ return (struct ext4_extent_header *) bh->b_data;
+}
+
+static inline unsigned short ext_depth(struct inode *inode)
+{
+ return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
+}
+
+static inline void ext4_ext_tree_changed(struct inode *inode)
+{
+ EXT4_I(inode)->i_ext_generation++;
+}
+
+static inline void
+ext4_ext_invalidate_cache(struct inode *inode)
+{
+ EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
+}
+
+extern int ext4_extent_tree_init(handle_t *, struct inode *);
+extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
+extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
+extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *);
+extern struct ext4_ext_path * ext4_ext_find_extent(struct inode *, int, struct ext4_ext_path *);
+
+#endif /* _LINUX_EXT4_EXTENTS */
+
diff -puN include/linux/ext4_fs.h~ext4-extents include/linux/ext4_fs.h
--- linux-2.6.18-rc4/include/linux/ext4_fs.h~ext4-extents 2006-08-09 15:41:44.282227216 -0700
+++ linux-2.6.18-rc4-ming/include/linux/ext4_fs.h 2006-08-09 15:41:44.319227515 -0700
@@ -182,8 +182,9 @@ struct ext4_group_desc
#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
+#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */

-#define EXT4_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
+#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */

/*
@@ -371,6 +372,7 @@ struct ext4_inode {
#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
+#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */

/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
@@ -560,11 +562,13 @@ static inline struct ext4_inode_info *EX
#define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
#define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010
+#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */

#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
EXT4_FEATURE_INCOMPAT_RECOVER| \
- EXT4_FEATURE_INCOMPAT_META_BG)
+ EXT4_FEATURE_INCOMPAT_META_BG| \
+ EXT4_FEATURE_INCOMPAT_EXTENTS)
#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
EXT4_FEATURE_RO_COMPAT_BTREE_DIR)
@@ -803,6 +807,9 @@ extern int ext4_get_inode_loc(struct ino
extern void ext4_truncate (struct inode *);
extern void ext4_set_inode_flags(struct inode *);
extern void ext4_set_aops(struct inode *inode);
+extern int ext4_writepage_trans_blocks(struct inode *);
+extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
+ struct address_space *mapping, loff_t from);

/* ioctl.c */
extern int ext4_ioctl (struct inode *, struct file *, unsigned int,
@@ -856,6 +863,26 @@ extern struct inode_operations ext4_spec
extern struct inode_operations ext4_symlink_inode_operations;
extern struct inode_operations ext4_fast_symlink_inode_operations;

+/* extents.c */
+extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
+extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
+extern int ext4_ext_get_blocks(handle_t *, struct inode *, sector_t,
+ unsigned long, struct buffer_head *, int, int);
+extern void ext4_ext_truncate(struct inode *, struct page *);
+extern void ext4_ext_init(struct super_block *);
+extern void ext4_ext_release(struct super_block *);
+static inline int
+ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
+ unsigned long max_blocks, struct buffer_head *bh,
+ int create, int extend_disksize)
+{
+ if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+ return ext4_ext_get_blocks(handle, inode, block, max_blocks,
+ bh, create, extend_disksize);
+ return ext4_get_blocks_handle(handle, inode, block, max_blocks, bh,
+ create, extend_disksize);
+}
+

#endif /* __KERNEL__ */

diff -puN include/linux/ext4_fs_i.h~ext4-extents include/linux/ext4_fs_i.h
--- linux-2.6.18-rc4/include/linux/ext4_fs_i.h~ext4-extents 2006-08-09 15:41:44.285227240 -0700
+++ linux-2.6.18-rc4-ming/include/linux/ext4_fs_i.h 2006-08-09 15:41:44.320227524 -0700
@@ -65,6 +65,16 @@ struct ext4_block_alloc_info {
#define rsv_end rsv_window._rsv_end

/*
+ * storage for cached extent
+ */
+struct ext4_ext_cache {
+ __u32 ec_start;
+ __u32 ec_block;
+ __u32 ec_len; /* must be 32bit to return holes */
+ __u32 ec_type;
+};
+
+/*
* third extended file system inode data in memory
*/
struct ext4_inode_info {
@@ -142,6 +152,9 @@ struct ext4_inode_info {
*/
struct mutex truncate_mutex;
struct inode vfs_inode;
+
+ unsigned long i_ext_generation;
+ struct ext4_ext_cache i_cached_extent;
};

#endif /* _LINUX_EXT4_FS_I */
diff -puN include/linux/ext4_fs_sb.h~ext4-extents include/linux/ext4_fs_sb.h
--- linux-2.6.18-rc4/include/linux/ext4_fs_sb.h~ext4-extents 2006-08-09 15:41:44.289227273 -0700
+++ linux-2.6.18-rc4-ming/include/linux/ext4_fs_sb.h 2006-08-09 15:41:44.320227524 -0700
@@ -78,6 +78,16 @@ struct ext4_sb_info {
char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
int s_jquota_fmt; /* Format of quota to use */
#endif
+
+#ifdef EXTENTS_STATS
+ /* ext4 extents stats */
+ unsigned long s_ext_min;
+ unsigned long s_ext_max;
+ unsigned long s_depth_max;
+ spinlock_t s_ext_stats_lock;
+ unsigned long s_ext_blocks;
+ unsigned long s_ext_extents;
+#endif
};

#endif /* _LINUX_EXT4_FS_SB */
diff -puN include/linux/ext4_jbd2.h~ext4-extents include/linux/ext4_jbd2.h
--- linux-2.6.18-rc4/include/linux/ext4_jbd2.h~ext4-extents 2006-08-09 15:41:44.292227297 -0700
+++ linux-2.6.18-rc4-ming/include/linux/ext4_jbd2.h 2006-08-09 15:41:44.321227532 -0700
@@ -26,9 +26,14 @@
*
* We may have to touch one inode, one bitmap buffer, up to three
* indirection blocks, the group and superblock summaries, and the data
- * block to complete the transaction. */
+ * block to complete the transaction.
+ *
+ * For extents-enabled fs we may have to allocate and modify upto
+ * 5 levels of tree + root which is stored in inode. */

-#define EXT4_SINGLEDATA_TRANS_BLOCKS 8U
+#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \
+ (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \
+ || test_opt(sb, EXTENTS) ? 27U : 8U)

/* Extended attribute operations touch at most two data buffers,
* two bitmap buffers, and two group summaries, in addition to the inode
@@ -42,7 +47,7 @@
* superblock only gets updated once, of course, so don't bother
* counting that again for the quota updates. */

-#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS + \
+#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
EXT4_XATTR_TRANS_BLOCKS - 2 + \
2*EXT4_QUOTA_TRANS_BLOCKS(sb))

@@ -78,9 +83,9 @@
/* Amount of blocks needed for quota insert/delete - we do some block writes
* but inode, sb and group updates are done only once */
#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
- (EXT4_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0)
+ (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
- (EXT4_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0)
+ (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
#else
#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0
#define EXT4_QUOTA_INIT_BLOCKS(sb) 0

_



2006-08-10 06:40:10

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH 1/9] extents for ext4

On Wed, 09 Aug 2006 18:20:26 -0700
Mingming Cao <[email protected]> wrote:

>
> Add extent map support to ext4. Patch from Alex Tomas.
>
> On disk extents format:
> /*
> * this is extent on-disk structure
> * it's used at the bottom of the tree
> */
> struct ext3_extent {
> __le32 ee_block; /* first logical block extent covers */
> __le16 ee_len; /* number of blocks covered by extent */
> __le16 ee_start_hi; /* high 16 bits of physical block */
> __le32 ee_start; /* low 32 bigs of physical block */
> };
>

>From a quick scan:

- The code is very poorly commented. I'd want to spend a lot of time
reviewing this implementation, but not in its present state.

- Far, far too many inlines

- overly-terse variable naming

- There are several places which appear to be putting block numbers into
an `int'.

- Needs kmalloc()->kzalloc() conversion

- replace all brelse() calls with put_bh(). Because brelse() is
old-fashioned, has a weird name and neelessly permits a NULL arg.

In fact it would be beter to convert JBD and ext3 to put_bh before
copying it all over.

- The open-coded __clear_bit(BH_New, ...) in ext4_ext_get_blocks is a bit
nasty. We can live with nasty, but are we sure that it isn't buggy??

- It has about 7,000 instances of

if ((lhs = expression)) {

whereas the preferred coding style is

lhs = expression;
if (lhs) {

- The existing comments could benefit from some rework by a native English
speaker.


2006-08-10 09:28:13

by Alex Tomas

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

>>>>> Andrew Morton (AM) writes:

>> From a quick scan:

AM> - The code is very poorly commented. I'd want to spend a lot of time
AM> reviewing this implementation, but not in its present state.

what sort of comments are you expecting?

AM> - Far, far too many inlines

probably, I'll review the code in this regard

AM> - overly-terse variable naming

same

AM> - There are several places which appear to be putting block numbers into
AM> an `int'.

same

AM> - Needs kmalloc()->kzalloc() conversion

OK

AM> - replace all brelse() calls with put_bh(). Because brelse() is
AM> old-fashioned, has a weird name and neelessly permits a NULL arg.

AM> In fact it would be beter to convert JBD and ext3 to put_bh before
AM> copying it all over.

OK

AM> - The open-coded __clear_bit(BH_New, ...) in ext4_ext_get_blocks is a bit
AM> nasty. We can live with nasty, but are we sure that it isn't buggy??

I believe it isn't buggy -- it applies to non-shared var.
it also showed minor improvement on SMP.

AM> - It has about 7,000 instances of

AM> if ((lhs = expression)) {

AM> whereas the preferred coding style is

AM> lhs = expression;
AM> if (lhs) {

OK

AM> - The existing comments could benefit from some rework by a native English
AM> speaker.

could someone assist here, please?

thanks, Alex

2006-08-10 09:48:38

by Andrew Morton

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

On Thu, 10 Aug 2006 13:29:56 +0400
Alex Tomas <[email protected]> wrote:

> AM> - The code is very poorly commented. I'd want to spend a lot of time
> AM> reviewing this implementation, but not in its present state.
>
> what sort of comments are you expecting?

Ones which tell me what the code is attempting to do. Ones which tell me
the things which I need to know and which I cannot determine from the
implementation within a reasonable period of time. Ones which tell me
about the hidden design decisions, the known shortcomings, the
things-still-to-do.

It's a bit of an artform, really. I guess one needs to put oneself in the
position of the reader, then work out what the reader wants to know.

Good examples don't immediately leap to mind, I'm afraid. Maybe some of
fs/buffer.c? That's important and pretty tricky code in there, so it goes
to some lengths.

2006-08-10 10:07:11

by Alex Tomas

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4


just to make things clear ... I thought the code isn't that bad commented. I may be wrong, of course. but could you have a look at few routines
(ext3_ext_create_new_leaf() or ext3_ext_get_blocks() fo example)
and tell me what's wrong with existing comments (besides monkey english)
and how it should look like?

thanks, Alex

>>>>> Andrew Morton (AM) writes:

AM> On Thu, 10 Aug 2006 13:29:56 +0400
AM> Alex Tomas <[email protected]> wrote:

AM> - The code is very poorly commented. I'd want to spend a lot of time
AM> reviewing this implementation, but not in its present state.
>>
>> what sort of comments are you expecting?

AM> Ones which tell me what the code is attempting to do. Ones which tell me
AM> the things which I need to know and which I cannot determine from the
AM> implementation within a reasonable period of time. Ones which tell me
AM> about the hidden design decisions, the known shortcomings, the
AM> things-still-to-do.

AM> It's a bit of an artform, really. I guess one needs to put oneself in the
AM> position of the reader, then work out what the reader wants to know.

AM> Good examples don't immediately leap to mind, I'm afraid. Maybe some of
AM> fs/buffer.c? That's important and pretty tricky code in there, so it goes
AM> to some lengths.

2006-08-10 15:55:35

by Zach Brown

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4


> Good examples don't immediately leap to mind, I'm afraid. Maybe some of
> fs/buffer.c? That's important and pretty tricky code in there, so it goes
> to some lengths.

fs/direct-io.c? It has some fantastic commentary. (Just please don't
also take inspiration from its bug/line ratio :)).

- z

2006-08-10 16:46:45

by Mingming Cao

[permalink] [raw]
Subject: Re: [PATCH 1/9] extents for ext4

Andrew Morton wrote:

> On Wed, 09 Aug 2006 18:20:26 -0700
> Mingming Cao <[email protected]> wrote:
>
>
>>Add extent map support to ext4. Patch from Alex Tomas.
>>
>>On disk extents format:
>>/*
>> * this is extent on-disk structure
>> * it's used at the bottom of the tree
>> */
>>struct ext3_extent {
>> __le32 ee_block; /* first logical block extent covers */
>> __le16 ee_len; /* number of blocks covered by extent */
>> __le16 ee_start_hi; /* high 16 bits of physical block */
>> __le32 ee_start; /* low 32 bigs of physical block */
>>};
>>
>
>
>>From a quick scan:
>

> - There are several places which appear to be putting block numbers into
> an `int'.
>

This is fixed in [PATCH 4/9] 48bit support in extents, where we
converted those "int" type block numbers to ext4_fsblk_t (which is
typedefined as sector_t to support 48bit)

Thanks,
Mingming

2006-08-10 17:18:22

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

On Wed, Aug 09, 2006 at 11:39:40PM -0700, Andrew Morton wrote:
> - replace all brelse() calls with put_bh(). Because brelse() is
> old-fashioned, has a weird name and neelessly permits a NULL arg.
>
> In fact it would be beter to convert JBD and ext3 to put_bh before
> copying it all over.

Wouldn't it be better to preserve in the source code history the
brelse->put_bh conversion? We can pour a huge number of changes in
ext4 before we submit, but I would have thought it would be easier for
everyone to see what is going on if we submit with just the minimal
changes, and then have patches that address concerns like this one at
a time.

- Ted

2006-08-10 17:47:15

by Randy Dunlap

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

On Thu, 10 Aug 2006 13:29:56 +0400 Alex Tomas wrote:

> >>>>> Andrew Morton (AM) writes:
>
> >> From a quick scan:
>
> AM> - The code is very poorly commented. I'd want to spend a lot of time
> AM> reviewing this implementation, but not in its present state.
>
> what sort of comments are you expecting?

Helpful ones. Not obvious stuff. Intents.
Tricks used (if they are the right thing to do).

How, what, why. But not nitty-gritty details of how.
"Why" is often more important.

> AM> - The existing comments could benefit from some rework by a native English
> AM> speaker.
>
> could someone assist here, please?

Yes. How would you like it? Just comments via email or (quilt) patches?
Which files/patches?

---
~Randy

2006-08-10 18:04:15

by Andrew Morton

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

On Thu, 10 Aug 2006 13:17:55 -0400
Theodore Tso <[email protected]> wrote:

> On Wed, Aug 09, 2006 at 11:39:40PM -0700, Andrew Morton wrote:
> > - replace all brelse() calls with put_bh(). Because brelse() is
> > old-fashioned, has a weird name and neelessly permits a NULL arg.
> >
> > In fact it would be beter to convert JBD and ext3 to put_bh before
> > copying it all over.
>
> Wouldn't it be better to preserve in the source code history the
> brelse->put_bh conversion? We can pour a huge number of changes in
> ext4 before we submit, but I would have thought it would be easier for
> everyone to see what is going on if we submit with just the minimal
> changes, and then have patches that address concerns like this one at
> a time.
>

I'd suggest that this be one of the cleanups which be done within ext3
before taking the ext4 copy.

That's assuming we want to do the spring-cleaning - we might of course
decide not to. But it'd be a good time to do so.

2006-08-10 19:04:00

by Alex Tomas

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

>>>>> Randy Dunlap (RD) writes:

RD> On Thu, 10 Aug 2006 13:29:56 +0400 Alex Tomas wrote:
>> >>>>> Andrew Morton (AM) writes:
>>
>> >> From a quick scan:
>>
AM> - The code is very poorly commented. I'd want to spend a lot of time
AM> reviewing this implementation, but not in its present state.
>>
>> what sort of comments are you expecting?

RD> Helpful ones. Not obvious stuff. Intents.
RD> Tricks used (if they are the right thing to do).

RD> How, what, why. But not nitty-gritty details of how.
RD> "Why" is often more important.

well, it's a simple b+tree and i'm not sure there are ticks in.
I'll try to re-read them again WRT what you wrote.


AM> - The existing comments could benefit from some rework by a native English
AM> speaker.
>>
>> could someone assist here, please?

RD> Yes. How would you like it? Just comments via email or (quilt) patches?
RD> Which files/patches?

please, have a look at ext4-extents.patch first

thanks, Alex

2006-08-11 20:54:57

by Randy Dunlap

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

On Thu, 10 Aug 2006 13:29:56 +0400 Alex Tomas wrote:

> AM> - The existing comments could benefit from some rework by a
> AM> native English speaker.
>
> could someone assist here, please?

See if this helps.
Patch applies on top of all ext4 patches from
http://ext2.sourceforge.net/48bitext3/patches/latest/.

---
From: Randy Dunlap <[email protected]>

Clean up comments in ext4-extents patch.

Signed-off-by: Randy Dunlap <[email protected]>
---
fs/ext4/extents.c | 226 ++++++++++++++++++++++------------------
include/linux/ext4_fs_extents.h | 54 ++++-----
include/linux/ext4_jbd2.h | 4
3 files changed, 157 insertions(+), 127 deletions(-)

--- linux-2618-rc4-ext4.orig/include/linux/ext4_jbd2.h
+++ linux-2618-rc4-ext4/include/linux/ext4_jbd2.h
@@ -28,8 +28,8 @@
* indirection blocks, the group and superblock summaries, and the data
* block to complete the transaction.
*
- * For extents-enabled fs we may have to allocate and modify upto
- * 5 levels of tree + root which is stored in inode. */
+ * For extents-enabled fs we may have to allocate and modify up to
+ * 5 levels of tree + root which are stored in the inode. */

#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \
(EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \
--- linux-2618-rc4-ext4.orig/include/linux/ext4_fs_extents.h
+++ linux-2618-rc4-ext4/include/linux/ext4_fs_extents.h
@@ -22,29 +22,29 @@
#include <linux/ext4_fs.h>

/*
- * with AGRESSIVE_TEST defined capacity of index/leaf blocks
- * become very little, so index split, in-depth growing and
- * other hard changes happens much more often
- * this is for debug purposes only
+ * With AGRESSIVE_TEST defined, the capacity of index/leaf blocks
+ * becomes very small, so index split, in-depth growing and
+ * other hard changes happen much more often.
+ * This is for debug purposes only.
*/
#define AGRESSIVE_TEST_

/*
- * with EXTENTS_STATS defined number of blocks and extents
- * are collected in truncate path. they'll be showed at
- * umount time
+ * With EXTENTS_STATS defined, the number of blocks and extents
+ * are collected in the truncate path. They'll be shown at
+ * umount time.
*/
#define EXTENTS_STATS__

/*
- * if CHECK_BINSEARCH defined, then results of binary search
- * will be checked by linear search
+ * If CHECK_BINSEARCH is defined, then the results of the binary search
+ * will also be checked by linear search.
*/
#define CHECK_BINSEARCH__

/*
- * if EXT_DEBUG is defined you can use 'extdebug' mount option
- * to get lots of info what's going on
+ * If EXT_DEBUG is defined you can use the 'extdebug' mount option
+ * to get lots of info about what's going on.
*/
#define EXT_DEBUG__
#ifdef EXT_DEBUG
@@ -54,58 +54,58 @@
#endif

/*
- * if EXT_STATS is defined then stats numbers are collected
- * these number will be displayed at umount time
+ * If EXT_STATS is defined then stats numbers are collected.
+ * These number will be displayed at umount time.
*/
#define EXT_STATS_


/*
- * ext4_inode has i_block array (60 bytes total)
- * first 12 bytes store ext4_extent_header
- * the remain stores array of ext4_extent
+ * ext4_inode has i_block array (60 bytes total).
+ * The first 12 bytes store ext4_extent_header;
+ * the remainder stores an array of ext4_extent.
*/

/*
- * this is extent on-disk structure
- * it's used at the bottom of the tree
+ * This is the extent on-disk structure.
+ * It's used at the bottom of the tree.
*/
struct ext4_extent {
__le32 ee_block; /* first logical block extent covers */
__le16 ee_len; /* number of blocks covered by extent */
__le16 ee_start_hi; /* high 16 bits of physical block */
- __le32 ee_start; /* low 32 bigs of physical block */
+ __le32 ee_start; /* low 32 bits of physical block */
};

/*
- * this is index on-disk structure
- * it's used at all the levels, but the bottom
+ * This is index on-disk structure.
+ * It's used at all the levels except the bottom.
*/
struct ext4_extent_idx {
__le32 ei_block; /* index covers logical blocks from 'block' */
__le32 ei_leaf; /* pointer to the physical block of the next *
- * level. leaf or next index could bet here */
+ * level. leaf or next index could be there */
__le16 ei_leaf_hi; /* high 16 bits of physical block */
__u16 ei_unused;
};

/*
- * each block (leaves and indexes), even inode-stored has header
+ * Each block (leaves and indexes), even inode-stored has header.
*/
struct ext4_extent_header {
__le16 eh_magic; /* probably will support different formats */
__le16 eh_entries; /* number of valid entries */
__le16 eh_max; /* capacity of store in entries */
- __le16 eh_depth; /* has tree real underlaying blocks? */
+ __le16 eh_depth; /* has tree real underlying blocks? */
__le32 eh_generation; /* generation of the tree */
};

#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a)

/*
- * array of ext4_ext_path contains path to some extent
- * creation/lookup routines use it for traversal/splitting/etc
- * truncate uses it to simulate recursive walking
+ * Array of ext4_ext_path contains path to some extent.
+ * Creation/lookup routines use it for traversal/splitting/etc.
+ * Truncate uses it to simulate recursive walking.
*/
struct ext4_ext_path {
ext4_fsblk_t p_block;
--- linux-2618-rc4-ext4.orig/fs/ext4/extents.c
+++ linux-2618-rc4-ext4/fs/ext4/extents.c
@@ -44,7 +44,10 @@
#include <asm/uaccess.h>


-/* this macro combines low and hi parts of phys. blocknr into ext4_fsblk_t */
+/*
+ * ext_pblock:
+ * combine low and high parts of physical block number into ext4_fsblk_t
+ */
static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
{
ext4_fsblk_t block;
@@ -55,7 +58,10 @@ static inline ext4_fsblk_t ext_pblock(st
return block;
}

-/* this macro combines low and hi parts of phys. blocknr into ext4_fsblk_t */
+/*
+ * idx_pblock:
+ * combine low and high parts of a leaf physical block number into ext4_fsblk_t
+ */
static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
{
ext4_fsblk_t block;
@@ -66,7 +72,11 @@ static inline ext4_fsblk_t idx_pblock(st
return block;
}

-/* the routine stores large phys. blocknr into extent breaking it into parts */
+/*
+ * ext4_ext_store_pblock:
+ * stores a large physical block number into an extent struct,
+ * breaking it into parts
+ */
static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
{
ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff));
@@ -74,7 +84,11 @@ static inline void ext4_ext_store_pblock
ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
}

-/* the routine stores large phys. blocknr into index breaking it into parts */
+/*
+ * ext4_idx_store_pblock:
+ * stores a large physical block number into an index struct,
+ * breaking it into parts
+ */
static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
{
ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff));
@@ -179,8 +193,8 @@ static ext4_fsblk_t ext4_ext_find_goal(s
if ((ex = path[depth].p_ext))
return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block));

- /* it looks index is empty
- * try to find starting from index itself */
+ /* it looks like index is empty;
+ * try to find starting block from index itself */
if (path[depth].p_bh)
return path[depth].p_bh->b_blocknr;
}
@@ -317,7 +331,8 @@ static void ext4_ext_drop_refs(struct ex
}

/*
- * binary search for closest index by given block
+ * ext4_ext_binsearch_idx:
+ * binary search for the closest index of the given block
*/
static void
ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int block)
@@ -375,7 +390,8 @@ ext4_ext_binsearch_idx(struct inode *ino
}

/*
- * binary search for closest extent by given block
+ * ext4_ext_binsearch:
+ * binary search for closest extent of the given block
*/
static void
ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
@@ -388,8 +404,8 @@ ext4_ext_binsearch(struct inode *inode,

if (eh->eh_entries == 0) {
/*
- * this leaf is empty yet:
- * we get such a leaf in split/add case
+ * this leaf is empty:
+ * we get such a leaf in split/add case
*/
return;
}
@@ -520,8 +536,9 @@ err:
}

/*
- * insert new index [logical;ptr] into the block at cupr
- * it check where to insert: before curp or after curp
+ * ext4_ext_insert_index:
+ * insert new index [@logical;@ptr] into the block at @curp;
+ * check where to insert: before @curp or after @curp
*/
static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
struct ext4_ext_path *curp,
@@ -574,13 +591,14 @@ static int ext4_ext_insert_index(handle_
}

/*
- * routine inserts new subtree into the path, using free index entry
- * at depth 'at:
- * - allocates all needed blocks (new leaf and all intermediate index blocks)
- * - makes decision where to split
- * - moves remaining extens and index entries (right to the split point)
- * into the newly allocated blocks
- * - initialize subtree
+ * ext4_ext_split:
+ * inserts new subtree into the path, using free index entry
+ * at depth @at:
+ * - allocates all needed blocks (new leaf and all intermediate index blocks)
+ * - makes decision where to split
+ * - moves remaining extents and index entries (right to the split point)
+ * into the newly allocated blocks
+ * - initializes subtree
*/
static int ext4_ext_split(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
@@ -598,14 +616,14 @@ static int ext4_ext_split(handle_t *hand
int err = 0;

/* make decision: where to split? */
- /* FIXME: now desicion is simplest: at current extent */
+ /* FIXME: now decision is simplest: at current extent */

- /* if current leaf will be splitted, then we should use
+ /* if current leaf will be split, then we should use
* border from split point */
BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr));
if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
border = path[depth].p_ext[1].ee_block;
- ext_debug("leaf will be splitted."
+ ext_debug("leaf will be split."
" next leaf starts at %d\n",
le32_to_cpu(border));
} else {
@@ -616,16 +634,16 @@ static int ext4_ext_split(handle_t *hand
}

/*
- * if error occurs, then we break processing
- * and turn filesystem read-only. so, index won't
+ * If error occurs, then we break processing
+ * and mark filesystem read-only. index won't
* be inserted and tree will be in consistent
- * state. next mount will repair buffers too
+ * state. Next mount will repair buffers too.
*/

/*
- * get array to track all allocated blocks
- * we need this to handle errors and free blocks
- * upon them
+ * Get array to track all allocated blocks.
+ * We need this to handle errors and free blocks
+ * upon them.
*/
ablocks = kmalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
if (!ablocks)
@@ -661,7 +679,7 @@ static int ext4_ext_split(handle_t *hand
neh->eh_depth = 0;
ex = EXT_FIRST_EXTENT(neh);

- /* move remain of path[depth] to the new leaf */
+ /* move remainder of path[depth] to the new leaf */
BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max);
/* start copy from next extent */
/* TODO: we could do it by single memmove */
@@ -813,11 +831,12 @@ cleanup:
}

/*
- * routine implements tree growing procedure:
- * - allocates new block
- * - moves top-level data (index block or leaf) into the new block
- * - initialize new top-level, creating index that points to the
- * just created block
+ * ext4_ext_grow_indepth:
+ * implements tree growing procedure:
+ * - allocates new block
+ * - moves top-level data (index block or leaf) into the new block
+ * - initializes new top-level, creating index that points to the
+ * just created block
*/
static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
@@ -892,8 +911,9 @@ out:
}

/*
- * routine finds empty index and adds new leaf. if no free index found
- * then it requests in-depth growing
+ * ext4_ext_create_new_leaf:
+ * finds empty index and adds new leaf.
+ * if no free index is found, then it requests in-depth growing.
*/
static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
@@ -912,8 +932,8 @@ repeat:
curp--;
}

- /* we use already allocated block for index block
- * so, subsequent data blocks should be contigoues */
+ /* we use already allocated block for index block,
+ * so subsequent data blocks should be contiguous */
if (EXT_HAS_FREE_INDEX(curp)) {
/* if we found index with free entry, then use that
* entry: create all needed subtree and add new leaf */
@@ -943,12 +963,12 @@ repeat:
}

/*
- * only first (depth 0 -> 1) produces free space
- * in all other cases we have to split growed tree
+ * only first (depth 0 -> 1) produces free space;
+ * in all other cases we have to split the grown tree
*/
depth = ext_depth(inode);
if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
- /* now we need split */
+ /* now we need to split */
goto repeat;
}
}
@@ -958,10 +978,11 @@ out:
}

/*
- * returns allocated block in subsequent extent or EXT_MAX_BLOCK
- * NOTE: it consider block number from index entry as
- * allocated block. thus, index entries have to be consistent
- * with leafs
+ * ext4_ext_next_allocated_block:
+ * returns allocated block in subsequent extent or EXT_MAX_BLOCK.
+ * NOTE: it considers block number from index entry as
+ * allocated block. Thus, index entries have to be consistent
+ * with leaves.
*/
static unsigned long
ext4_ext_next_allocated_block(struct ext4_ext_path *path)
@@ -993,6 +1014,7 @@ ext4_ext_next_allocated_block(struct ext
}

/*
+ * ext4_ext_next_leaf_block:
* returns first allocated block from next leaf or EXT_MAX_BLOCK
*/
static unsigned ext4_ext_next_leaf_block(struct inode *inode,
@@ -1021,8 +1043,9 @@ static unsigned ext4_ext_next_leaf_block
}

/*
- * if leaf gets modified and modified extent is first in the leaf
- * then we have to correct all indexes above
+ * ext4_ext_correct_indexes:
+ * if leaf gets modified and modified extent is first in the leaf,
+ * then we have to correct all indexes above.
* TODO: do we need to correct tree in all cases?
*/
int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
@@ -1050,7 +1073,7 @@ int ext4_ext_correct_indexes(handle_t *h
}

/*
- * TODO: we need correction if border is smaller then current one
+ * TODO: we need correction if border is smaller than current one
*/
k = depth - 1;
border = path[depth].p_ext->ee_block;
@@ -1085,7 +1108,7 @@ ext4_can_extents_be_merged(struct inode
/*
* To allow future support for preallocated extents to be added
* as an RO_COMPAT feature, refuse to merge to extents if
- * can result in the top bit of ee_len being set
+ * this can result in the top bit of ee_len being set.
*/
if (le16_to_cpu(ex1->ee_len) + le16_to_cpu(ex2->ee_len) > EXT_MAX_LEN)
return 0;
@@ -1100,9 +1123,10 @@ ext4_can_extents_be_merged(struct inode
}

/*
- * this routine tries to merge requsted extent into the existing
- * extent or inserts requested extent as new one into the tree,
- * creating new leaf in no-space case
+ * ext4_ext_insert_extent:
+ * tries to merge requsted extent into the existing extent or
+ * inserts requested extent as new one into the tree,
+ * creating new leaf in the no-space case.
*/
int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path,
@@ -1163,8 +1187,8 @@ repeat:
}

/*
- * there is no free space in found leaf
- * we're gonna add new leaf in the tree
+ * There is no free space in the found leaf.
+ * We're gonna add a new leaf in the tree.
*/
err = ext4_ext_create_new_leaf(handle, inode, path, newext);
if (err)
@@ -1377,7 +1401,8 @@ ext4_ext_put_in_cache(struct inode *inod
}

/*
- * this routine calculate boundaries of the gap requested block fits into
+ * ext4_ext_put_gap_in_cache:
+ * calculate boundaries of the gap that the requested block fits into
* and cache this gap
*/
static inline void
@@ -1452,9 +1477,10 @@ ext4_ext_in_cache(struct inode *inode, u
}

/*
- * routine removes index from the index block
- * it's used in truncate case only. thus all requests are for
- * last index in the block only
+ * ext4_ext_rm_idx:
+ * removes index from the index block.
+ * It's used in truncate case only, thus all requests are for
+ * last index in the block only.
*/
int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path)
@@ -1480,11 +1506,12 @@ int ext4_ext_rm_idx(handle_t *handle, st
}

/*
- * This routine returns max. credits extent tree can consume.
+ * ext4_ext_calc_credits_for_insert:
+ * This routine returns max. credits that the extent tree can consume.
* It should be OK for low-performance paths like ->writepage()
- * To allow many writing process to fit a single transaction,
- * caller should calculate credits under truncate_mutex and
- * pass actual path.
+ * To allow many writing processes to fit into a single transaction,
+ * the caller should calculate credits under truncate_mutex and
+ * pass the actual path.
*/
int inline ext4_ext_calc_credits_for_insert(struct inode *inode,
struct ext4_ext_path *path)
@@ -1500,9 +1527,9 @@ int inline ext4_ext_calc_credits_for_ins
}

/*
- * given 32bit logical block (4294967296 blocks), max. tree
+ * given 32-bit logical block (4294967296 blocks), max. tree
* can be 4 levels in depth -- 4 * 340^4 == 53453440000.
- * let's also add one more level for imbalance.
+ * Let's also add one more level for imbalance.
*/
depth = 5;

@@ -1510,13 +1537,13 @@ int inline ext4_ext_calc_credits_for_ins
needed = 2;

/*
- * tree can be full, so it'd need to grow in depth:
+ * tree can be full, so it would need to grow in depth:
* allocation + old root + new root
*/
needed += 2 + 1 + 1;

/*
- * Index split can happen, we'd need:
+ * Index split can happen, we would need:
* allocate intermediate indexes (bitmap + group)
* + change two blocks at each level, but root (already included)
*/
@@ -1634,7 +1661,7 @@ ext4_ext_rm_leaf(handle_t *handle, struc
BUG_ON(b != ex_ee_block + ex_ee_len - 1);
}

- /* at present, extent can't cross block group */
+ /* at present, extent can't cross block group: */
/* leaf + bitmap + group desc + sb + inode */
credits = 5;
if (ex == EXT_FIRST_EXTENT(eh)) {
@@ -1660,7 +1687,7 @@ ext4_ext_rm_leaf(handle_t *handle, struc
goto out;

if (num == 0) {
- /* this extent is removed entirely mark slot unused */
+ /* this extent is removed; mark slot entirely unused */
ext4_ext_store_pblock(ex, 0);
eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
}
@@ -1692,7 +1719,8 @@ out:
}

/*
- * returns 1 if current index have to be freed (even partial)
+ * ext4_ext_more_to_rm:
+ * returns 1 if current index has to be freed (even partial)
*/
static int inline
ext4_ext_more_to_rm(struct ext4_ext_path *path)
@@ -1703,7 +1731,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path
return 0;

/*
- * if truncate on deeper level happened it it wasn't partial
+ * if truncate on deeper level happened, it wasn't partial,
* so we have to consider current index for truncation
*/
if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
@@ -1729,8 +1757,8 @@ int ext4_ext_remove_space(struct inode *
ext4_ext_invalidate_cache(inode);

/*
- * we start scanning from right side freeing all the blocks
- * after i_size and walking into the deep
+ * We start scanning from right side, freeing all the blocks
+ * after i_size and walking into the tree depth-wise.
*/
path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL);
if (path == NULL) {
@@ -1749,7 +1777,7 @@ int ext4_ext_remove_space(struct inode *
if (i == depth) {
/* this is leaf block */
err = ext4_ext_rm_leaf(handle, inode, path, start);
- /* root level have p_bh == NULL, brelse() eats this */
+ /* root level has p_bh == NULL, brelse() eats this */
brelse(path[i].p_bh);
path[i].p_bh = NULL;
i--;
@@ -1772,14 +1800,14 @@ int ext4_ext_remove_space(struct inode *
BUG_ON(path[i].p_hdr->eh_magic != EXT4_EXT_MAGIC);

if (!path[i].p_idx) {
- /* this level hasn't touched yet */
+ /* this level hasn't been touched yet */
path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
ext_debug("init index ptr: hdr 0x%p, num %d\n",
path[i].p_hdr,
le16_to_cpu(path[i].p_hdr->eh_entries));
} else {
- /* we've already was here, see at next index */
+ /* we were already here, see at next index */
path[i].p_idx--;
}

@@ -1799,19 +1827,19 @@ int ext4_ext_remove_space(struct inode *
break;
}

- /* put actual number of indexes to know is this
- * number got changed at the next iteration */
+ /* save actual number of indexes since this
+ * number is changed at the next iteration */
path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
i++;
} else {
- /* we finish processing this index, go up */
+ /* we finished processing this index, go up */
if (path[i].p_hdr->eh_entries == 0 && i > 0) {
- /* index is empty, remove it
+ /* index is empty, remove it;
* handle must be already prepared by the
* truncatei_leaf() */
err = ext4_ext_rm_idx(handle, inode, path + i);
}
- /* root level have p_bh == NULL, brelse() eats this */
+ /* root level has p_bh == NULL, brelse() eats this */
brelse(path[i].p_bh);
path[i].p_bh = NULL;
i--;
@@ -1822,8 +1850,8 @@ int ext4_ext_remove_space(struct inode *
/* TODO: flexible tree reduction should be here */
if (path->p_hdr->eh_entries == 0) {
/*
- * truncate to zero freed all the tree
- * so, we need to correct eh_depth
+ * truncate to zero freed all the tree,
+ * so we need to correct eh_depth
*/
err = ext4_ext_get_access(handle, inode, path);
if (err == 0) {
@@ -1911,7 +1939,7 @@ int ext4_ext_get_blocks(handle_t *handle
if (goal == EXT4_EXT_CACHE_GAP) {
if (!create) {
/* block isn't allocated yet and
- * user don't want to allocate it */
+ * user doesn't want to allocate it */
goto out2;
}
/* we should allocate requested block */
@@ -1920,7 +1948,7 @@ int ext4_ext_get_blocks(handle_t *handle
newblock = iblock
- le32_to_cpu(newex.ee_block)
+ ext_pblock(&newex);
- /* number of remain blocks in the extent */
+ /* number of remaining blocks in the extent */
allocated = le16_to_cpu(newex.ee_len) -
(iblock - le32_to_cpu(newex.ee_block));
goto out;
@@ -1940,8 +1968,8 @@ int ext4_ext_get_blocks(handle_t *handle
depth = ext_depth(inode);

/*
- * consistent leaf must not be empty
- * this situations is possible, though, _during_ tree modification
+ * consistent leaf must not be empty;
+ * this situation is possible, though, _during_ tree modification;
* this is why assert can't be put in ext4_ext_find_extent()
*/
BUG_ON(path[depth].p_ext == NULL && depth != 0);
@@ -1959,10 +1987,10 @@ int ext4_ext_get_blocks(handle_t *handle
*/
if (ee_len > EXT_MAX_LEN)
goto out2;
- /* if found exent covers block, simple return it */
+ /* if found extent covers block, simply return it */
if (iblock >= ee_block && iblock < ee_block + ee_len) {
newblock = iblock - ee_block + ee_start;
- /* number of remain blocks in the extent */
+ /* number of remaining blocks in the extent */
allocated = ee_len - (iblock - ee_block);
ext_debug("%d fit into %lu:%d -> "E3FSBLK"\n", (int) iblock,
ee_block, ee_len, newblock);
@@ -1973,17 +2001,18 @@ int ext4_ext_get_blocks(handle_t *handle
}

/*
- * requested block isn't allocated yet
+ * requested block isn't allocated yet;
* we couldn't try to create block if create flag is zero
*/
if (!create) {
- /* put just found gap into cache to speedup subsequest reqs */
+ /* put just found gap into cache to speed up
+ * subsequent requests */
ext4_ext_put_gap_in_cache(inode, path, iblock);
goto out2;
}
/*
* Okay, we need to do block allocation. Lazily initialize the block
- * allocation info here if necessary
+ * allocation info here if necessary.
*/
if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
ext4_init_block_alloc_info(inode);
@@ -2061,9 +2090,9 @@ void ext4_ext_truncate(struct inode * in
ext4_ext_invalidate_cache(inode);

/*
- * TODO: optimization is possible here
- * probably we need not scaning at all,
- * because page truncation is enough
+ * TODO: optimization is possible here.
+ * Probably we need not scan at all,
+ * because page truncation is enough.
*/
if (ext4_orphan_add(handle, inode))
goto out_stop;
@@ -2077,13 +2106,13 @@ void ext4_ext_truncate(struct inode * in
err = ext4_ext_remove_space(inode, last_block);

/* In a multi-transaction truncate, we only make the final
- * transaction synchronous */
+ * transaction synchronous. */
if (IS_SYNC(inode))
handle->h_sync = 1;

out_stop:
/*
- * If this was a simple ftruncate(), and the file will remain alive
+ * If this was a simple ftruncate() and the file will remain alive,
* then we need to clear up the orphan record which we created above.
* However, if this was a real unlink then we were called by
* ext4_delete_inode(), and we allow that function to clean up the
@@ -2097,7 +2126,8 @@ out_stop:
}

/*
- * this routine calculate max number of blocks we could modify
+ * ext4_ext_writepage_trans_blocks:
+ * calculate max number of blocks we could modify
* in order to allocate new block for an inode
*/
int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
@@ -2106,7 +2136,7 @@ int ext4_ext_writepage_trans_blocks(stru

needed = ext4_ext_calc_credits_for_insert(inode, NULL);

- /* caller want to allocate num blocks, but note it includes sb */
+ /* caller wants to allocate num blocks, but note it includes sb */
needed = needed * num - (num - 1);

#ifdef CONFIG_QUOTA

2006-08-11 21:06:13

by Alex Tomas

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

thanks a lot Randy

> On Thu, 10 Aug 2006 13:29:56 +0400 Alex Tomas wrote:
> See if this helps.
> Patch applies on top of all ext4 patches from
> http://ext2.sourceforge.net/48bitext3/patches/latest/.

2006-08-11 21:50:09

by Mingming Cao

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

On Fri, 2006-08-11 at 13:57 -0700, Randy.Dunlap wrote:
> On Thu, 10 Aug 2006 13:29:56 +0400 Alex Tomas wrote:
>
> > AM> - The existing comments could benefit from some rework by a
> > AM> native English speaker.
> >
> > could someone assist here, please?
>
> See if this helps.
> Patch applies on top of all ext4 patches from
> http://ext2.sourceforge.net/48bitext3/patches/latest/.
>
> ---
> From: Randy Dunlap <[email protected]>
>
> Clean up comments in ext4-extents patch.
>
> Signed-off-by: Randy Dunlap <[email protected]>

Thanks, Randy, I added this to the queue.

> ---
> fs/ext4/extents.c | 226 ++++++++++++++++++++++------------------
> include/linux/ext4_fs_extents.h | 54 ++++-----
> include/linux/ext4_jbd2.h | 4
> 3 files changed, 157 insertions(+), 127 deletions(-)
>
> --- linux-2618-rc4-ext4.orig/include/linux/ext4_jbd2.h
> +++ linux-2618-rc4-ext4/include/linux/ext4_jbd2.h
> @@ -28,8 +28,8 @@
> * indirection blocks, the group and superblock summaries, and the data
> * block to complete the transaction.
> *
> - * For extents-enabled fs we may have to allocate and modify upto
> - * 5 levels of tree + root which is stored in inode. */
> + * For extents-enabled fs we may have to allocate and modify up to
> + * 5 levels of tree + root which are stored in the inode. */
>
> #define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \
> (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \
> --- linux-2618-rc4-ext4.orig/include/linux/ext4_fs_extents.h
> +++ linux-2618-rc4-ext4/include/linux/ext4_fs_extents.h
> @@ -22,29 +22,29 @@
> #include <linux/ext4_fs.h>
>
> /*
> - * with AGRESSIVE_TEST defined capacity of index/leaf blocks
> - * become very little, so index split, in-depth growing and
> - * other hard changes happens much more often
> - * this is for debug purposes only
> + * With AGRESSIVE_TEST defined, the capacity of index/leaf blocks
> + * becomes very small, so index split, in-depth growing and
> + * other hard changes happen much more often.
> + * This is for debug purposes only.
> */
> #define AGRESSIVE_TEST_
>
> /*
> - * with EXTENTS_STATS defined number of blocks and extents
> - * are collected in truncate path. they'll be showed at
> - * umount time
> + * With EXTENTS_STATS defined, the number of blocks and extents
> + * are collected in the truncate path. They'll be shown at
> + * umount time.
> */
> #define EXTENTS_STATS__
>
> /*
> - * if CHECK_BINSEARCH defined, then results of binary search
> - * will be checked by linear search
> + * If CHECK_BINSEARCH is defined, then the results of the binary search
> + * will also be checked by linear search.
> */
> #define CHECK_BINSEARCH__
>
> /*
> - * if EXT_DEBUG is defined you can use 'extdebug' mount option
> - * to get lots of info what's going on
> + * If EXT_DEBUG is defined you can use the 'extdebug' mount option
> + * to get lots of info about what's going on.
> */
> #define EXT_DEBUG__
> #ifdef EXT_DEBUG
> @@ -54,58 +54,58 @@
> #endif
>
> /*
> - * if EXT_STATS is defined then stats numbers are collected
> - * these number will be displayed at umount time
> + * If EXT_STATS is defined then stats numbers are collected.
> + * These number will be displayed at umount time.
> */
> #define EXT_STATS_
>
>
> /*
> - * ext4_inode has i_block array (60 bytes total)
> - * first 12 bytes store ext4_extent_header
> - * the remain stores array of ext4_extent
> + * ext4_inode has i_block array (60 bytes total).
> + * The first 12 bytes store ext4_extent_header;
> + * the remainder stores an array of ext4_extent.
> */
>
> /*
> - * this is extent on-disk structure
> - * it's used at the bottom of the tree
> + * This is the extent on-disk structure.
> + * It's used at the bottom of the tree.
> */
> struct ext4_extent {
> __le32 ee_block; /* first logical block extent covers */
> __le16 ee_len; /* number of blocks covered by extent */
> __le16 ee_start_hi; /* high 16 bits of physical block */
> - __le32 ee_start; /* low 32 bigs of physical block */
> + __le32 ee_start; /* low 32 bits of physical block */
> };
>
> /*
> - * this is index on-disk structure
> - * it's used at all the levels, but the bottom
> + * This is index on-disk structure.
> + * It's used at all the levels except the bottom.
> */
> struct ext4_extent_idx {
> __le32 ei_block; /* index covers logical blocks from 'block' */
> __le32 ei_leaf; /* pointer to the physical block of the next *
> - * level. leaf or next index could bet here */
> + * level. leaf or next index could be there */
> __le16 ei_leaf_hi; /* high 16 bits of physical block */
> __u16 ei_unused;
> };
>
> /*
> - * each block (leaves and indexes), even inode-stored has header
> + * Each block (leaves and indexes), even inode-stored has header.
> */
> struct ext4_extent_header {
> __le16 eh_magic; /* probably will support different formats */
> __le16 eh_entries; /* number of valid entries */
> __le16 eh_max; /* capacity of store in entries */
> - __le16 eh_depth; /* has tree real underlaying blocks? */
> + __le16 eh_depth; /* has tree real underlying blocks? */
> __le32 eh_generation; /* generation of the tree */
> };
>
> #define EXT4_EXT_MAGIC cpu_to_le16(0xf30a)
>
> /*
> - * array of ext4_ext_path contains path to some extent
> - * creation/lookup routines use it for traversal/splitting/etc
> - * truncate uses it to simulate recursive walking
> + * Array of ext4_ext_path contains path to some extent.
> + * Creation/lookup routines use it for traversal/splitting/etc.
> + * Truncate uses it to simulate recursive walking.
> */
> struct ext4_ext_path {
> ext4_fsblk_t p_block;
> --- linux-2618-rc4-ext4.orig/fs/ext4/extents.c
> +++ linux-2618-rc4-ext4/fs/ext4/extents.c
> @@ -44,7 +44,10 @@
> #include <asm/uaccess.h>
>
>
> -/* this macro combines low and hi parts of phys. blocknr into ext4_fsblk_t */
> +/*
> + * ext_pblock:
> + * combine low and high parts of physical block number into ext4_fsblk_t
> + */
> static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
> {
> ext4_fsblk_t block;
> @@ -55,7 +58,10 @@ static inline ext4_fsblk_t ext_pblock(st
> return block;
> }
>
> -/* this macro combines low and hi parts of phys. blocknr into ext4_fsblk_t */
> +/*
> + * idx_pblock:
> + * combine low and high parts of a leaf physical block number into ext4_fsblk_t
> + */
> static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
> {
> ext4_fsblk_t block;
> @@ -66,7 +72,11 @@ static inline ext4_fsblk_t idx_pblock(st
> return block;
> }
>
> -/* the routine stores large phys. blocknr into extent breaking it into parts */
> +/*
> + * ext4_ext_store_pblock:
> + * stores a large physical block number into an extent struct,
> + * breaking it into parts
> + */
> static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
> {
> ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff));
> @@ -74,7 +84,11 @@ static inline void ext4_ext_store_pblock
> ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
> }
>
> -/* the routine stores large phys. blocknr into index breaking it into parts */
> +/*
> + * ext4_idx_store_pblock:
> + * stores a large physical block number into an index struct,
> + * breaking it into parts
> + */
> static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
> {
> ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff));
> @@ -179,8 +193,8 @@ static ext4_fsblk_t ext4_ext_find_goal(s
> if ((ex = path[depth].p_ext))
> return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block));
>
> - /* it looks index is empty
> - * try to find starting from index itself */
> + /* it looks like index is empty;
> + * try to find starting block from index itself */
> if (path[depth].p_bh)
> return path[depth].p_bh->b_blocknr;
> }
> @@ -317,7 +331,8 @@ static void ext4_ext_drop_refs(struct ex
> }
>
> /*
> - * binary search for closest index by given block
> + * ext4_ext_binsearch_idx:
> + * binary search for the closest index of the given block
> */
> static void
> ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int block)
> @@ -375,7 +390,8 @@ ext4_ext_binsearch_idx(struct inode *ino
> }
>
> /*
> - * binary search for closest extent by given block
> + * ext4_ext_binsearch:
> + * binary search for closest extent of the given block
> */
> static void
> ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
> @@ -388,8 +404,8 @@ ext4_ext_binsearch(struct inode *inode,
>
> if (eh->eh_entries == 0) {
> /*
> - * this leaf is empty yet:
> - * we get such a leaf in split/add case
> + * this leaf is empty:
> + * we get such a leaf in split/add case
> */
> return;
> }
> @@ -520,8 +536,9 @@ err:
> }
>
> /*
> - * insert new index [logical;ptr] into the block at cupr
> - * it check where to insert: before curp or after curp
> + * ext4_ext_insert_index:
> + * insert new index [@logical;@ptr] into the block at @curp;
> + * check where to insert: before @curp or after @curp
> */
> static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
> struct ext4_ext_path *curp,
> @@ -574,13 +591,14 @@ static int ext4_ext_insert_index(handle_
> }
>
> /*
> - * routine inserts new subtree into the path, using free index entry
> - * at depth 'at:
> - * - allocates all needed blocks (new leaf and all intermediate index blocks)
> - * - makes decision where to split
> - * - moves remaining extens and index entries (right to the split point)
> - * into the newly allocated blocks
> - * - initialize subtree
> + * ext4_ext_split:
> + * inserts new subtree into the path, using free index entry
> + * at depth @at:
> + * - allocates all needed blocks (new leaf and all intermediate index blocks)
> + * - makes decision where to split
> + * - moves remaining extents and index entries (right to the split point)
> + * into the newly allocated blocks
> + * - initializes subtree
> */
> static int ext4_ext_split(handle_t *handle, struct inode *inode,
> struct ext4_ext_path *path,
> @@ -598,14 +616,14 @@ static int ext4_ext_split(handle_t *hand
> int err = 0;
>
> /* make decision: where to split? */
> - /* FIXME: now desicion is simplest: at current extent */
> + /* FIXME: now decision is simplest: at current extent */
>
> - /* if current leaf will be splitted, then we should use
> + /* if current leaf will be split, then we should use
> * border from split point */
> BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr));
> if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
> border = path[depth].p_ext[1].ee_block;
> - ext_debug("leaf will be splitted."
> + ext_debug("leaf will be split."
> " next leaf starts at %d\n",
> le32_to_cpu(border));
> } else {
> @@ -616,16 +634,16 @@ static int ext4_ext_split(handle_t *hand
> }
>
> /*
> - * if error occurs, then we break processing
> - * and turn filesystem read-only. so, index won't
> + * If error occurs, then we break processing
> + * and mark filesystem read-only. index won't
> * be inserted and tree will be in consistent
> - * state. next mount will repair buffers too
> + * state. Next mount will repair buffers too.
> */
>
> /*
> - * get array to track all allocated blocks
> - * we need this to handle errors and free blocks
> - * upon them
> + * Get array to track all allocated blocks.
> + * We need this to handle errors and free blocks
> + * upon them.
> */
> ablocks = kmalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
> if (!ablocks)
> @@ -661,7 +679,7 @@ static int ext4_ext_split(handle_t *hand
> neh->eh_depth = 0;
> ex = EXT_FIRST_EXTENT(neh);
>
> - /* move remain of path[depth] to the new leaf */
> + /* move remainder of path[depth] to the new leaf */
> BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max);
> /* start copy from next extent */
> /* TODO: we could do it by single memmove */
> @@ -813,11 +831,12 @@ cleanup:
> }
>
> /*
> - * routine implements tree growing procedure:
> - * - allocates new block
> - * - moves top-level data (index block or leaf) into the new block
> - * - initialize new top-level, creating index that points to the
> - * just created block
> + * ext4_ext_grow_indepth:
> + * implements tree growing procedure:
> + * - allocates new block
> + * - moves top-level data (index block or leaf) into the new block
> + * - initializes new top-level, creating index that points to the
> + * just created block
> */
> static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
> struct ext4_ext_path *path,
> @@ -892,8 +911,9 @@ out:
> }
>
> /*
> - * routine finds empty index and adds new leaf. if no free index found
> - * then it requests in-depth growing
> + * ext4_ext_create_new_leaf:
> + * finds empty index and adds new leaf.
> + * if no free index is found, then it requests in-depth growing.
> */
> static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
> struct ext4_ext_path *path,
> @@ -912,8 +932,8 @@ repeat:
> curp--;
> }
>
> - /* we use already allocated block for index block
> - * so, subsequent data blocks should be contigoues */
> + /* we use already allocated block for index block,
> + * so subsequent data blocks should be contiguous */
> if (EXT_HAS_FREE_INDEX(curp)) {
> /* if we found index with free entry, then use that
> * entry: create all needed subtree and add new leaf */
> @@ -943,12 +963,12 @@ repeat:
> }
>
> /*
> - * only first (depth 0 -> 1) produces free space
> - * in all other cases we have to split growed tree
> + * only first (depth 0 -> 1) produces free space;
> + * in all other cases we have to split the grown tree
> */
> depth = ext_depth(inode);
> if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
> - /* now we need split */
> + /* now we need to split */
> goto repeat;
> }
> }
> @@ -958,10 +978,11 @@ out:
> }
>
> /*
> - * returns allocated block in subsequent extent or EXT_MAX_BLOCK
> - * NOTE: it consider block number from index entry as
> - * allocated block. thus, index entries have to be consistent
> - * with leafs
> + * ext4_ext_next_allocated_block:
> + * returns allocated block in subsequent extent or EXT_MAX_BLOCK.
> + * NOTE: it considers block number from index entry as
> + * allocated block. Thus, index entries have to be consistent
> + * with leaves.
> */
> static unsigned long
> ext4_ext_next_allocated_block(struct ext4_ext_path *path)
> @@ -993,6 +1014,7 @@ ext4_ext_next_allocated_block(struct ext
> }
>
> /*
> + * ext4_ext_next_leaf_block:
> * returns first allocated block from next leaf or EXT_MAX_BLOCK
> */
> static unsigned ext4_ext_next_leaf_block(struct inode *inode,
> @@ -1021,8 +1043,9 @@ static unsigned ext4_ext_next_leaf_block
> }
>
> /*
> - * if leaf gets modified and modified extent is first in the leaf
> - * then we have to correct all indexes above
> + * ext4_ext_correct_indexes:
> + * if leaf gets modified and modified extent is first in the leaf,
> + * then we have to correct all indexes above.
> * TODO: do we need to correct tree in all cases?
> */
> int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
> @@ -1050,7 +1073,7 @@ int ext4_ext_correct_indexes(handle_t *h
> }
>
> /*
> - * TODO: we need correction if border is smaller then current one
> + * TODO: we need correction if border is smaller than current one
> */
> k = depth - 1;
> border = path[depth].p_ext->ee_block;
> @@ -1085,7 +1108,7 @@ ext4_can_extents_be_merged(struct inode
> /*
> * To allow future support for preallocated extents to be added
> * as an RO_COMPAT feature, refuse to merge to extents if
> - * can result in the top bit of ee_len being set
> + * this can result in the top bit of ee_len being set.
> */
> if (le16_to_cpu(ex1->ee_len) + le16_to_cpu(ex2->ee_len) > EXT_MAX_LEN)
> return 0;
> @@ -1100,9 +1123,10 @@ ext4_can_extents_be_merged(struct inode
> }
>
> /*
> - * this routine tries to merge requsted extent into the existing
> - * extent or inserts requested extent as new one into the tree,
> - * creating new leaf in no-space case
> + * ext4_ext_insert_extent:
> + * tries to merge requsted extent into the existing extent or
> + * inserts requested extent as new one into the tree,
> + * creating new leaf in the no-space case.
> */
> int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
> struct ext4_ext_path *path,
> @@ -1163,8 +1187,8 @@ repeat:
> }
>
> /*
> - * there is no free space in found leaf
> - * we're gonna add new leaf in the tree
> + * There is no free space in the found leaf.
> + * We're gonna add a new leaf in the tree.
> */
> err = ext4_ext_create_new_leaf(handle, inode, path, newext);
> if (err)
> @@ -1377,7 +1401,8 @@ ext4_ext_put_in_cache(struct inode *inod
> }
>
> /*
> - * this routine calculate boundaries of the gap requested block fits into
> + * ext4_ext_put_gap_in_cache:
> + * calculate boundaries of the gap that the requested block fits into
> * and cache this gap
> */
> static inline void
> @@ -1452,9 +1477,10 @@ ext4_ext_in_cache(struct inode *inode, u
> }
>
> /*
> - * routine removes index from the index block
> - * it's used in truncate case only. thus all requests are for
> - * last index in the block only
> + * ext4_ext_rm_idx:
> + * removes index from the index block.
> + * It's used in truncate case only, thus all requests are for
> + * last index in the block only.
> */
> int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
> struct ext4_ext_path *path)
> @@ -1480,11 +1506,12 @@ int ext4_ext_rm_idx(handle_t *handle, st
> }
>
> /*
> - * This routine returns max. credits extent tree can consume.
> + * ext4_ext_calc_credits_for_insert:
> + * This routine returns max. credits that the extent tree can consume.
> * It should be OK for low-performance paths like ->writepage()
> - * To allow many writing process to fit a single transaction,
> - * caller should calculate credits under truncate_mutex and
> - * pass actual path.
> + * To allow many writing processes to fit into a single transaction,
> + * the caller should calculate credits under truncate_mutex and
> + * pass the actual path.
> */
> int inline ext4_ext_calc_credits_for_insert(struct inode *inode,
> struct ext4_ext_path *path)
> @@ -1500,9 +1527,9 @@ int inline ext4_ext_calc_credits_for_ins
> }
>
> /*
> - * given 32bit logical block (4294967296 blocks), max. tree
> + * given 32-bit logical block (4294967296 blocks), max. tree
> * can be 4 levels in depth -- 4 * 340^4 == 53453440000.
> - * let's also add one more level for imbalance.
> + * Let's also add one more level for imbalance.
> */
> depth = 5;
>
> @@ -1510,13 +1537,13 @@ int inline ext4_ext_calc_credits_for_ins
> needed = 2;
>
> /*
> - * tree can be full, so it'd need to grow in depth:
> + * tree can be full, so it would need to grow in depth:
> * allocation + old root + new root
> */
> needed += 2 + 1 + 1;
>
> /*
> - * Index split can happen, we'd need:
> + * Index split can happen, we would need:
> * allocate intermediate indexes (bitmap + group)
> * + change two blocks at each level, but root (already included)
> */
> @@ -1634,7 +1661,7 @@ ext4_ext_rm_leaf(handle_t *handle, struc
> BUG_ON(b != ex_ee_block + ex_ee_len - 1);
> }
>
> - /* at present, extent can't cross block group */
> + /* at present, extent can't cross block group: */
> /* leaf + bitmap + group desc + sb + inode */
> credits = 5;
> if (ex == EXT_FIRST_EXTENT(eh)) {
> @@ -1660,7 +1687,7 @@ ext4_ext_rm_leaf(handle_t *handle, struc
> goto out;
>
> if (num == 0) {
> - /* this extent is removed entirely mark slot unused */
> + /* this extent is removed; mark slot entirely unused */
> ext4_ext_store_pblock(ex, 0);
> eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
> }
> @@ -1692,7 +1719,8 @@ out:
> }
>
> /*
> - * returns 1 if current index have to be freed (even partial)
> + * ext4_ext_more_to_rm:
> + * returns 1 if current index has to be freed (even partial)
> */
> static int inline
> ext4_ext_more_to_rm(struct ext4_ext_path *path)
> @@ -1703,7 +1731,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path
> return 0;
>
> /*
> - * if truncate on deeper level happened it it wasn't partial
> + * if truncate on deeper level happened, it wasn't partial,
> * so we have to consider current index for truncation
> */
> if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
> @@ -1729,8 +1757,8 @@ int ext4_ext_remove_space(struct inode *
> ext4_ext_invalidate_cache(inode);
>
> /*
> - * we start scanning from right side freeing all the blocks
> - * after i_size and walking into the deep
> + * We start scanning from right side, freeing all the blocks
> + * after i_size and walking into the tree depth-wise.
> */
> path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL);
> if (path == NULL) {
> @@ -1749,7 +1777,7 @@ int ext4_ext_remove_space(struct inode *
> if (i == depth) {
> /* this is leaf block */
> err = ext4_ext_rm_leaf(handle, inode, path, start);
> - /* root level have p_bh == NULL, brelse() eats this */
> + /* root level has p_bh == NULL, brelse() eats this */
> brelse(path[i].p_bh);
> path[i].p_bh = NULL;
> i--;
> @@ -1772,14 +1800,14 @@ int ext4_ext_remove_space(struct inode *
> BUG_ON(path[i].p_hdr->eh_magic != EXT4_EXT_MAGIC);
>
> if (!path[i].p_idx) {
> - /* this level hasn't touched yet */
> + /* this level hasn't been touched yet */
> path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
> path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
> ext_debug("init index ptr: hdr 0x%p, num %d\n",
> path[i].p_hdr,
> le16_to_cpu(path[i].p_hdr->eh_entries));
> } else {
> - /* we've already was here, see at next index */
> + /* we were already here, see at next index */
> path[i].p_idx--;
> }
>
> @@ -1799,19 +1827,19 @@ int ext4_ext_remove_space(struct inode *
> break;
> }
>
> - /* put actual number of indexes to know is this
> - * number got changed at the next iteration */
> + /* save actual number of indexes since this
> + * number is changed at the next iteration */
> path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
> i++;
> } else {
> - /* we finish processing this index, go up */
> + /* we finished processing this index, go up */
> if (path[i].p_hdr->eh_entries == 0 && i > 0) {
> - /* index is empty, remove it
> + /* index is empty, remove it;
> * handle must be already prepared by the
> * truncatei_leaf() */
> err = ext4_ext_rm_idx(handle, inode, path + i);
> }
> - /* root level have p_bh == NULL, brelse() eats this */
> + /* root level has p_bh == NULL, brelse() eats this */
> brelse(path[i].p_bh);
> path[i].p_bh = NULL;
> i--;
> @@ -1822,8 +1850,8 @@ int ext4_ext_remove_space(struct inode *
> /* TODO: flexible tree reduction should be here */
> if (path->p_hdr->eh_entries == 0) {
> /*
> - * truncate to zero freed all the tree
> - * so, we need to correct eh_depth
> + * truncate to zero freed all the tree,
> + * so we need to correct eh_depth
> */
> err = ext4_ext_get_access(handle, inode, path);
> if (err == 0) {
> @@ -1911,7 +1939,7 @@ int ext4_ext_get_blocks(handle_t *handle
> if (goal == EXT4_EXT_CACHE_GAP) {
> if (!create) {
> /* block isn't allocated yet and
> - * user don't want to allocate it */
> + * user doesn't want to allocate it */
> goto out2;
> }
> /* we should allocate requested block */
> @@ -1920,7 +1948,7 @@ int ext4_ext_get_blocks(handle_t *handle
> newblock = iblock
> - le32_to_cpu(newex.ee_block)
> + ext_pblock(&newex);
> - /* number of remain blocks in the extent */
> + /* number of remaining blocks in the extent */
> allocated = le16_to_cpu(newex.ee_len) -
> (iblock - le32_to_cpu(newex.ee_block));
> goto out;
> @@ -1940,8 +1968,8 @@ int ext4_ext_get_blocks(handle_t *handle
> depth = ext_depth(inode);
>
> /*
> - * consistent leaf must not be empty
> - * this situations is possible, though, _during_ tree modification
> + * consistent leaf must not be empty;
> + * this situation is possible, though, _during_ tree modification;
> * this is why assert can't be put in ext4_ext_find_extent()
> */
> BUG_ON(path[depth].p_ext == NULL && depth != 0);
> @@ -1959,10 +1987,10 @@ int ext4_ext_get_blocks(handle_t *handle
> */
> if (ee_len > EXT_MAX_LEN)
> goto out2;
> - /* if found exent covers block, simple return it */
> + /* if found extent covers block, simply return it */
> if (iblock >= ee_block && iblock < ee_block + ee_len) {
> newblock = iblock - ee_block + ee_start;
> - /* number of remain blocks in the extent */
> + /* number of remaining blocks in the extent */
> allocated = ee_len - (iblock - ee_block);
> ext_debug("%d fit into %lu:%d -> "E3FSBLK"\n", (int) iblock,
> ee_block, ee_len, newblock);
> @@ -1973,17 +2001,18 @@ int ext4_ext_get_blocks(handle_t *handle
> }
>
> /*
> - * requested block isn't allocated yet
> + * requested block isn't allocated yet;
> * we couldn't try to create block if create flag is zero
> */
> if (!create) {
> - /* put just found gap into cache to speedup subsequest reqs */
> + /* put just found gap into cache to speed up
> + * subsequent requests */
> ext4_ext_put_gap_in_cache(inode, path, iblock);
> goto out2;
> }
> /*
> * Okay, we need to do block allocation. Lazily initialize the block
> - * allocation info here if necessary
> + * allocation info here if necessary.
> */
> if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
> ext4_init_block_alloc_info(inode);
> @@ -2061,9 +2090,9 @@ void ext4_ext_truncate(struct inode * in
> ext4_ext_invalidate_cache(inode);
>
> /*
> - * TODO: optimization is possible here
> - * probably we need not scaning at all,
> - * because page truncation is enough
> + * TODO: optimization is possible here.
> + * Probably we need not scan at all,
> + * because page truncation is enough.
> */
> if (ext4_orphan_add(handle, inode))
> goto out_stop;
> @@ -2077,13 +2106,13 @@ void ext4_ext_truncate(struct inode * in
> err = ext4_ext_remove_space(inode, last_block);
>
> /* In a multi-transaction truncate, we only make the final
> - * transaction synchronous */
> + * transaction synchronous. */
> if (IS_SYNC(inode))
> handle->h_sync = 1;
>
> out_stop:
> /*
> - * If this was a simple ftruncate(), and the file will remain alive
> + * If this was a simple ftruncate() and the file will remain alive,
> * then we need to clear up the orphan record which we created above.
> * However, if this was a real unlink then we were called by
> * ext4_delete_inode(), and we allow that function to clean up the
> @@ -2097,7 +2126,8 @@ out_stop:
> }
>
> /*
> - * this routine calculate max number of blocks we could modify
> + * ext4_ext_writepage_trans_blocks:
> + * calculate max number of blocks we could modify
> * in order to allocate new block for an inode
> */
> int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
> @@ -2106,7 +2136,7 @@ int ext4_ext_writepage_trans_blocks(stru
>
> needed = ext4_ext_calc_credits_for_insert(inode, NULL);
>
> - /* caller want to allocate num blocks, but note it includes sb */
> + /* caller wants to allocate num blocks, but note it includes sb */
> needed = needed * num - (num - 1);
>
> #ifdef CONFIG_QUOTA
> -
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2006-08-11 22:13:28

by Mingming Cao

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

On Thu, 2006-08-10 at 11:00 -0700, Andrew Morton wrote:
> On Thu, 10 Aug 2006 13:17:55 -0400
> Theodore Tso <[email protected]> wrote:
>
> > On Wed, Aug 09, 2006 at 11:39:40PM -0700, Andrew Morton wrote:
> > > - replace all brelse() calls with put_bh(). Because brelse() is
> > > old-fashioned, has a weird name and neelessly permits a NULL arg.
> > >
> > > In fact it would be beter to convert JBD and ext3 to put_bh before
> > > copying it all over.
> >
> > Wouldn't it be better to preserve in the source code history the
> > brelse->put_bh conversion? We can pour a huge number of changes in
> > ext4 before we submit, but I would have thought it would be easier for
> > everyone to see what is going on if we submit with just the minimal
> > changes, and then have patches that address concerns like this one at
> > a time.
> >
>
> I'd suggest that this be one of the cleanups which be done within ext3
> before taking the ext4 copy.


Looked at this today -- currently brelse() and __brelse() will check the
b_count before calling put_bh(). I think it's okay to replace put_bh()
without checking the b_count, as we always call put_bh() with get_bh
()....but want to confirm with you.

Mingming

2006-08-11 23:00:19

by Andrew Morton

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

On Fri, 11 Aug 2006 13:57:37 -0700
"Randy.Dunlap" <[email protected]> wrote:

> On Thu, 10 Aug 2006 13:29:56 +0400 Alex Tomas wrote:
>
> > AM> - The existing comments could benefit from some rework by a
> > AM> native English speaker.
> >
> > could someone assist here, please?
>
> See if this helps.

Thanks, Randy. The Kconfig help text could do with some help too, if
you're feeling keen..

2006-08-11 23:17:10

by Andrew Morton

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

On Fri, 11 Aug 2006 15:13:09 -0700
Mingming Cao <[email protected]> wrote:

> On Thu, 2006-08-10 at 11:00 -0700, Andrew Morton wrote:
> > On Thu, 10 Aug 2006 13:17:55 -0400
> > Theodore Tso <[email protected]> wrote:
> >
> > > On Wed, Aug 09, 2006 at 11:39:40PM -0700, Andrew Morton wrote:
> > > > - replace all brelse() calls with put_bh(). Because brelse() is
> > > > old-fashioned, has a weird name and neelessly permits a NULL arg.
> > > >
> > > > In fact it would be beter to convert JBD and ext3 to put_bh before
> > > > copying it all over.
> > >
> > > Wouldn't it be better to preserve in the source code history the
> > > brelse->put_bh conversion? We can pour a huge number of changes in
> > > ext4 before we submit, but I would have thought it would be easier for
> > > everyone to see what is going on if we submit with just the minimal
> > > changes, and then have patches that address concerns like this one at
> > > a time.
> > >
> >
> > I'd suggest that this be one of the cleanups which be done within ext3
> > before taking the ext4 copy.
>
>
> Looked at this today -- currently brelse() and __brelse() will check the
> b_count before calling put_bh(). I think it's okay to replace put_bh()
> without checking the b_count, as we always call put_bh() with get_bh
> ()....but want to confirm with you.
>

I haven't seen that warning come out in a couple of years.

I guess that during development it would be useful to trap underflows in
put_bh().

Like this?


fs/buffer.c | 8 ++++++++
include/linux/buffer_head.h | 6 +-----
2 files changed, 9 insertions(+), 5 deletions(-)

diff -puN include/linux/buffer_head.h~put_bh-debug include/linux/buffer_head.h
--- a/include/linux/buffer_head.h~put_bh-debug
+++ a/include/linux/buffer_head.h
@@ -232,11 +232,7 @@ static inline void get_bh(struct buffer_
atomic_inc(&bh->b_count);
}

-static inline void put_bh(struct buffer_head *bh)
-{
- smp_mb__before_atomic_dec();
- atomic_dec(&bh->b_count);
-}
+void put_bh(struct buffer_head *bh);

static inline void brelse(struct buffer_head *bh)
{
diff -puN fs/buffer.c~put_bh-debug fs/buffer.c
--- a/fs/buffer.c~put_bh-debug
+++ a/fs/buffer.c
@@ -47,6 +47,14 @@ static void invalidate_bh_lrus(void);

#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)

+void put_bh(struct buffer_head *bh)
+{
+ WARN_ON(atomic_read(&bh->b_count <= 0);
+ smp_mb__before_atomic_dec();
+ atomic_dec(&bh->b_count);
+}
+EXPORT_SYMBOL(put_bh);
+
inline void
init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
{
_

2006-08-12 00:05:17

by Mingming Cao

[permalink] [raw]
Subject: [PATCH 2/2] ext3 and jbd cleanup: replace brelse() to put_bh

> > On Wed, Aug 09, 2006 at 11:39:40PM -0700, Andrew Morton wrote:
> > > - replace all brelse() calls with put_bh(). Because brelse() is
> > > old-fashioned, has a weird name and neelessly permits a NULL
arg.
> > >
> > > In fact it would be beter to convert JBD and ext3 to put_bh
before
> > > copying it all over.


Here is the patch.

Signed-Off-By: Mingming Cao<[email protected]>
---

linux-2.6.18-rc4-ming/fs/ext3/balloc.c | 18 +++++-----
linux-2.6.18-rc4-ming/fs/ext3/ialloc.c | 14 ++++----
linux-2.6.18-rc4-ming/fs/ext3/inode.c | 24 +++++++-------
linux-2.6.18-rc4-ming/fs/ext3/namei.c | 48 ++++++++++++++---------------
linux-2.6.18-rc4-ming/fs/ext3/resize.c | 34 ++++++++++----------
linux-2.6.18-rc4-ming/fs/ext3/super.c | 20 ++++++------
linux-2.6.18-rc4-ming/fs/ext3/xattr.c | 18 +++++-----
linux-2.6.18-rc4-ming/fs/jbd/checkpoint.c | 12 +++----
linux-2.6.18-rc4-ming/fs/jbd/commit.c | 10 +++---
linux-2.6.18-rc4-ming/fs/jbd/journal.c | 14 ++++----
linux-2.6.18-rc4-ming/fs/jbd/recovery.c | 30 +++++++++---------
linux-2.6.18-rc4-ming/fs/jbd/revoke.c | 6 +--
linux-2.6.18-rc4-ming/fs/jbd/transaction.c | 14 ++++----
13 files changed, 131 insertions(+), 131 deletions(-)

diff -puN fs/ext3/balloc.c~ext3_replace_brelse_to_put_bh fs/ext3/balloc.c
--- linux-2.6.18-rc4/fs/ext3/balloc.c~ext3_replace_brelse_to_put_bh 2006-08-10 23:04:19.000000000 -0700
+++ linux-2.6.18-rc4-ming/fs/ext3/balloc.c 2006-08-11 16:01:06.549638409 -0700
@@ -351,7 +351,7 @@ do_more:
overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
count -= overflow;
}
- brelse(bitmap_bh);
+ put_bh(bitmap_bh);
bitmap_bh = read_block_bitmap(sb, block_group);
if (!bitmap_bh)
goto error_return;
@@ -407,7 +407,7 @@ do_more:
BUFFER_TRACE(debug_bh,
"No commited data in bitmap");
BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap");
- __brelse(debug_bh);
+ put_bh(debug_bh);
}
}
jbd_lock_bh_state(bitmap_bh);
@@ -485,7 +485,7 @@ do_more:
}
sb->s_dirt = 1;
error_return:
- brelse(bitmap_bh);
+ put_bh(bitmap_bh);
ext3_std_error(sb, err);
return;
}
@@ -1324,7 +1324,7 @@ retry:
if (free_blocks <= (windowsz/2))
continue;

- brelse(bitmap_bh);
+ put_bh(bitmap_bh);
bitmap_bh = read_block_bitmap(sb, group_no);
if (!bitmap_bh)
goto io_error;
@@ -1389,7 +1389,7 @@ allocated:
if (debug_bh) {
BUFFER_TRACE(debug_bh, "state when allocated");
BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state");
- brelse(debug_bh);
+ put_bh(debug_bh);
}
}
jbd_lock_bh_state(bitmap_bh);
@@ -1442,7 +1442,7 @@ allocated:
goto out;

*errp = 0;
- brelse(bitmap_bh);
+ put_bh(bitmap_bh);
DQUOT_FREE_BLOCK(inode, *count-num);
*count = num;
return ret_block;
@@ -1459,7 +1459,7 @@ out:
*/
if (!performed_allocation)
DQUOT_FREE_BLOCK(inode, *count);
- brelse(bitmap_bh);
+ put_bh(bitmap_bh);
return 0;
}

@@ -1494,7 +1494,7 @@ ext3_fsblk_t ext3_count_free_blocks(stru
if (!gdp)
continue;
desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
- brelse(bitmap_bh);
+ put_bh(bitmap_bh);
bitmap_bh = read_block_bitmap(sb, i);
if (bitmap_bh == NULL)
continue;
@@ -1504,7 +1504,7 @@ ext3_fsblk_t ext3_count_free_blocks(stru
i, le16_to_cpu(gdp->bg_free_blocks_count), x);
bitmap_count += x;
}
- brelse(bitmap_bh);
+ put_bh(bitmap_bh);
printk("ext3_count_free_blocks: stored = "E3FSBLK
", computed = "E3FSBLK", "E3FSBLK"\n",
le32_to_cpu(es->s_free_blocks_count),
diff -puN fs/ext3/ialloc.c~ext3_replace_brelse_to_put_bh fs/ext3/ialloc.c
--- linux-2.6.18-rc4/fs/ext3/ialloc.c~ext3_replace_brelse_to_put_bh 2006-08-10 23:04:19.000000000 -0700
+++ linux-2.6.18-rc4-ming/fs/ext3/ialloc.c 2006-08-11 16:01:06.655626188 -0700
@@ -185,7 +185,7 @@ void ext3_free_inode (handle_t *handle,
fatal = err;
sb->s_dirt = 1;
error_return:
- brelse(bitmap_bh);
+ put_bh(bitmap_bh);
ext3_std_error(sb, fatal);
}

@@ -468,7 +468,7 @@ struct inode *ext3_new_inode(handle_t *h
if (!gdp)
goto fail;

- brelse(bitmap_bh);
+ put_bh(bitmap_bh);
bitmap_bh = read_inode_bitmap(sb, group);
if (!bitmap_bh)
goto fail;
@@ -625,7 +625,7 @@ out:
iput(inode);
ret = ERR_PTR(err);
really_out:
- brelse(bitmap_bh);
+ put_bh(bitmap_bh);
return ret;

fail_free_drop:
@@ -636,7 +636,7 @@ fail_drop:
inode->i_flags |= S_NOQUOTA;
inode->i_nlink = 0;
iput(inode);
- brelse(bitmap_bh);
+ put_bh(bitmap_bh);
return ERR_PTR(err);
}

@@ -692,7 +692,7 @@ struct inode *ext3_orphan_get(struct sup
inode = NULL;
}
out:
- brelse(bitmap_bh);
+ put_bh(bitmap_bh);
return inode;
}

@@ -715,7 +715,7 @@ unsigned long ext3_count_free_inodes (st
if (!gdp)
continue;
desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
- brelse(bitmap_bh);
+ put_bh(bitmap_bh);
bitmap_bh = read_inode_bitmap(sb, i);
if (!bitmap_bh)
continue;
@@ -725,7 +725,7 @@ unsigned long ext3_count_free_inodes (st
i, le16_to_cpu(gdp->bg_free_inodes_count), x);
bitmap_count += x;
}
- brelse(bitmap_bh);
+ put_bh(bitmap_bh);
printk("ext3_count_free_inodes: stored = %u, computed = %lu, %lu\n",
le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
return desc_count;
diff -puN fs/ext3/inode.c~ext3_replace_brelse_to_put_bh fs/ext3/inode.c
--- linux-2.6.18-rc4/fs/ext3/inode.c~ext3_replace_brelse_to_put_bh 2006-08-10 23:04:19.000000000 -0700
+++ linux-2.6.18-rc4-ming/fs/ext3/inode.c 2006-08-11 16:01:06.748615465 -0700
@@ -378,7 +378,7 @@ static Indirect *ext3_get_branch(struct
return NULL;

changed:
- brelse(bh);
+ put_bh(bh);
*err = -EAGAIN;
goto no_block;
failure:
@@ -624,7 +624,7 @@ static int ext3_alloc_branch(handle_t *h
err = ext3_journal_get_create_access(handle, bh);
if (err) {
unlock_buffer(bh);
- brelse(bh);
+ put_bh(bh);
goto failed;
}

@@ -863,7 +863,7 @@ int ext3_get_blocks_handle(handle_t *han
*/
if (err == -EAGAIN || !verify_chain(chain, partial)) {
while (partial > chain) {
- brelse(partial->bh);
+ put_bh(partial->bh);
partial--;
}
partial = ext3_get_branch(inode, depth, offsets, chain, &err);
@@ -933,7 +933,7 @@ got_it:
cleanup:
while (partial > chain) {
BUFFER_TRACE(partial->bh, "call brelse");
- brelse(partial->bh);
+ put_bh(partial->bh);
partial--;
}
BUFFER_TRACE(bh_result, "returned");
@@ -1051,7 +1051,7 @@ struct buffer_head *ext3_getblk(handle_t
}
if (fatal) {
*errp = fatal;
- brelse(bh);
+ put_bh(bh);
bh = NULL;
}
return bh;
@@ -1943,7 +1943,7 @@ static Indirect *ext3_find_shared(struct
/* Writer: end */

while(partial > p) {
- brelse(partial->bh);
+ put_bh(partial->bh);
partial--;
}
no_top:
@@ -2133,7 +2133,7 @@ static void ext3_free_branches(handle_t
* That's easy if it's exclusively part of this
* transaction. But if it's part of the committing
* transaction then journal_forget() will simply
- * brelse() it. That means that if the underlying
+ * put_bh() it. That means that if the underlying
* block is reallocated in ext3_get_block(),
* unmap_underlying_metadata() will find this block
* and will try to get rid of it. damn, damn.
@@ -2510,7 +2510,7 @@ static int __ext3_get_inode_loc(struct i
* of one, so skip it.
*/
if (!buffer_uptodate(bitmap_bh)) {
- brelse(bitmap_bh);
+ put_bh(bitmap_bh);
goto make_io;
}
for (i = start; i < start + inodes_per_buffer; i++) {
@@ -2519,7 +2519,7 @@ static int __ext3_get_inode_loc(struct i
if (ext3_test_bit(i, bitmap_bh->b_data))
break;
}
- brelse(bitmap_bh);
+ put_bh(bitmap_bh);
if (i == start + inodes_per_buffer) {
/* all other inodes are free, so skip I/O */
memset(bh->b_data, 0, bh->b_size);
@@ -2544,7 +2544,7 @@ make_io:
"unable to read inode block - "
"inode=%lu, block="E3FSBLK,
inode->i_ino, block);
- brelse(bh);
+ put_bh(bh);
return -EIO;
}
}
@@ -3056,7 +3056,7 @@ ext3_reserve_inode_write(handle_t *handl
BUFFER_TRACE(iloc->bh, "get_write_access");
err = ext3_journal_get_write_access(handle, iloc->bh);
if (err) {
- brelse(iloc->bh);
+ put_bh(iloc->bh);
iloc->bh = NULL;
}
}
@@ -3156,7 +3156,7 @@ static int ext3_pin_inode(handle_t *hand
if (!err)
err = ext3_journal_dirty_metadata(handle,
iloc.bh);
- brelse(iloc.bh);
+ put_bh(iloc.bh);
}
}
ext3_std_error(inode->i_sb, err);
diff -puN fs/ext3/namei.c~ext3_replace_brelse_to_put_bh fs/ext3/namei.c
--- linux-2.6.18-rc4/fs/ext3/namei.c~ext3_replace_brelse_to_put_bh 2006-08-10 23:04:19.000000000 -0700
+++ linux-2.6.18-rc4-ming/fs/ext3/namei.c 2006-08-11 16:01:06.838605088 -0700
@@ -349,7 +349,7 @@ dx_probe(struct dentry *dentry, struct i
ext3_warning(dir->i_sb, __FUNCTION__,
"Unrecognised inode hash code %d",
root->info.hash_version);
- brelse(bh);
+ put_bh(bh);
*err = ERR_BAD_DX_DIR;
goto fail;
}
@@ -363,7 +363,7 @@ dx_probe(struct dentry *dentry, struct i
ext3_warning(dir->i_sb, __FUNCTION__,
"Unimplemented inode hash flags: %#06x",
root->info.unused_flags);
- brelse(bh);
+ put_bh(bh);
*err = ERR_BAD_DX_DIR;
goto fail;
}
@@ -372,7 +372,7 @@ dx_probe(struct dentry *dentry, struct i
ext3_warning(dir->i_sb, __FUNCTION__,
"Unimplemented inode hash depth: %#06x",
root->info.indirect_levels);
- brelse(bh);
+ put_bh(bh);
*err = ERR_BAD_DX_DIR;
goto fail;
}
@@ -428,7 +428,7 @@ dx_probe(struct dentry *dentry, struct i
}
fail2:
while (frame >= frame_in) {
- brelse(frame->bh);
+ put_bh(frame->bh);
frame--;
}
fail:
@@ -441,8 +441,8 @@ static void dx_release (struct dx_frame
return;

if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
- brelse(frames[1].bh);
- brelse(frames[0].bh);
+ put_bh(frames[1].bh);
+ put_bh(frames[0].bh);
}

/*
@@ -560,12 +560,12 @@ static int htree_dirblock_to_tree(struct
continue;
if ((err = ext3_htree_store_dirent(dir_file,
hinfo->hash, hinfo->minor_hash, de)) != 0) {
- brelse(bh);
+ put_bh(bh);
return err;
}
count++;
}
- brelse(bh);
+ put_bh(bh);
return count;
}

@@ -802,7 +802,7 @@ static inline int search_dirblock(struct
* entry - you'll have to do that yourself if you want to.
*
* The returned buffer_head has ->b_count elevated. The caller is expected
- * to brelse() it when appropriate.
+ * to put_bh() it when appropriate.
*/
static struct buffer_head * ext3_find_entry (struct dentry *dentry,
struct ext3_dir_entry_2 ** res_dir)
@@ -880,7 +880,7 @@ restart:
/* read error, skip block & hope for the best */
ext3_error(sb, __FUNCTION__, "reading directory #%lu "
"offset %lu", dir->i_ino, block);
- brelse(bh);
+ put_bh(bh);
goto next;
}
i = search_dirblock(bh, dir, dentry,
@@ -890,7 +890,7 @@ restart:
ret = bh;
goto cleanup_and_exit;
} else {
- brelse(bh);
+ put_bh(bh);
if (i < 0)
goto cleanup_and_exit;
}
@@ -1032,7 +1032,7 @@ struct dentry *ext3_get_parent(struct de
if (!bh)
return ERR_PTR(-ENOENT);
ino = le32_to_cpu(de->inode);
- brelse(bh);
+ put_bh(bh);

if (!ext3_valid_inum(child->d_inode->i_sb, ino)) {
ext3_error(child->d_inode->i_sb, "ext3_get_parent",
@@ -1128,7 +1128,7 @@ static struct ext3_dir_entry_2 *do_split

bh2 = ext3_append (handle, dir, &newblock, error);
if (!(bh2)) {
- brelse(*bh);
+ put_bh(*bh);
*bh = NULL;
goto errout;
}
@@ -1137,8 +1137,8 @@ static struct ext3_dir_entry_2 *do_split
err = ext3_journal_get_write_access(handle, *bh);
if (err) {
journal_error:
- brelse(*bh);
- brelse(bh2);
+ put_bh(*bh);
+ put_bh(bh2);
*bh = NULL;
ext3_std_error(dir->i_sb, err);
goto errout;
@@ -1242,7 +1242,7 @@ static int add_dirent_to_buf(handle_t *h
err = ext3_journal_get_write_access(handle, bh);
if (err) {
ext3_std_error(dir->i_sb, err);
- brelse(bh);
+ put_bh(bh);
return err;
}

@@ -1282,7 +1282,7 @@ static int add_dirent_to_buf(handle_t *h
err = ext3_journal_dirty_metadata(handle, bh);
if (err)
ext3_std_error(dir->i_sb, err);
- brelse(bh);
+ put_bh(bh);
return 0;
}

@@ -1315,14 +1315,14 @@ static int make_indexed_dir(handle_t *ha
retval = ext3_journal_get_write_access(handle, bh);
if (retval) {
ext3_std_error(dir->i_sb, retval);
- brelse(bh);
+ put_bh(bh);
return retval;
}
root = (struct dx_root *) bh->b_data;

bh2 = ext3_append (handle, dir, &block, &retval);
if (!(bh2)) {
- brelse(bh);
+ put_bh(bh);
return retval;
}
EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
@@ -1420,7 +1420,7 @@ static int ext3_add_entry (handle_t *han
EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
return make_indexed_dir(handle, dentry, inode, bh);
#endif
- brelse(bh);
+ put_bh(bh);
}
bh = ext3_append(handle, dir, &block, &retval);
if (!bh)
@@ -1562,7 +1562,7 @@ journal_error:
ext3_std_error(dir->i_sb, err);
cleanup:
if (bh)
- brelse(bh);
+ put_bh(bh);
dx_release(frames);
return err;
}
@@ -1999,7 +1999,7 @@ out:
return err;

out_brelse:
- brelse(iloc.bh);
+ put_bh(iloc.bh);
goto out_err;
}

@@ -2285,7 +2285,7 @@ static int ext3_rename (struct inode * o
new_dir->i_version++;
BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata");
ext3_journal_dirty_metadata(handle, new_bh);
- brelse(new_bh);
+ put_bh(new_bh);
new_bh = NULL;
}

@@ -2315,7 +2315,7 @@ static int ext3_rename (struct inode * o
if (old_bh2) {
retval = ext3_delete_entry(handle, old_dir,
old_de2, old_bh2);
- brelse(old_bh2);
+ put_bh(old_bh2);
}
}
if (retval) {
diff -puN fs/ext3/resize.c~ext3_replace_brelse_to_put_bh fs/ext3/resize.c
--- linux-2.6.18-rc4/fs/ext3/resize.c~ext3_replace_brelse_to_put_bh 2006-08-10 23:04:19.000000000 -0700
+++ linux-2.6.18-rc4-ming/fs/ext3/resize.c 2006-08-11 16:01:06.877600591 -0700
@@ -109,7 +109,7 @@ static int verify_group_input(struct sup
input->inode_table, itend - 1, start, metaend - 1);
else
err = 0;
- brelse(bh);
+ put_bh(bh);

return err;
}
@@ -124,7 +124,7 @@ static struct buffer_head *bclean(handle
if (!bh)
return ERR_PTR(-EIO);
if ((err = ext3_journal_get_write_access(handle, bh))) {
- brelse(bh);
+ put_bh(bh);
bh = ERR_PTR(err);
} else {
lock_buffer(bh);
@@ -211,7 +211,7 @@ static int setup_new_group_blocks(struct
goto exit_bh;
}
if ((err = ext3_journal_get_write_access(handle, gdb))) {
- brelse(gdb);
+ put_bh(gdb);
goto exit_bh;
}
lock_buffer(bh);
@@ -220,7 +220,7 @@ static int setup_new_group_blocks(struct
unlock_buffer(bh);
ext3_journal_dirty_metadata(handle, gdb);
ext3_set_bit(bit, bh->b_data);
- brelse(gdb);
+ put_bh(gdb);
}

/* Zero out all of the reserved backup group descriptor table blocks */
@@ -236,7 +236,7 @@ static int setup_new_group_blocks(struct
}
ext3_journal_dirty_metadata(handle, gdb);
ext3_set_bit(bit, bh->b_data);
- brelse(gdb);
+ put_bh(gdb);
}
ext3_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap,
input->block_bitmap - start);
@@ -256,13 +256,13 @@ static int setup_new_group_blocks(struct
goto exit_bh;
}
ext3_journal_dirty_metadata(handle, it);
- brelse(it);
+ put_bh(it);
ext3_set_bit(bit, bh->b_data);
}
mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb),
bh->b_data);
ext3_journal_dirty_metadata(handle, bh);
- brelse(bh);
+ put_bh(bh);

/* Mark unused entries in inode bitmap used */
ext3_debug("clear inode bitmap %#04x (+%ld)\n",
@@ -276,7 +276,7 @@ static int setup_new_group_blocks(struct
bh->b_data);
ext3_journal_dirty_metadata(handle, bh);
exit_bh:
- brelse(bh);
+ put_bh(bh);

exit_journal:
unlock_super(sb);
@@ -459,7 +459,7 @@ static int add_new_gdb(handle_t *handle,
*/
data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0;
ext3_journal_dirty_metadata(handle, dind);
- brelse(dind);
+ put_bh(dind);
inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
ext3_mark_iloc_dirty(handle, inode, &iloc);
memset((*primary)->b_data, 0, sb->s_blocksize);
@@ -481,7 +481,7 @@ static int add_new_gdb(handle_t *handle,

exit_inode:
//ext3_journal_release_buffer(handle, iloc.bh);
- brelse(iloc.bh);
+ put_bh(iloc.bh);
exit_dindj:
//ext3_journal_release_buffer(handle, dind);
exit_primary:
@@ -489,9 +489,9 @@ exit_primary:
exit_sbh:
//ext3_journal_release_buffer(handle, *primary);
exit_dind:
- brelse(dind);
+ put_bh(dind);
exit_bh:
- brelse(*primary);
+ put_bh(*primary);

ext3_debug("leaving with error %d\n", err);
return err;
@@ -555,7 +555,7 @@ static int reserve_backup_gdb(handle_t *
goto exit_bh;
}
if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) {
- brelse(primary[res]);
+ put_bh(primary[res]);
err = gdbackups;
goto exit_bh;
}
@@ -598,8 +598,8 @@ static int reserve_backup_gdb(handle_t *

exit_bh:
while (--res >= 0)
- brelse(primary[res]);
- brelse(dind);
+ put_bh(primary[res]);
+ put_bh(dind);

exit_free:
kfree(primary);
@@ -668,7 +668,7 @@ static void update_backups(struct super_
set_buffer_uptodate(bh);
unlock_buffer(bh);
ext3_journal_dirty_metadata(handle, bh);
- brelse(bh);
+ put_bh(bh);
}
if ((err2 = ext3_journal_stop(handle)) && !err)
err = err2;
@@ -974,7 +974,7 @@ int ext3_group_extend(struct super_block
"can't read last block, resize aborted");
return -ENOSPC;
}
- brelse(bh);
+ put_bh(bh);

/* We will update the superblock, one block bitmap, and
* one group descriptor via ext3_free_blocks().
diff -puN fs/ext3/super.c~ext3_replace_brelse_to_put_bh fs/ext3/super.c
--- linux-2.6.18-rc4/fs/ext3/super.c~ext3_replace_brelse_to_put_bh 2006-08-10 23:04:20.000000000 -0700
+++ linux-2.6.18-rc4-ming/fs/ext3/super.c 2006-08-11 16:01:06.953591829 -0700
@@ -400,12 +400,12 @@ static void ext3_put_super (struct super
}

for (i = 0; i < sbi->s_gdb_count; i++)
- brelse(sbi->s_group_desc[i]);
+ put_bh(sbi->s_group_desc[i]);
kfree(sbi->s_group_desc);
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
- brelse(sbi->s_sbh);
+ put_bh(sbi->s_sbh);
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
kfree(sbi->s_qf_names[i]);
@@ -1759,7 +1759,7 @@ failed_mount3:
percpu_counter_destroy(&sbi->s_dirs_counter);
failed_mount2:
for (i = 0; i < db_count; i++)
- brelse(sbi->s_group_desc[i]);
+ put_bh(sbi->s_group_desc[i]);
kfree(sbi->s_group_desc);
failed_mount:
#ifdef CONFIG_QUOTA
@@ -1767,7 +1767,7 @@ failed_mount:
kfree(sbi->s_qf_names[i]);
#endif
ext3_blkdev_remove(sbi);
- brelse(bh);
+ put_bh(bh);
out_fail:
sb->s_fs_info = NULL;
kfree(sbi);
@@ -1885,19 +1885,19 @@ static journal_t *ext3_get_dev_journal(s
EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
printk(KERN_ERR "EXT3-fs: external journal has "
"bad superblock\n");
- brelse(bh);
+ put_bh(bh);
goto out_bdev;
}

if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
printk(KERN_ERR "EXT3-fs: journal UUID does not match\n");
- brelse(bh);
+ put_bh(bh);
goto out_bdev;
}

len = le32_to_cpu(es->s_blocks_count);
start = sb_block + 1;
- brelse(bh); /* we're done with the superblock */
+ put_bh(bh); /* we're done with the superblock */

journal = journal_init_dev(bdev, sb->s_bdev,
start, len, blocksize);
@@ -2590,7 +2590,7 @@ static ssize_t ext3_quota_read(struct su
memset(data, 0, tocopy);
else
memcpy(data, bh->b_data+offset, tocopy);
- brelse(bh);
+ put_bh(bh);
offset = 0;
toread -= tocopy;
data += tocopy;
@@ -2624,7 +2624,7 @@ static ssize_t ext3_quota_write(struct s
if (journal_quota) {
err = ext3_journal_get_write_access(handle, bh);
if (err) {
- brelse(bh);
+ put_bh(bh);
goto out;
}
}
@@ -2639,7 +2639,7 @@ static ssize_t ext3_quota_write(struct s
err = ext3_journal_dirty_data(handle, bh);
mark_buffer_dirty(bh);
}
- brelse(bh);
+ put_bh(bh);
if (err)
goto out;
offset = 0;
diff -puN fs/ext3/xattr.c~ext3_replace_brelse_to_put_bh fs/ext3/xattr.c
--- linux-2.6.18-rc4/fs/ext3/xattr.c~ext3_replace_brelse_to_put_bh 2006-08-10 23:04:20.000000000 -0700
+++ linux-2.6.18-rc4-ming/fs/ext3/xattr.c 2006-08-11 16:01:07.007585602 -0700
@@ -256,7 +256,7 @@ bad_block: ext3_error(inode->i_sb, __FUN
error = size;

cleanup:
- brelse(bh);
+ put_bh(bh);
return error;
}

@@ -299,7 +299,7 @@ ext3_xattr_ibody_get(struct inode *inode
error = size;

cleanup:
- brelse(iloc.bh);
+ put_bh(iloc.bh);
return error;
}

@@ -384,7 +384,7 @@ ext3_xattr_block_list(struct inode *inod
error = ext3_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size);

cleanup:
- brelse(bh);
+ put_bh(bh);

return error;
}
@@ -413,7 +413,7 @@ ext3_xattr_ibody_list(struct inode *inod
buffer, buffer_size);

cleanup:
- brelse(iloc.bh);
+ put_bh(iloc.bh);
return error;
}

@@ -836,7 +836,7 @@ getblk_failed:
cleanup:
if (ce)
mb_cache_entry_release(ce);
- brelse(new_bh);
+ put_bh(new_bh);
if (!(bs->bh && s->base == bs->bh->b_data))
kfree(s->base);

@@ -1019,8 +1019,8 @@ ext3_xattr_set_handle(handle_t *handle,
}

cleanup:
- brelse(is.iloc.bh);
- brelse(bs.bh);
+ put_bh(is.iloc.bh);
+ put_bh(bs.bh);
up_write(&EXT3_I(inode)->xattr_sem);
return error;
}
@@ -1092,7 +1092,7 @@ ext3_xattr_delete_inode(handle_t *handle
EXT3_I(inode)->i_file_acl = 0;

cleanup:
- brelse(bh);
+ put_bh(bh);
}

/*
@@ -1223,7 +1223,7 @@ again:
*pce = ce;
return bh;
}
- brelse(bh);
+ put_bh(bh);
ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash);
}
return NULL;
diff -puN fs/jbd/checkpoint.c~ext3_replace_brelse_to_put_bh fs/jbd/checkpoint.c
--- linux-2.6.18-rc4/fs/jbd/checkpoint.c~ext3_replace_brelse_to_put_bh 2006-08-11 16:00:02.637008493 -0700
+++ linux-2.6.18-rc4-ming/fs/jbd/checkpoint.c 2006-08-11 16:01:07.150569115 -0700
@@ -99,7 +99,7 @@ static int __try_to_free_cp_buf(struct j
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
BUFFER_TRACE(bh, "release");
- __brelse(bh);
+ put_bh(bh);
} else {
jbd_unlock_bh_state(bh);
}
@@ -189,7 +189,7 @@ restart:
wait_on_buffer(bh);
/* the journal_head may have gone by now */
BUFFER_TRACE(bh, "brelse");
- __brelse(bh);
+ put_bh(bh);
spin_lock(&journal->j_list_lock);
goto restart;
}
@@ -200,7 +200,7 @@ restart:
released = __journal_remove_checkpoint(jh);
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
- __brelse(bh);
+ put_bh(bh);
}
}

@@ -216,7 +216,7 @@ __flush_batch(journal_t *journal, struct
struct buffer_head *bh = bhs[i];
clear_buffer_jwrite(bh);
BUFFER_TRACE(bh, "brelse");
- __brelse(bh);
+ put_bh(bh);
}
*batch_count = 0;
}
@@ -243,7 +243,7 @@ static int __process_buffer(journal_t *j
wait_on_buffer(bh);
/* the journal_head may have gone by now */
BUFFER_TRACE(bh, "brelse");
- __brelse(bh);
+ put_bh(bh);
ret = 1;
} else if (jh->b_transaction != NULL) {
transaction_t *t = jh->b_transaction;
@@ -261,7 +261,7 @@ static int __process_buffer(journal_t *j
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
- __brelse(bh);
+ put_bh(bh);
ret = 1;
} else {
/*
diff -puN fs/jbd/commit.c~ext3_replace_brelse_to_put_bh fs/jbd/commit.c
--- linux-2.6.18-rc4/fs/jbd/commit.c~ext3_replace_brelse_to_put_bh 2006-08-11 16:00:02.701001113 -0700
+++ linux-2.6.18-rc4-ming/fs/jbd/commit.c 2006-08-11 16:01:07.183565310 -0700
@@ -68,14 +68,14 @@ static void release_buffer_page(struct b
goto nope;

page_cache_get(page);
- __brelse(bh);
+ put_bh(bh);
try_to_free_buffers(page);
unlock_page(page);
page_cache_release(page);
return;

nope:
- __brelse(bh);
+ put_bh(bh);
}

/*
@@ -642,7 +642,7 @@ wait_for_iobuf:
*/
BUFFER_TRACE(bh, "dumping temporary bh");
journal_put_journal_head(jh);
- __brelse(bh);
+ put_bh(bh);
J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
free_buffer_head(bh);

@@ -663,7 +663,7 @@ wait_for_iobuf:
IO to complete */
wake_up_bit(&bh->b_state, BH_Unshadow);
JBUFFER_TRACE(jh, "brelse shadowed buffer");
- __brelse(bh);
+ put_bh(bh);
}

J_ASSERT (commit_transaction->t_shadow_list == NULL);
@@ -691,7 +691,7 @@ wait_for_iobuf:
clear_buffer_jwrite(bh);
journal_unfile_buffer(journal, jh);
journal_put_journal_head(jh);
- __brelse(bh); /* One for getblk */
+ put_bh(bh); /* One for getblk */
/* AKPM: bforget here */
}

diff -puN fs/jbd/journal.c~ext3_replace_brelse_to_put_bh fs/jbd/journal.c
--- linux-2.6.18-rc4/fs/jbd/journal.c~ext3_replace_brelse_to_put_bh 2006-08-11 16:00:02.763993849 -0700
+++ linux-2.6.18-rc4-ming/fs/jbd/journal.c 2006-08-11 16:01:07.241558622 -0700
@@ -805,7 +805,7 @@ journal_t * journal_init_inode (struct i
static void journal_fail_superblock (journal_t *journal)
{
struct buffer_head *bh = journal->j_sb_buffer;
- brelse(bh);
+ put_bh(bh);
journal->j_sb_buffer = NULL;
}

@@ -890,7 +890,7 @@ int journal_create(journal_t *journal)
BUFFER_TRACE(bh, "marking uptodate");
set_buffer_uptodate(bh);
unlock_buffer(bh);
- __brelse(bh);
+ put_bh(bh);
}

sync_blockdev(journal->j_dev);
@@ -1146,7 +1146,7 @@ void journal_destroy(journal_t *journal)
journal->j_tail_sequence = ++journal->j_transaction_sequence;
if (journal->j_sb_buffer) {
journal_update_superblock(journal, 1);
- brelse(journal->j_sb_buffer);
+ put_bh(journal->j_sb_buffer);
}

if (journal->j_inode)
@@ -1810,7 +1810,7 @@ static void __journal_remove_journal_hea
bh->b_private = NULL;
jh->b_bh = NULL; /* debug, really */
clear_buffer_jbd(bh);
- __brelse(bh);
+ put_bh(bh);
journal_free_journal_head(jh);
} else {
BUFFER_TRACE(bh, "journal_head was locked");
@@ -1827,8 +1827,8 @@ static void __journal_remove_journal_hea
* We in fact take an additional increment on ->b_count as a convenience,
* because the caller usually wants to do additional things with the bh
* after calling here.
- * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
- * time. Once the caller has run __brelse(), the buffer is eligible for
+ * The caller of journal_remove_journal_head() *must* run put_bh(bh) at some
+ * time. Once the caller has run put_bh(), the buffer is eligible for
* reaping by try_to_free_buffers().
*/
void journal_remove_journal_head(struct buffer_head *bh)
@@ -1851,7 +1851,7 @@ void journal_put_journal_head(struct jou
--jh->b_jcount;
if (!jh->b_jcount && !jh->b_transaction) {
__journal_remove_journal_head(bh);
- __brelse(bh);
+ put_bh(bh);
}
jbd_unlock_bh_journal_head(bh);
}
diff -puN fs/jbd/recovery.c~ext3_replace_brelse_to_put_bh fs/jbd/recovery.c
--- linux-2.6.18-rc4/fs/jbd/recovery.c~ext3_replace_brelse_to_put_bh 2006-08-11 16:00:02.823986931 -0700
+++ linux-2.6.18-rc4-ming/fs/jbd/recovery.c 2006-08-11 16:01:07.266555740 -0700
@@ -108,7 +108,7 @@ static int do_readahead(journal_t *journ
nbufs = 0;
}
} else
- brelse(bh);
+ put_bh(bh);
}

if (nbufs)
@@ -165,7 +165,7 @@ static int jread(struct buffer_head **bh
if (!buffer_uptodate(bh)) {
printk (KERN_ERR "JBD: Failed to read block at offset %u\n",
offset);
- brelse(bh);
+ put_bh(bh);
return -EIO;
}

@@ -388,7 +388,7 @@ static int do_one_pass(journal_t *journa
tmp = (journal_header_t *)bh->b_data;

if (tmp->h_magic != cpu_to_be32(JFS_MAGIC_NUMBER)) {
- brelse(bh);
+ put_bh(bh);
break;
}

@@ -398,7 +398,7 @@ static int do_one_pass(journal_t *journa
blocktype, sequence);

if (sequence != next_commit_ID) {
- brelse(bh);
+ put_bh(bh);
break;
}

@@ -415,7 +415,7 @@ static int do_one_pass(journal_t *journa
next_log_block +=
count_tags(bh, journal->j_blocksize);
wrap(journal, next_log_block);
- brelse(bh);
+ put_bh(bh);
continue;
}

@@ -454,7 +454,7 @@ static int do_one_pass(journal_t *journa
if (journal_test_revoke
(journal, blocknr,
next_commit_ID)) {
- brelse(obh);
+ put_bh(obh);
++info->nr_revoke_hits;
goto skip_write;
}
@@ -469,8 +469,8 @@ static int do_one_pass(journal_t *journa
"JBD: Out of memory "
"during recovery.\n");
err = -ENOMEM;
- brelse(bh);
- brelse(obh);
+ put_bh(bh);
+ put_bh(obh);
goto failed;
}

@@ -489,8 +489,8 @@ static int do_one_pass(journal_t *journa
++info->nr_replays;
/* ll_rw_block(WRITE, 1, &nbh); */
unlock_buffer(nbh);
- brelse(obh);
- brelse(nbh);
+ put_bh(obh);
+ put_bh(nbh);
}

skip_write:
@@ -502,14 +502,14 @@ static int do_one_pass(journal_t *journa
break;
}

- brelse(bh);
+ put_bh(bh);
continue;

case JFS_COMMIT_BLOCK:
/* Found an expected commit block: not much to
* do other than move on to the next sequence
* number. */
- brelse(bh);
+ put_bh(bh);
next_commit_ID++;
continue;

@@ -517,13 +517,13 @@ static int do_one_pass(journal_t *journa
/* If we aren't in the REVOKE pass, then we can
* just skip over this block. */
if (pass != PASS_REVOKE) {
- brelse(bh);
+ put_bh(bh);
continue;
}

err = scan_revoke_records(journal, bh,
next_commit_ID, info);
- brelse(bh);
+ put_bh(bh);
if (err)
goto failed;
continue;
@@ -531,7 +531,7 @@ static int do_one_pass(journal_t *journa
default:
jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
blocktype);
- brelse(bh);
+ put_bh(bh);
goto done;
}
}
diff -puN fs/jbd/revoke.c~ext3_replace_brelse_to_put_bh fs/jbd/revoke.c
--- linux-2.6.18-rc4/fs/jbd/revoke.c~ext3_replace_brelse_to_put_bh 2006-08-11 16:00:02.883980012 -0700
+++ linux-2.6.18-rc4-ming/fs/jbd/revoke.c 2006-08-11 16:01:07.292552742 -0700
@@ -380,7 +380,7 @@ int journal_revoke(handle_t *handle, uns
if (!J_EXPECT_BH(bh, !buffer_revoked(bh),
"inconsistent data on disk")) {
if (!bh_in)
- brelse(bh);
+ put_bh(bh);
return -EIO;
}
set_buffer_revoked(bh);
@@ -390,7 +390,7 @@ int journal_revoke(handle_t *handle, uns
journal_forget(handle, bh_in);
} else {
BUFFER_TRACE(bh, "call brelse");
- __brelse(bh);
+ put_bh(bh);
}
}

@@ -468,7 +468,7 @@ int journal_cancel_revoke(handle_t *hand
if (bh2) {
if (bh2 != bh)
clear_buffer_revoked(bh2);
- __brelse(bh2);
+ put_bh(bh2);
}
}
return did_revoke;
diff -puN fs/jbd/transaction.c~ext3_replace_brelse_to_put_bh fs/jbd/transaction.c
--- linux-2.6.18-rc4/fs/jbd/transaction.c~ext3_replace_brelse_to_put_bh 2006-08-11 16:00:02.948972518 -0700
+++ linux-2.6.18-rc4-ming/fs/jbd/transaction.c 2006-08-11 16:01:07.356545363 -0700
@@ -1068,7 +1068,7 @@ no_journal:
jbd_unlock_bh_state(bh);
if (need_brelse) {
BUFFER_TRACE(bh, "brelse");
- __brelse(bh);
+ put_bh(bh);
}
JBUFFER_TRACE(jh, "exit");
journal_put_journal_head(jh);
@@ -1254,7 +1254,7 @@ int journal_forget (handle_t *handle, st
} else {
__journal_unfile_buffer(jh);
journal_remove_journal_head(bh);
- __brelse(bh);
+ put_bh(bh);
if (!buffer_jbd(bh)) {
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
@@ -1281,7 +1281,7 @@ int journal_forget (handle_t *handle, st
not_jbd:
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
- __brelse(bh);
+ put_bh(bh);
drop:
if (drop_reserve) {
/* no need to reserve log space for this block -bzzz */
@@ -1577,7 +1577,7 @@ __journal_try_to_free_buffer(journal_t *
JBUFFER_TRACE(jh, "release data");
__journal_unfile_buffer(jh);
journal_remove_journal_head(bh);
- __brelse(bh);
+ put_bh(bh);
}
} else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) {
/* written-back checkpointed metadata buffer */
@@ -1585,7 +1585,7 @@ __journal_try_to_free_buffer(journal_t *
JBUFFER_TRACE(jh, "remove from checkpoint list");
__journal_remove_checkpoint(jh);
journal_remove_journal_head(bh);
- __brelse(bh);
+ put_bh(bh);
}
}
spin_unlock(&journal->j_list_lock);
@@ -1690,7 +1690,7 @@ static int __dispose_buffer(struct journ
} else {
JBUFFER_TRACE(jh, "on running transaction");
journal_remove_journal_head(bh);
- __brelse(bh);
+ put_bh(bh);
}
return may_free;
}
@@ -2075,5 +2075,5 @@ void journal_refile_buffer(journal_t *jo
journal_remove_journal_head(bh);

spin_unlock(&journal->j_list_lock);
- __brelse(bh);
+ put_bh(bh);
}

_


2006-08-12 00:05:23

by Mingming Cao

[permalink] [raw]
Subject: [PATCH 1/2] ext3 and jbd cleanup: remove whitespace

Remove whitespace from ext3 and jbd, before we clone ext4.

Signed-Off-By: Mingming Cao<[email protected]>

diff -urN linux-2.6.18-rc4/fs/ext3/balloc.c linux-2.6.18-rc4-ws/fs/ext3/balloc.c
--- linux-2.6.18-rc4/fs/ext3/balloc.c 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/fs/ext3/balloc.c 2006-08-10 22:50:58.565737801 -0700
@@ -74,7 +74,7 @@
}

/*
- * Read the bitmap for a given block_group, reading into the specified
+ * Read the bitmap for a given block_group, reading into the specified
* slot in the superblock's bitmap cache.
*
* Return buffer_head on success or NULL in case of failure.
@@ -419,8 +419,8 @@
}
/* @@@ This prevents newly-allocated data from being
* freed and then reallocated within the same
- * transaction.
- *
+ * transaction.
+ *
* Ideally we would want to allow that to happen, but to
* do so requires making journal_forget() capable of
* revoking the queued write of a data block, which
@@ -433,7 +433,7 @@
* safe not to set the allocation bit in the committed
* bitmap, because we know that there is no outstanding
* activity on the buffer any more and so it is safe to
- * reallocate it.
+ * reallocate it.
*/
BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
J_ASSERT_BH(bitmap_bh,
@@ -518,7 +518,7 @@
* data would allow the old block to be overwritten before the
* transaction committed (because we force data to disk before commit).
* This would lead to corruption if we crashed between overwriting the
- * data and committing the delete.
+ * data and committing the delete.
*
* @@@ We may want to make this allocation behaviour conditional on
* data-writes at some point, and disable it for metadata allocations or
@@ -584,7 +584,7 @@

if (start > 0) {
/*
- * The goal was occupied; search forward for a free
+ * The goal was occupied; search forward for a free
* block within the next XX blocks.
*
* end_goal is more or less random, but it has to be
@@ -1194,7 +1194,7 @@
/*
* ext3_new_block uses a goal block to assist allocation. If the goal is
* free, or there is a free block within 32 blocks of the goal, that block
- * is allocated. Otherwise a forward search is made for a free block; within
+ * is allocated. Otherwise a forward search is made for a free block; within
* each block group the search first looks for an entire free byte in the block
* bitmap, and then for any free bit if that fails.
* This function also updates quota and i_blocks field.
@@ -1303,7 +1303,7 @@
smp_rmb();

/*
- * Now search the rest of the groups. We assume that
+ * Now search the rest of the groups. We assume that
* i and gdp correctly point to the last group visited.
*/
for (bgi = 0; bgi < ngroups; bgi++) {
diff -urN linux-2.6.18-rc4/fs/ext3/bitmap.c linux-2.6.18-rc4-ws/fs/ext3/bitmap.c
--- linux-2.6.18-rc4/fs/ext3/bitmap.c 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/fs/ext3/bitmap.c 2006-08-10 22:50:58.566737693 -0700
@@ -20,7 +20,7 @@
unsigned int i;
unsigned long sum = 0;

- if (!map)
+ if (!map)
return (0);
for (i = 0; i < numchars; i++)
sum += nibblemap[map->b_data[i] & 0xf] +
diff -urN linux-2.6.18-rc4/fs/ext3/dir.c linux-2.6.18-rc4-ws/fs/ext3/dir.c
--- linux-2.6.18-rc4/fs/ext3/dir.c 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/fs/ext3/dir.c 2006-08-10 22:50:58.605733477 -0700
@@ -59,7 +59,7 @@

return (ext3_filetype_table[filetype]);
}
-
+

int ext3_check_dir_entry (const char * function, struct inode * dir,
struct ext3_dir_entry_2 * de,
@@ -162,7 +162,7 @@
* to make sure. */
if (filp->f_version != inode->i_version) {
for (i = 0; i < sb->s_blocksize && i < offset; ) {
- de = (struct ext3_dir_entry_2 *)
+ de = (struct ext3_dir_entry_2 *)
(bh->b_data + i);
/* It's too expensive to do a full
* dirent test each time round this
@@ -181,7 +181,7 @@
filp->f_version = inode->i_version;
}

- while (!error && filp->f_pos < inode->i_size
+ while (!error && filp->f_pos < inode->i_size
&& offset < sb->s_blocksize) {
de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
@@ -229,7 +229,7 @@
/*
* These functions convert from the major/minor hash to an f_pos
* value.
- *
+ *
* Currently we only use major hash numer. This is unfortunate, but
* on 32-bit machines, the same VFS interface is used for lseek and
* llseek, so if we use the 64 bit offset, then the 32-bit versions of
@@ -250,7 +250,7 @@
struct fname {
__u32 hash;
__u32 minor_hash;
- struct rb_node rb_hash;
+ struct rb_node rb_hash;
struct fname *next;
__u32 inode;
__u8 name_len;
@@ -410,7 +410,7 @@
curr_pos = hash2pos(fname->hash, fname->minor_hash);
while (fname) {
error = filldir(dirent, fname->name,
- fname->name_len, curr_pos,
+ fname->name_len, curr_pos,
fname->inode,
get_dtype(sb, fname->file_type));
if (error) {
@@ -465,7 +465,7 @@
/*
* Fill the rbtree if we have no more entries,
* or the inode has changed since we last read in the
- * cached entries.
+ * cached entries.
*/
if ((!info->curr_node) ||
(filp->f_version != inode->i_version)) {
diff -urN linux-2.6.18-rc4/fs/ext3/file.c linux-2.6.18-rc4-ws/fs/ext3/file.c
--- linux-2.6.18-rc4/fs/ext3/file.c 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/fs/ext3/file.c 2006-08-10 22:50:58.606733368 -0700
@@ -100,7 +100,7 @@

force_commit:
err = ext3_force_commit(inode->i_sb);
- if (err)
+ if (err)
return err;
return ret;
}
diff -urN linux-2.6.18-rc4/fs/ext3/fsync.c linux-2.6.18-rc4-ws/fs/ext3/fsync.c
--- linux-2.6.18-rc4/fs/ext3/fsync.c 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/fs/ext3/fsync.c 2006-08-10 22:50:58.607733260 -0700
@@ -8,14 +8,14 @@
* Universite Pierre et Marie Curie (Paris VI)
* from
* linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds
- *
+ *
* ext3fs fsync primitive
*
* Big-endian to little-endian byte-swapping/bitmaps by
* David S. Miller ([email protected]), 1995
- *
+ *
* Removed unnecessary code duplication for little endian machines
- * and excessive __inline__s.
+ * and excessive __inline__s.
* Andi Kleen, 1997
*
* Major simplications and cleanup - we only need to do the metadata, because
diff -urN linux-2.6.18-rc4/fs/ext3/hash.c linux-2.6.18-rc4-ws/fs/ext3/hash.c
--- linux-2.6.18-rc4/fs/ext3/hash.c 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/fs/ext3/hash.c 2006-08-10 22:50:58.630730774 -0700
@@ -4,7 +4,7 @@
* Copyright (C) 2002 by Theodore Ts'o
*
* This file is released under the GPL v2.
- *
+ *
* This file may be redistributed under the terms of the GNU Public
* License.
*/
@@ -80,11 +80,11 @@
* Returns the hash of a filename. If len is 0 and name is NULL, then
* this function can be used to test whether or not a hash version is
* supported.
- *
+ *
* The seed is an 4 longword (32 bits) "secret" which can be used to
* uniquify a hash. If the seed is all zero's, then some default seed
* may be used.
- *
+ *
* A particular hash version specifies whether or not the seed is
* represented, and whether or not the returned hash is 32 bits or 64
* bits. 32 bit hashes will return 0 for the minor hash.
diff -urN linux-2.6.18-rc4/fs/ext3/ialloc.c linux-2.6.18-rc4-ws/fs/ext3/ialloc.c
--- linux-2.6.18-rc4/fs/ext3/ialloc.c 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/fs/ext3/ialloc.c 2006-08-10 22:50:58.632730558 -0700
@@ -216,7 +216,7 @@
continue;
if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
continue;
- if (!best_desc ||
+ if (!best_desc ||
(le16_to_cpu(desc->bg_free_blocks_count) >
le16_to_cpu(best_desc->bg_free_blocks_count))) {
best_group = group;
@@ -226,30 +226,30 @@
return best_group;
}

-/*
- * Orlov's allocator for directories.
- *
+/*
+ * Orlov's allocator for directories.
+ *
* We always try to spread first-level directories.
*
- * If there are blockgroups with both free inodes and free blocks counts
- * not worse than average we return one with smallest directory count.
- * Otherwise we simply return a random group.
- *
- * For the rest rules look so:
- *
- * It's OK to put directory into a group unless
- * it has too many directories already (max_dirs) or
- * it has too few free inodes left (min_inodes) or
- * it has too few free blocks left (min_blocks) or
- * it's already running too large debt (max_debt).
- * Parent's group is prefered, if it doesn't satisfy these
- * conditions we search cyclically through the rest. If none
- * of the groups look good we just look for a group with more
- * free inodes than average (starting at parent's group).
- *
- * Debt is incremented each time we allocate a directory and decremented
- * when we allocate an inode, within 0--255.
- */
+ * If there are blockgroups with both free inodes and free blocks counts
+ * not worse than average we return one with smallest directory count.
+ * Otherwise we simply return a random group.
+ *
+ * For the rest rules look so:
+ *
+ * It's OK to put directory into a group unless
+ * it has too many directories already (max_dirs) or
+ * it has too few free inodes left (min_inodes) or
+ * it has too few free blocks left (min_blocks) or
+ * it's already running too large debt (max_debt).
+ * Parent's group is prefered, if it doesn't satisfy these
+ * conditions we search cyclically through the rest. If none
+ * of the groups look good we just look for a group with more
+ * free inodes than average (starting at parent's group).
+ *
+ * Debt is incremented each time we allocate a directory and decremented
+ * when we allocate an inode, within 0--255.
+ */

#define INODE_COST 64
#define BLOCK_COST 256
@@ -454,7 +454,7 @@
group = find_group_dir(sb, dir);
else
group = find_group_orlov(sb, dir);
- } else
+ } else
group = find_group_other(sb, dir);

err = -ENOSPC;
diff -urN linux-2.6.18-rc4/fs/ext3/inode.c linux-2.6.18-rc4-ws/fs/ext3/inode.c
--- linux-2.6.18-rc4/fs/ext3/inode.c 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/fs/ext3/inode.c 2006-08-10 22:50:58.684724936 -0700
@@ -55,7 +55,7 @@
/*
* The ext3 forget function must perform a revoke if we are freeing data
* which has been journaled. Metadata (eg. indirect blocks) must be
- * revoked in all cases.
+ * revoked in all cases.
*
* "bh" may be NULL: a metadata block may have been freed from memory
* but there may still be a record of it in the journal, and that record
@@ -105,7 +105,7 @@
* Work out how many blocks we need to proceed with the next chunk of a
* truncate transaction.
*/
-static unsigned long blocks_for_truncate(struct inode *inode)
+static unsigned long blocks_for_truncate(struct inode *inode)
{
unsigned long needed;

@@ -122,13 +122,13 @@

/* But we need to bound the transaction so we don't overflow the
* journal. */
- if (needed > EXT3_MAX_TRANS_DATA)
+ if (needed > EXT3_MAX_TRANS_DATA)
needed = EXT3_MAX_TRANS_DATA;

return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
}

-/*
+/*
* Truncate transactions can be complex and absolutely huge. So we need to
* be able to restart the transaction at a conventient checkpoint to make
* sure we don't overflow the journal.
@@ -136,9 +136,9 @@
* start_transaction gets us a new handle for a truncate transaction,
* and extend_transaction tries to extend the existing one a bit. If
* extend fails, we need to propagate the failure up and restart the
- * transaction in the top-level truncate loop. --sct
+ * transaction in the top-level truncate loop. --sct
*/
-static handle_t *start_transaction(struct inode *inode)
+static handle_t *start_transaction(struct inode *inode)
{
handle_t *result;

@@ -215,12 +215,12 @@
ext3_orphan_del(handle, inode);
EXT3_I(inode)->i_dtime = get_seconds();

- /*
+ /*
* One subtle ordering requirement: if anything has gone wrong
* (transaction abort, IO errors, whatever), then we can still
* do these next steps (the fs will already have been marked as
* having errors), but we can't free the inode if the mark_dirty
- * fails.
+ * fails.
*/
if (ext3_mark_inode_dirty(handle, inode))
/* If that failed, just do the required in-core inode clear. */
@@ -398,7 +398,7 @@
* + if there is a block to the left of our position - allocate near it.
* + if pointer will live in indirect block - allocate near that block.
* + if pointer will live in inode - allocate in the same
- * cylinder group.
+ * cylinder group.
*
* In the latter case we colour the starting block by the callers PID to
* prevent it from clashing with concurrent allocations for a different inode
@@ -744,7 +744,7 @@
jbd_debug(5, "splicing indirect only\n");
BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, where->bh);
- if (err)
+ if (err)
goto err_out;
} else {
/*
@@ -1134,7 +1134,7 @@
* So what we do is to rely on the fact that journal_stop/journal_start
* will _not_ run commit under these circumstances because handle->h_ref
* is elevated. We'll still have enough credits for the tiny quotafile
- * write.
+ * write.
*/
static int do_journal_get_write_access(handle_t *handle,
struct buffer_head *bh)
@@ -1279,7 +1279,7 @@
if (inode->i_size > EXT3_I(inode)->i_disksize) {
EXT3_I(inode)->i_disksize = inode->i_size;
ret2 = ext3_mark_inode_dirty(handle, inode);
- if (!ret)
+ if (!ret)
ret = ret2;
}
ret2 = ext3_journal_stop(handle);
@@ -1288,7 +1288,7 @@
return ret;
}

-/*
+/*
* bmap() is special. It gets used by applications such as lilo and by
* the swapper to find the on-disk block of a specific piece of data.
*
@@ -1297,10 +1297,10 @@
* filesystem and enables swap, then they may get a nasty shock when the
* data getting swapped to that swapfile suddenly gets overwritten by
* the original zero's written out previously to the journal and
- * awaiting writeback in the kernel's buffer cache.
+ * awaiting writeback in the kernel's buffer cache.
*
* So, if we see any bmap calls here on a modified, data-journaled file,
- * take extra steps to flush any blocks which might be in the cache.
+ * take extra steps to flush any blocks which might be in the cache.
*/
static sector_t ext3_bmap(struct address_space *mapping, sector_t block)
{
@@ -1309,16 +1309,16 @@
int err;

if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) {
- /*
+ /*
* This is a REALLY heavyweight approach, but the use of
* bmap on dirty files is expected to be extremely rare:
* only if we run lilo or swapon on a freshly made file
- * do we expect this to happen.
+ * do we expect this to happen.
*
* (bmap requires CAP_SYS_RAWIO so this does not
* represent an unprivileged user DOS attack --- we'd be
* in trouble if mortal users could trigger this path at
- * will.)
+ * will.)
*
* NB. EXT3_STATE_JDATA is not set on files other than
* regular files. If somebody wants to bmap a directory
@@ -1454,7 +1454,7 @@
*/

/*
- * And attach them to the current transaction. But only if
+ * And attach them to the current transaction. But only if
* block_write_full_page() succeeded. Otherwise they are unmapped,
* and generally junk.
*/
@@ -1641,7 +1641,7 @@
}
}

- ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+ ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs,
ext3_get_block, NULL);

@@ -2022,7 +2022,7 @@
__le32 *first, __le32 *last)
{
ext3_fsblk_t block_to_free = 0; /* Starting block # of a run */
- unsigned long count = 0; /* Number of blocks in the run */
+ unsigned long count = 0; /* Number of blocks in the run */
__le32 *block_to_free_p = NULL; /* Pointer into inode/ind
corresponding to
block_to_free */
@@ -2051,7 +2051,7 @@
} else if (nr == block_to_free + count) {
count++;
} else {
- ext3_clear_blocks(handle, inode, this_bh,
+ ext3_clear_blocks(handle, inode, this_bh,
block_to_free,
count, block_to_free_p, p);
block_to_free = nr;
@@ -2181,7 +2181,7 @@
*p = 0;
BUFFER_TRACE(parent_bh,
"call ext3_journal_dirty_metadata");
- ext3_journal_dirty_metadata(handle,
+ ext3_journal_dirty_metadata(handle,
parent_bh);
}
}
@@ -2631,7 +2631,7 @@
}
inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size
* (for stat), not the fs block
- * size */
+ * size */
inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
#ifdef EXT3_FRAGMENTS
@@ -2701,7 +2701,7 @@
if (raw_inode->i_block[0])
init_special_inode(inode, inode->i_mode,
old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
- else
+ else
init_special_inode(inode, inode->i_mode,
new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
}
@@ -2721,8 +2721,8 @@
*
* The caller must have write access to iloc->bh.
*/
-static int ext3_do_update_inode(handle_t *handle,
- struct inode *inode,
+static int ext3_do_update_inode(handle_t *handle,
+ struct inode *inode,
struct ext3_iloc *iloc)
{
struct ext3_inode *raw_inode = ext3_raw_inode(iloc);
@@ -2897,7 +2897,7 @@
* commit will leave the blocks being flushed in an unused state on
* disk. (On recovery, the inode will get truncated and the blocks will
* be freed, so we have a strong guarantee that no future commit will
- * leave these blocks visible to the user.)
+ * leave these blocks visible to the user.)
*
* Called with inode->sem down.
*/
@@ -3040,13 +3040,13 @@
return err;
}

-/*
+/*
* On success, We end up with an outstanding reference count against
- * iloc->bh. This _must_ be cleaned up later.
+ * iloc->bh. This _must_ be cleaned up later.
*/

int
-ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
+ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
struct ext3_iloc *iloc)
{
int err = 0;
@@ -3136,7 +3136,7 @@
}

#if 0
-/*
+/*
* Bind an inode's backing buffer_head into this transaction, to prevent
* it from being flushed to disk early. Unlike
* ext3_reserve_inode_write, this leaves behind no bh reference and
@@ -3154,7 +3154,7 @@
BUFFER_TRACE(iloc.bh, "get_write_access");
err = journal_get_write_access(handle, iloc.bh);
if (!err)
- err = ext3_journal_dirty_metadata(handle,
+ err = ext3_journal_dirty_metadata(handle,
iloc.bh);
brelse(iloc.bh);
}
diff -urN linux-2.6.18-rc4/fs/ext3/namei.c linux-2.6.18-rc4-ws/fs/ext3/namei.c
--- linux-2.6.18-rc4/fs/ext3/namei.c 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/fs/ext3/namei.c 2006-08-10 22:50:58.710722125 -0700
@@ -76,7 +76,7 @@
#ifdef DX_DEBUG
#define dxtrace(command) command
#else
-#define dxtrace(command)
+#define dxtrace(command)
#endif

struct fake_dirent
@@ -169,7 +169,7 @@
static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
static int ext3_htree_next_block(struct inode *dir, __u32 hash,
struct dx_frame *frame,
- struct dx_frame *frames,
+ struct dx_frame *frames,
__u32 *start_hash);
static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
struct ext3_dir_entry_2 **res_dir, int *err);
@@ -250,7 +250,7 @@
}

struct stats
-{
+{
unsigned names;
unsigned space;
unsigned bcount;
@@ -464,7 +464,7 @@
*/
static int ext3_htree_next_block(struct inode *dir, __u32 hash,
struct dx_frame *frame,
- struct dx_frame *frames,
+ struct dx_frame *frames,
__u32 *start_hash)
{
struct dx_frame *p;
@@ -632,7 +632,7 @@
}
count += ret;
hashval = ~0;
- ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS,
+ ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS,
frame, frames, &hashval);
*next_hash = hashval;
if (ret < 0) {
@@ -649,7 +649,7 @@
break;
}
dx_release(frames);
- dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
+ dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
count, *next_hash));
return count;
errout:
@@ -1050,7 +1050,7 @@
parent = ERR_PTR(-ENOMEM);
}
return parent;
-}
+}

#define S_SHIFT 12
static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = {
@@ -1198,7 +1198,7 @@
* add_dirent_to_buf will attempt search the directory block for
* space. It will return -ENOSPC if no space is available, and -EIO
* and -EEXIST if directory entry already exists.
- *
+ *
* NOTE! bh is NOT released in the case where ENOSPC is returned. In
* all other cases bh is released.
*/
@@ -1572,7 +1572,7 @@
* ext3_delete_entry deletes a directory entry by merging it with the
* previous entry
*/
-static int ext3_delete_entry (handle_t *handle,
+static int ext3_delete_entry (handle_t *handle,
struct inode * dir,
struct ext3_dir_entry_2 * de_del,
struct buffer_head * bh)
@@ -1643,12 +1643,12 @@
* is so far negative - it has no inode.
*
* If the create succeeds, we fill in the inode information
- * with d_instantiate().
+ * with d_instantiate().
*/
static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
struct nameidata *nd)
{
- handle_t *handle;
+ handle_t *handle;
struct inode * inode;
int err, retries = 0;

@@ -1813,7 +1813,7 @@
de1 = (struct ext3_dir_entry_2 *)
((char *) de + le16_to_cpu(de->rec_len));
if (le32_to_cpu(de->inode) != inode->i_ino ||
- !le32_to_cpu(de1->inode) ||
+ !le32_to_cpu(de1->inode) ||
strcmp (".", de->name) ||
strcmp ("..", de1->name)) {
ext3_warning (inode->i_sb, "empty_dir",
@@ -1883,7 +1883,7 @@
* being truncated, or files being unlinked. */

/* @@@ FIXME: Observation from aviro:
- * I think I can trigger J_ASSERT in ext3_orphan_add(). We block
+ * I think I can trigger J_ASSERT in ext3_orphan_add(). We block
* here (on lock_super()), so race with ext3_link() which might bump
* ->i_nlink. For, say it, character device. Not a regular file,
* not a directory, not a symlink and ->i_nlink > 0.
@@ -2393,4 +2393,4 @@
.removexattr = generic_removexattr,
#endif
.permission = ext3_permission,
-};
+};
diff -urN linux-2.6.18-rc4/fs/ext3/super.c linux-2.6.18-rc4-ws/fs/ext3/super.c
--- linux-2.6.18-rc4/fs/ext3/super.c 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/fs/ext3/super.c 2006-08-10 22:50:58.736719315 -0700
@@ -62,13 +62,13 @@
static void ext3_write_super (struct super_block * sb);
static void ext3_write_super_lockfs(struct super_block *sb);

-/*
+/*
* Wrappers for journal_start/end.
*
* The only special thing we need to do here is to make sure that all
* journal_end calls result in the superblock being marked dirty, so
* that sync() will call the filesystem's write_super callback if
- * appropriate.
+ * appropriate.
*/
handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
{
@@ -90,11 +90,11 @@
return journal_start(journal, nblocks);
}

-/*
+/*
* The only special thing we need to do here is to make sure that all
* journal_stop calls result in the superblock being marked dirty, so
* that sync() will call the filesystem's write_super callback if
- * appropriate.
+ * appropriate.
*/
int __ext3_journal_stop(const char *where, handle_t *handle)
{
@@ -369,7 +369,7 @@
{
struct list_head *l;

- printk(KERN_ERR "sb orphan head is %d\n",
+ printk(KERN_ERR "sb orphan head is %d\n",
le32_to_cpu(sbi->s_es->s_last_orphan));

printk(KERN_ERR "sb_info orphan list:\n");
@@ -378,7 +378,7 @@
printk(KERN_ERR " "
"inode %s:%ld at %p: mode %o, nlink %d, next %d\n",
inode->i_sb->s_id, inode->i_ino, inode,
- inode->i_mode, inode->i_nlink,
+ inode->i_mode, inode->i_nlink,
NEXT_ORPHAN(inode));
}
}
@@ -475,7 +475,7 @@
inode_init_once(&ei->vfs_inode);
}
}
-
+
static int init_inodecache(void)
{
ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
@@ -1441,7 +1441,7 @@
(EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
- printk(KERN_WARNING
+ printk(KERN_WARNING
"EXT3-fs warning: feature flags set on rev 0 fs, "
"running e2fsck is recommended\n");
/*
@@ -1467,7 +1467,7 @@

if (blocksize < EXT3_MIN_BLOCK_SIZE ||
blocksize > EXT3_MAX_BLOCK_SIZE) {
- printk(KERN_ERR
+ printk(KERN_ERR
"EXT3-fs: Unsupported filesystem blocksize %d on %s.\n",
blocksize, sb->s_id);
goto failed_mount;
@@ -1491,14 +1491,14 @@
offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
bh = sb_bread(sb, logic_sb_block);
if (!bh) {
- printk(KERN_ERR
+ printk(KERN_ERR
"EXT3-fs: Can't read superblock on 2nd try.\n");
goto failed_mount;
}
es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
sbi->s_es = es;
if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
- printk (KERN_ERR
+ printk (KERN_ERR
"EXT3-fs: Magic mismatch, very weird !\n");
goto failed_mount;
}
@@ -1778,7 +1778,7 @@
/*
* Setup any per-fs journal parameters now. We'll do this both on
* initial mount, once the journal has been initialised but before we've
- * done any recovery; and again on any subsequent remount.
+ * done any recovery; and again on any subsequent remount.
*/
static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
{
diff -urN linux-2.6.18-rc4/fs/jbd/checkpoint.c linux-2.6.18-rc4-ws/fs/jbd/checkpoint.c
--- linux-2.6.18-rc4/fs/jbd/checkpoint.c 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/fs/jbd/checkpoint.c 2006-08-10 22:50:12.168761094 -0700
@@ -1,6 +1,6 @@
/*
* linux/fs/checkpoint.c
- *
+ *
* Written by Stephen C. Tweedie <[email protected]>, 1999
*
* Copyright 1999 Red Hat Software --- All Rights Reserved
@@ -9,8 +9,8 @@
* the terms of the GNU General Public License, version 2, or at your
* option, any later version, incorporated herein by reference.
*
- * Checkpoint routines for the generic filesystem journaling code.
- * Part of the ext2fs journaling system.
+ * Checkpoint routines for the generic filesystem journaling code.
+ * Part of the ext2fs journaling system.
*
* Checkpointing is the process of ensuring that a section of the log is
* committed fully to disk, so that that portion of the log can be
@@ -225,7 +225,7 @@
* Try to flush one buffer from the checkpoint list to disk.
*
* Return 1 if something happened which requires us to abort the current
- * scan of the checkpoint list.
+ * scan of the checkpoint list.
*
* Called with j_list_lock held and drops it if 1 is returned
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
@@ -269,7 +269,7 @@
* possibly block, while still holding the journal lock.
* We cannot afford to let the transaction logic start
* messing around with this buffer before we write it to
- * disk, as that would break recoverability.
+ * disk, as that would break recoverability.
*/
BUFFER_TRACE(bh, "queue");
get_bh(bh);
@@ -292,7 +292,7 @@
* Perform an actual checkpoint. We take the first transaction on the
* list of transactions to be checkpointed and send all its buffers
* to disk. We submit larger chunks of data at once.
- *
+ *
* The journal should be locked before calling this function.
*/
int log_do_checkpoint(journal_t *journal)
@@ -303,10 +303,10 @@

jbd_debug(1, "Start checkpoint\n");

- /*
+ /*
* First thing: if there are any transactions in the log which
* don't need checkpointing, just eliminate them from the
- * journal straight away.
+ * journal straight away.
*/
result = cleanup_journal_tail(journal);
jbd_debug(1, "cleanup_journal_tail returned %d\n", result);
@@ -384,9 +384,9 @@
* we have already got rid of any since the last update of the log tail
* in the journal superblock. If so, we can instantly roll the
* superblock forward to remove those transactions from the log.
- *
+ *
* Return <0 on error, 0 on success, 1 if there was nothing to clean up.
- *
+ *
* Called with the journal lock held.
*
* This is the only part of the journaling code which really needs to be
@@ -403,8 +403,8 @@
unsigned long blocknr, freed;

/* OK, work out the oldest transaction remaining in the log, and
- * the log block it starts at.
- *
+ * the log block it starts at.
+ *
* If the log is now empty, we need to work out which is the
* next transaction ID we will write, and where it will
* start. */
@@ -557,7 +557,7 @@
return ret;
}

-/*
+/*
* journal_remove_checkpoint: called after a buffer has been committed
* to disk (either by being write-back flushed to disk, or being
* committed to the log).
@@ -635,7 +635,7 @@
* Called with the journal locked.
* Called with j_list_lock held.
*/
-void __journal_insert_checkpoint(struct journal_head *jh,
+void __journal_insert_checkpoint(struct journal_head *jh,
transaction_t *transaction)
{
JBUFFER_TRACE(jh, "entry");
@@ -657,7 +657,7 @@

/*
* We've finished with this transaction structure: adios...
- *
+ *
* The transaction must have no links except for the checkpoint by this
* point.
*
diff -urN linux-2.6.18-rc4/fs/jbd/journal.c linux-2.6.18-rc4-ws/fs/jbd/journal.c
--- linux-2.6.18-rc4/fs/jbd/journal.c 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/fs/jbd/journal.c 2006-08-10 22:50:12.227754698 -0700
@@ -577,7 +577,7 @@
* this is a no-op. If needed, we can use j_blk_offset - everything is
* ready.
*/
-int journal_bmap(journal_t *journal, unsigned long blocknr,
+int journal_bmap(journal_t *journal, unsigned long blocknr,
unsigned long *retp)
{
int err = 0;
@@ -698,10 +698,10 @@
* @len: Lenght of the journal in blocks.
* @blocksize: blocksize of journalling device
* @returns: a newly created journal_t *
- *
+ *
* journal_init_dev creates a journal which maps a fixed contiguous
* range of blocks on an arbitrary block device.
- *
+ *
*/
journal_t * journal_init_dev(struct block_device *bdev,
struct block_device *fs_dev,
@@ -738,11 +738,11 @@

return journal;
}
-
-/**
+
+/**
* journal_t * journal_init_inode () - creates a journal which maps to a inode.
* @inode: An inode to create the journal in
- *
+ *
* journal_init_inode creates a journal which maps an on-disk inode as
* the journal. The inode must exist already, must support bmap() and
* must have all data blocks preallocated.
@@ -762,7 +762,7 @@
journal->j_inode = inode;
jbd_debug(1,
"journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
- journal, inode->i_sb->s_id, inode->i_ino,
+ journal, inode->i_sb->s_id, inode->i_ino,
(long long) inode->i_size,
inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);

@@ -797,10 +797,10 @@
return journal;
}

-/*
+/*
* If the journal init or create aborts, we need to mark the journal
* superblock as being NULL to prevent the journal destroy from writing
- * back a bogus superblock.
+ * back a bogus superblock.
*/
static void journal_fail_superblock (journal_t *journal)
{
@@ -843,13 +843,13 @@
return 0;
}

-/**
+/**
* int journal_create() - Initialise the new journal file
* @journal: Journal to create. This structure must have been initialised
- *
+ *
* Given a journal_t structure which tells us which disk blocks we can
* use, create a new journal superblock and initialise all of the
- * journal fields from scratch.
+ * journal fields from scratch.
**/
int journal_create(journal_t *journal)
{
@@ -914,7 +914,7 @@
return journal_reset(journal);
}

-/**
+/**
* void journal_update_superblock() - Update journal sb on disk.
* @journal: The journal to update.
* @wait: Set to '0' if you don't want to wait for IO completion.
@@ -938,7 +938,7 @@
journal->j_transaction_sequence) {
jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
"(start %ld, seq %d, errno %d)\n",
- journal->j_tail, journal->j_tail_sequence,
+ journal->j_tail, journal->j_tail_sequence,
journal->j_errno);
goto out;
}
@@ -1061,7 +1061,7 @@
/**
* int journal_load() - Read journal from disk.
* @journal: Journal to act on.
- *
+ *
* Given a journal_t structure which tells us which disk blocks contain
* a journal, read the journal from disk to initialise the in-memory
* structures.
@@ -1164,9 +1164,9 @@
* @compat: bitmask of compatible features
* @ro: bitmask of features that force read-only mount
* @incompat: bitmask of incompatible features
- *
+ *
* Check whether the journal uses all of a given set of
- * features. Return true (non-zero) if it does.
+ * features. Return true (non-zero) if it does.
**/

int journal_check_used_features (journal_t *journal, unsigned long compat,
@@ -1195,7 +1195,7 @@
* @compat: bitmask of compatible features
* @ro: bitmask of features that force read-only mount
* @incompat: bitmask of incompatible features
- *
+ *
* Check whether the journaling code supports the use of
* all of a given set of features on this journal. Return true
* (non-zero) if it can. */
@@ -1233,7 +1233,7 @@
* @incompat: bitmask of incompatible features
*
* Mark a given journal feature as present on the
- * superblock. Returns true if the requested features could be set.
+ * superblock. Returns true if the requested features could be set.
*
*/

@@ -1319,7 +1319,7 @@
/**
* int journal_flush () - Flush journal
* @journal: Journal to act on.
- *
+ *
* Flush all data for a given journal to disk and empty the journal.
* Filesystems can use this when remounting readonly to ensure that
* recovery does not need to happen on remount.
@@ -1386,7 +1386,7 @@
* int journal_wipe() - Wipe journal contents
* @journal: Journal to act on.
* @write: flag (see below)
- *
+ *
* Wipe out all of the contents of a journal, safely. This will produce
* a warning if the journal contains any valid recovery information.
* Must be called between journal_init_*() and journal_load().
@@ -1441,7 +1441,7 @@

/*
* Journal abort has very specific semantics, which we describe
- * for journal abort.
+ * for journal abort.
*
* Two internal function, which provide abort to te jbd layer
* itself are here.
@@ -1496,7 +1496,7 @@
* Perform a complete, immediate shutdown of the ENTIRE
* journal (not of a single transaction). This operation cannot be
* undone without closing and reopening the journal.
- *
+ *
* The journal_abort function is intended to support higher level error
* recovery mechanisms such as the ext2/ext3 remount-readonly error
* mode.
@@ -1530,7 +1530,7 @@
* supply an errno; a null errno implies that absolutely no further
* writes are done to the journal (unless there are any already in
* progress).
- *
+ *
*/

void journal_abort(journal_t *journal, int errno)
@@ -1538,7 +1538,7 @@
__journal_abort_soft(journal, errno);
}

-/**
+/**
* int journal_errno () - returns the journal's error state.
* @journal: journal to examine.
*
@@ -1562,7 +1562,7 @@
return err;
}

-/**
+/**
* int journal_clear_err () - clears the journal's error state
* @journal: journal to act on.
*
@@ -1582,7 +1582,7 @@
return err;
}

-/**
+/**
* void journal_ack_err() - Ack journal err.
* @journal: journal to act on.
*
@@ -1604,7 +1604,7 @@

/*
* Simple support for retrying memory allocations. Introduced to help to
- * debug different VM deadlock avoidance strategies.
+ * debug different VM deadlock avoidance strategies.
*/
void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
{
diff -urN linux-2.6.18-rc4/fs/jbd/recovery.c linux-2.6.18-rc4-ws/fs/jbd/recovery.c
--- linux-2.6.18-rc4/fs/jbd/recovery.c 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/fs/jbd/recovery.c 2006-08-10 22:50:12.252751988 -0700
@@ -1,6 +1,6 @@
/*
* linux/fs/recovery.c
- *
+ *
* Written by Stephen C. Tweedie <[email protected]>, 1999
*
* Copyright 1999-2000 Red Hat Software --- All Rights Reserved
@@ -10,7 +10,7 @@
* option, any later version, incorporated herein by reference.
*
* Journal recovery routines for the generic filesystem journaling code;
- * part of the ext2fs journaling system.
+ * part of the ext2fs journaling system.
*/

#ifndef __KERNEL__
@@ -25,9 +25,9 @@

/*
* Maintain information about the progress of the recovery job, so that
- * the different passes can carry information between them.
+ * the different passes can carry information between them.
*/
-struct recovery_info
+struct recovery_info
{
tid_t start_transaction;
tid_t end_transaction;
@@ -116,7 +116,7 @@
err = 0;

failed:
- if (nbufs)
+ if (nbufs)
journal_brelse_array(bufs, nbufs);
return err;
}
@@ -128,7 +128,7 @@
* Read a block from the journal
*/

-static int jread(struct buffer_head **bhp, journal_t *journal,
+static int jread(struct buffer_head **bhp, journal_t *journal,
unsigned int offset)
{
int err;
@@ -212,14 +212,14 @@
/**
* journal_recover - recovers a on-disk journal
* @journal: the journal to recover
- *
+ *
* The primary function for recovering the log contents when mounting a
- * journaled device.
+ * journaled device.
*
* Recovery is done in three passes. In the first pass, we look for the
* end of the log. In the second, we assemble the list of revoke
* blocks. In the third and final pass, we replay any un-revoked blocks
- * in the log.
+ * in the log.
*/
int journal_recover(journal_t *journal)
{
@@ -231,10 +231,10 @@
memset(&info, 0, sizeof(info));
sb = journal->j_superblock;

- /*
+ /*
* The journal superblock's s_start field (the current log head)
* is always zero if, and only if, the journal was cleanly
- * unmounted.
+ * unmounted.
*/

if (!sb->s_start) {
@@ -253,7 +253,7 @@
jbd_debug(0, "JBD: recovery, exit status %d, "
"recovered transactions %u to %u\n",
err, info.start_transaction, info.end_transaction);
- jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n",
+ jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n",
info.nr_replays, info.nr_revoke_hits, info.nr_revokes);

/* Restart the log at the next transaction ID, thus invalidating
@@ -268,15 +268,15 @@
/**
* journal_skip_recovery - Start journal and wipe exiting records
* @journal: journal to startup
- *
+ *
* Locate any valid recovery information from the journal and set up the
* journal structures in memory to ignore it (presumably because the
- * caller has evidence that it is out of date).
+ * caller has evidence that it is out of date).
* This function does'nt appear to be exorted..
*
* We perform one pass over the journal to allow us to tell the user how
* much recovery information is being erased, and to let us initialise
- * the journal transaction sequence numbers to the next unused ID.
+ * the journal transaction sequence numbers to the next unused ID.
*/
int journal_skip_recovery(journal_t *journal)
{
@@ -297,7 +297,7 @@
#ifdef CONFIG_JBD_DEBUG
int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
#endif
- jbd_debug(0,
+ jbd_debug(0,
"JBD: ignoring %d transaction%s from the journal.\n",
dropped, (dropped == 1) ? "" : "s");
journal->j_transaction_sequence = ++info.end_transaction;
@@ -324,10 +324,10 @@
MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
/ sizeof(journal_block_tag_t));

- /*
+ /*
* First thing is to establish what we expect to find in the log
* (in terms of transaction IDs), and where (in terms of log
- * block offsets): query the superblock.
+ * block offsets): query the superblock.
*/

sb = journal->j_superblock;
@@ -344,7 +344,7 @@
* Now we walk through the log, transaction by transaction,
* making sure that each transaction has a commit block in the
* expected place. Each complete transaction gets replayed back
- * into the main filesystem.
+ * into the main filesystem.
*/

while (1) {
@@ -379,8 +379,8 @@
next_log_block++;
wrap(journal, next_log_block);

- /* What kind of buffer is it?
- *
+ /* What kind of buffer is it?
+ *
* If it is a descriptor block, check that it has the
* expected sequence number. Otherwise, we're all done
* here. */
@@ -394,7 +394,7 @@

blocktype = be32_to_cpu(tmp->h_blocktype);
sequence = be32_to_cpu(tmp->h_sequence);
- jbd_debug(3, "Found magic %d, sequence %d\n",
+ jbd_debug(3, "Found magic %d, sequence %d\n",
blocktype, sequence);

if (sequence != next_commit_ID) {
@@ -438,7 +438,7 @@
/* Recover what we can, but
* report failure at the end. */
success = err;
- printk (KERN_ERR
+ printk (KERN_ERR
"JBD: IO error %d recovering "
"block %ld in log\n",
err, io_block);
@@ -452,7 +452,7 @@
* revoked, then we're all done
* here. */
if (journal_test_revoke
- (journal, blocknr,
+ (journal, blocknr,
next_commit_ID)) {
brelse(obh);
++info->nr_revoke_hits;
@@ -465,7 +465,7 @@
blocknr,
journal->j_blocksize);
if (nbh == NULL) {
- printk(KERN_ERR
+ printk(KERN_ERR
"JBD: Out of memory "
"during recovery.\n");
err = -ENOMEM;
@@ -537,7 +537,7 @@
}

done:
- /*
+ /*
* We broke out of the log scan loop: either we came to the
* known end of the log or we found an unexpected block in the
* log. If the latter happened, then we know that the "current"
@@ -567,7 +567,7 @@

/* Scan a revoke record, marking all blocks mentioned as revoked. */

-static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
+static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
tid_t sequence, struct recovery_info *info)
{
journal_revoke_header_t *header;
diff -urN linux-2.6.18-rc4/fs/jbd/revoke.c linux-2.6.18-rc4-ws/fs/jbd/revoke.c
--- linux-2.6.18-rc4/fs/jbd/revoke.c 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/fs/jbd/revoke.c 2006-08-10 22:50:12.278749169 -0700
@@ -1,6 +1,6 @@
/*
* linux/fs/revoke.c
- *
+ *
* Written by Stephen C. Tweedie <[email protected]>, 2000
*
* Copyright 2000 Red Hat corp --- All Rights Reserved
@@ -15,10 +15,10 @@
* Revoke is the mechanism used to prevent old log records for deleted
* metadata from being replayed on top of newer data using the same
* blocks. The revoke mechanism is used in two separate places:
- *
+ *
* + Commit: during commit we write the entire list of the current
* transaction's revoked blocks to the journal
- *
+ *
* + Recovery: during recovery we record the transaction ID of all
* revoked blocks. If there are multiple revoke records in the log
* for a single block, only the last one counts, and if there is a log
@@ -29,7 +29,7 @@
* single transaction:
*
* Block is revoked and then journaled:
- * The desired end result is the journaling of the new block, so we
+ * The desired end result is the journaling of the new block, so we
* cancel the revoke before the transaction commits.
*
* Block is journaled and then revoked:
@@ -41,7 +41,7 @@
* transaction must have happened after the block was journaled and so
* the revoke must take precedence.
*
- * Block is revoked and then written as data:
+ * Block is revoked and then written as data:
* The data write is allowed to succeed, but the revoke is _not_
* cancelled. We still need to prevent old log records from
* overwriting the new data. We don't even need to clear the revoke
@@ -54,7 +54,7 @@
* buffer has not been revoked, and cancel_revoke
* need do nothing.
* RevokeValid set, Revoked set:
- * buffer has been revoked.
+ * buffer has been revoked.
*/

#ifndef __KERNEL__
@@ -77,7 +77,7 @@
journal replay, this involves recording the transaction ID of the
last transaction to revoke this block. */

-struct jbd_revoke_record_s
+struct jbd_revoke_record_s
{
struct list_head hash;
tid_t sequence; /* Used for recovery only */
@@ -90,8 +90,8 @@
{
/* It is conceivable that we might want a larger hash table
* for recovery. Must be a power of two. */
- int hash_size;
- int hash_shift;
+ int hash_size;
+ int hash_shift;
struct list_head *hash_table;
};

@@ -301,22 +301,22 @@

#ifdef __KERNEL__

-/*
+/*
* journal_revoke: revoke a given buffer_head from the journal. This
* prevents the block from being replayed during recovery if we take a
* crash after this current transaction commits. Any subsequent
* metadata writes of the buffer in this transaction cancel the
- * revoke.
+ * revoke.
*
* Note that this call may block --- it is up to the caller to make
* sure that there are no further calls to journal_write_metadata
* before the revoke is complete. In ext3, this implies calling the
* revoke before clearing the block bitmap when we are deleting
- * metadata.
+ * metadata.
*
* Revoke performs a journal_forget on any buffer_head passed in as a
* parameter, but does _not_ forget the buffer_head if the bh was only
- * found implicitly.
+ * found implicitly.
*
* bh_in may not be a journalled buffer - it may have come off
* the hash tables without an attached journal_head.
@@ -325,7 +325,7 @@
* by one.
*/

-int journal_revoke(handle_t *handle, unsigned long blocknr,
+int journal_revoke(handle_t *handle, unsigned long blocknr,
struct buffer_head *bh_in)
{
struct buffer_head *bh = NULL;
@@ -487,7 +487,7 @@
else
journal->j_revoke = journal->j_revoke_table[0];

- for (i = 0; i < journal->j_revoke->hash_size; i++)
+ for (i = 0; i < journal->j_revoke->hash_size; i++)
INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]);
}

@@ -498,7 +498,7 @@
* Called with the journal lock held.
*/

-void journal_write_revoke_records(journal_t *journal,
+void journal_write_revoke_records(journal_t *journal,
transaction_t *transaction)
{
struct journal_head *descriptor;
@@ -507,7 +507,7 @@
struct list_head *hash_list;
int i, offset, count;

- descriptor = NULL;
+ descriptor = NULL;
offset = 0;
count = 0;

@@ -519,10 +519,10 @@
hash_list = &revoke->hash_table[i];

while (!list_empty(hash_list)) {
- record = (struct jbd_revoke_record_s *)
+ record = (struct jbd_revoke_record_s *)
hash_list->next;
write_one_revoke_record(journal, transaction,
- &descriptor, &offset,
+ &descriptor, &offset,
record);
count++;
list_del(&record->hash);
@@ -534,14 +534,14 @@
jbd_debug(1, "Wrote %d revoke records\n", count);
}

-/*
+/*
* Write out one revoke record. We need to create a new descriptor
- * block if the old one is full or if we have not already created one.
+ * block if the old one is full or if we have not already created one.
*/

-static void write_one_revoke_record(journal_t *journal,
+static void write_one_revoke_record(journal_t *journal,
transaction_t *transaction,
- struct journal_head **descriptorp,
+ struct journal_head **descriptorp,
int *offsetp,
struct jbd_revoke_record_s *record)
{
@@ -584,21 +584,21 @@
*descriptorp = descriptor;
}

- * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
+ * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) =
cpu_to_be32(record->blocknr);
offset += 4;
*offsetp = offset;
}

-/*
+/*
* Flush a revoke descriptor out to the journal. If we are aborting,
* this is a noop; otherwise we are generating a buffer which needs to
* be waited for during commit, so it has to go onto the appropriate
* journal buffer list.
*/

-static void flush_descriptor(journal_t *journal,
- struct journal_head *descriptor,
+static void flush_descriptor(journal_t *journal,
+ struct journal_head *descriptor,
int offset)
{
journal_revoke_header_t *header;
@@ -618,7 +618,7 @@
}
#endif

-/*
+/*
* Revoke support for recovery.
*
* Recovery needs to be able to:
@@ -629,7 +629,7 @@
* check whether a given block in a given transaction should be replayed
* (ie. has not been revoked by a revoke record in that or a subsequent
* transaction)
- *
+ *
* empty the revoke table after recovery.
*/

@@ -637,11 +637,11 @@
* First, setting revoke records. We create a new revoke record for
* every block ever revoked in the log as we scan it for recovery, and
* we update the existing records if we find multiple revokes for a
- * single block.
+ * single block.
*/

-int journal_set_revoke(journal_t *journal,
- unsigned long blocknr,
+int journal_set_revoke(journal_t *journal,
+ unsigned long blocknr,
tid_t sequence)
{
struct jbd_revoke_record_s *record;
@@ -653,18 +653,18 @@
if (tid_gt(sequence, record->sequence))
record->sequence = sequence;
return 0;
- }
+ }
return insert_revoke_hash(journal, blocknr, sequence);
}

-/*
+/*
* Test revoke records. For a given block referenced in the log, has
* that block been revoked? A revoke record with a given transaction
* sequence number revokes all blocks in that transaction and earlier
* ones, but later transactions still need replayed.
*/

-int journal_test_revoke(journal_t *journal,
+int journal_test_revoke(journal_t *journal,
unsigned long blocknr,
tid_t sequence)
{
diff -urN linux-2.6.18-rc4/fs/jbd/transaction.c linux-2.6.18-rc4-ws/fs/jbd/transaction.c
--- linux-2.6.18-rc4/fs/jbd/transaction.c 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/fs/jbd/transaction.c 2006-08-10 22:50:12.309745809 -0700
@@ -1,6 +1,6 @@
/*
* linux/fs/transaction.c
- *
+ *
* Written by Stephen C. Tweedie <[email protected]>, 1998
*
* Copyright 1998 Red Hat corp --- All Rights Reserved
@@ -10,7 +10,7 @@
* option, any later version, incorporated herein by reference.
*
* Generic filesystem transaction handling code; part of the ext2fs
- * journaling system.
+ * journaling system.
*
* This file manages transactions (compound commits managed by the
* journaling code) and handles (individual atomic operations by the
@@ -74,7 +74,7 @@
* start_this_handle: Given a handle, deal with any locking or stalling
* needed to make sure that there is enough journal space for the handle
* to begin. Attach the handle to a transaction and set up the
- * transaction's buffer credits.
+ * transaction's buffer credits.
*/

static int start_this_handle(journal_t *journal, handle_t *handle)
@@ -117,7 +117,7 @@
if (is_journal_aborted(journal) ||
(journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) {
spin_unlock(&journal->j_state_lock);
- ret = -EROFS;
+ ret = -EROFS;
goto out;
}

@@ -182,7 +182,7 @@
goto repeat;
}

- /*
+ /*
* The commit code assumes that it can get enough log space
* without forcing a checkpoint. This is *critical* for
* correctness: a checkpoint of a buffer which is also
@@ -191,7 +191,7 @@
*
* We must therefore ensure the necessary space in the journal
* *before* starting to dirty potentially checkpointed buffers
- * in the new transaction.
+ * in the new transaction.
*
* The worst part is, any transaction currently committing can
* reduce the free space arbitrarily. Be careful to account for
@@ -246,13 +246,13 @@
}

/**
- * handle_t *journal_start() - Obtain a new handle.
+ * handle_t *journal_start() - Obtain a new handle.
* @journal: Journal to start transaction on.
* @nblocks: number of block buffer we might modify
*
* We make sure that the transaction can guarantee at least nblocks of
* modified buffers in the log. We block until the log can guarantee
- * that much space.
+ * that much space.
*
* This function is visible to journal users (like ext3fs), so is not
* called with the journal already locked.
@@ -292,11 +292,11 @@
* int journal_extend() - extend buffer credits.
* @handle: handle to 'extend'
* @nblocks: nr blocks to try to extend by.
- *
+ *
* Some transactions, such as large extends and truncates, can be done
* atomically all at once or in several stages. The operation requests
* a credit for a number of buffer modications in advance, but can
- * extend its credit if it needs more.
+ * extend its credit if it needs more.
*
* journal_extend tries to give the running handle more buffer credits.
* It does not guarantee that allocation - this is a best-effort only.
@@ -363,7 +363,7 @@
* int journal_restart() - restart a handle .
* @handle: handle to restart
* @nblocks: nr credits requested
- *
+ *
* Restart a handle for a multi-transaction filesystem
* operation.
*
@@ -462,7 +462,7 @@
/**
* void journal_unlock_updates (journal_t* journal) - release barrier
* @journal: Journal to release the barrier on.
- *
+ *
* Release a transaction barrier obtained with journal_lock_updates().
*
* Should be called without the journal lock held.
@@ -547,8 +547,8 @@
jbd_lock_bh_state(bh);

/* We now hold the buffer lock so it is safe to query the buffer
- * state. Is the buffer dirty?
- *
+ * state. Is the buffer dirty?
+ *
* If so, there are two possibilities. The buffer may be
* non-journaled, and undergoing a quite legitimate writeback.
* Otherwise, it is journaled, and we don't expect dirty buffers
@@ -566,7 +566,7 @@
*/
if (jh->b_transaction) {
J_ASSERT_JH(jh,
- jh->b_transaction == transaction ||
+ jh->b_transaction == transaction ||
jh->b_transaction ==
journal->j_committing_transaction);
if (jh->b_next_transaction)
@@ -653,7 +653,7 @@
* buffer had better remain locked during the kmalloc,
* but that should be true --- we hold the journal lock
* still and the buffer is already on the BUF_JOURNAL
- * list so won't be flushed.
+ * list so won't be flushed.
*
* Subtle point, though: if this is a get_undo_access,
* then we will be relying on the frozen_data to contain
@@ -764,8 +764,8 @@
* manually rather than reading off disk), then we need to keep the
* buffer_head locked until it has been completely filled with new
* data. In this case, we should be able to make the assertion that
- * the bh is not already part of an existing transaction.
- *
+ * the bh is not already part of an existing transaction.
+ *
* The buffer should already be locked by the caller by this point.
* There is no lock ranking violation: it was a newly created,
* unlocked buffer beforehand. */
@@ -777,7 +777,7 @@
*
* Call this if you create a new bh.
*/
-int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
+int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal;
@@ -846,13 +846,13 @@
* do not reuse freed space until the deallocation has been committed,
* since if we overwrote that space we would make the delete
* un-rewindable in case of a crash.
- *
+ *
* To deal with that, journal_get_undo_access requests write access to a
* buffer for parts of non-rewindable operations such as delete
* operations on the bitmaps. The journaling code must keep a copy of
* the buffer's contents prior to the undo_access call until such time
* as we know that the buffer has definitely been committed to disk.
- *
+ *
* We never need to know which transaction the committed data is part
* of, buffers touched here are guaranteed to be dirtied later and so
* will be committed to a new transaction in due course, at which point
@@ -910,13 +910,13 @@
return err;
}

-/**
+/**
* int journal_dirty_data() - mark a buffer as containing dirty data which
* needs to be flushed before we can commit the
- * current transaction.
+ * current transaction.
* @handle: transaction
* @bh: bufferhead to mark
- *
+ *
* The buffer is placed on the transaction's data list and is marked as
* belonging to the transaction.
*
@@ -945,15 +945,15 @@

/*
* What if the buffer is already part of a running transaction?
- *
+ *
* There are two cases:
* 1) It is part of the current running transaction. Refile it,
* just in case we have allocated it as metadata, deallocated
- * it, then reallocated it as data.
+ * it, then reallocated it as data.
* 2) It is part of the previous, still-committing transaction.
* If all we want to do is to guarantee that the buffer will be
* written to disk before this new transaction commits, then
- * being sure that the *previous* transaction has this same
+ * being sure that the *previous* transaction has this same
* property is sufficient for us! Just leave it on its old
* transaction.
*
@@ -1075,18 +1075,18 @@
return 0;
}

-/**
+/**
* int journal_dirty_metadata() - mark a buffer as containing dirty metadata
* @handle: transaction to add buffer to.
- * @bh: buffer to mark
- *
+ * @bh: buffer to mark
+ *
* mark dirty metadata which needs to be journaled as part of the current
* transaction.
*
* The buffer is placed on the transaction's metadata list and is marked
- * as belonging to the transaction.
+ * as belonging to the transaction.
*
- * Returns error number or 0 on success.
+ * Returns error number or 0 on success.
*
* Special care needs to be taken if the buffer already belongs to the
* current committing transaction (in which case we should have frozen
@@ -1134,11 +1134,11 @@

set_buffer_jbddirty(bh);

- /*
+ /*
* Metadata already on the current transaction list doesn't
* need to be filed. Metadata on another transaction's list must
* be committing, and will be refiled once the commit completes:
- * leave it alone for now.
+ * leave it alone for now.
*/
if (jh->b_transaction != transaction) {
JBUFFER_TRACE(jh, "already on other transaction");
@@ -1164,7 +1164,7 @@
return 0;
}

-/*
+/*
* journal_release_buffer: undo a get_write_access without any buffer
* updates, if the update decided in the end that it didn't need access.
*
@@ -1175,20 +1175,20 @@
BUFFER_TRACE(bh, "entry");
}

-/**
+/**
* void journal_forget() - bforget() for potentially-journaled buffers.
* @handle: transaction handle
* @bh: bh to 'forget'
*
* We can only do the bforget if there are no commits pending against the
* buffer. If the buffer is dirty in the current running transaction we
- * can safely unlink it.
+ * can safely unlink it.
*
* bh may not be a journalled buffer at all - it may be a non-JBD
* buffer which came off the hashtable. Check for this.
*
* Decrements bh->b_count by one.
- *
+ *
* Allow this call even if the handle has aborted --- it may be part of
* the caller's cleanup after an abort.
*/
@@ -1236,7 +1236,7 @@

drop_reserve = 1;

- /*
+ /*
* We are no longer going to journal this buffer.
* However, the commit of this transaction is still
* important to the buffer: the delete that we are now
@@ -1245,7 +1245,7 @@
*
* So, if we have a checkpoint on the buffer, we should
* now refile the buffer on our BJ_Forget list so that
- * we know to remove the checkpoint after we commit.
+ * we know to remove the checkpoint after we commit.
*/

if (jh->b_cp_transaction) {
@@ -1263,7 +1263,7 @@
}
}
} else if (jh->b_transaction) {
- J_ASSERT_JH(jh, (jh->b_transaction ==
+ J_ASSERT_JH(jh, (jh->b_transaction ==
journal->j_committing_transaction));
/* However, if the buffer is still owned by a prior
* (committing) transaction, we can't drop it yet... */
@@ -1293,7 +1293,7 @@
/**
* int journal_stop() - complete a transaction
* @handle: tranaction to complete.
- *
+ *
* All done for a particular handle.
*
* There is not much action needed here. We just return any remaining
@@ -1302,7 +1302,7 @@
* filesystem is marked for synchronous update.
*
* journal_stop itself will not usually return an error, but it may
- * do so in unusual circumstances. In particular, expect it to
+ * do so in unusual circumstances. In particular, expect it to
* return -EIO if a journal_abort has been executed since the
* transaction began.
*/
@@ -1387,7 +1387,7 @@

/*
* Special case: JFS_SYNC synchronous updates require us
- * to wait for the commit to complete.
+ * to wait for the commit to complete.
*/
if (handle->h_sync && !(current->flags & PF_MEMALLOC))
err = log_wait_commit(journal, tid);
@@ -1438,7 +1438,7 @@
* jbd_lock_bh_state(jh2bh(jh)) is held.
*/

-static inline void
+static inline void
__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
{
if (!*list) {
@@ -1453,7 +1453,7 @@
}
}

-/*
+/*
* Remove a buffer from a transaction list, given the transaction's list
* head pointer.
*
@@ -1474,7 +1474,7 @@
jh->b_tnext->b_tprev = jh->b_tprev;
}

-/*
+/*
* Remove a buffer from the appropriate transaction list.
*
* Note that this function can *change* the value of
@@ -1594,17 +1594,17 @@
}


-/**
+/**
* int journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
* @page: to try and free
* @unused_gfp_mask: unused
*
- *
+ *
* For all the buffers on this page,
* if they are fully written out ordered data, move them onto BUF_CLEAN
* so try_to_free_buffers() can reap them.
- *
+ *
* This function returns non-zero if we wish try_to_free_buffers()
* to be called. We do this if the page is releasable by try_to_free_buffers().
* We also do it if the page has locked or dirty buffers and the caller wants
@@ -1628,7 +1628,7 @@
* cannot happen because we never reallocate freed data as metadata
* while the data is part of a transaction. Yes?
*/
-int journal_try_to_free_buffers(journal_t *journal,
+int journal_try_to_free_buffers(journal_t *journal,
struct page *page, gfp_t unused_gfp_mask)
{
struct buffer_head *head;
@@ -1696,7 +1696,7 @@
}

/*
- * journal_invalidatepage
+ * journal_invalidatepage
*
* This code is tricky. It has a number of cases to deal with.
*
@@ -1704,15 +1704,15 @@
*
* i_size must be updated on disk before we start calling invalidatepage on the
* data.
- *
+ *
* This is done in ext3 by defining an ext3_setattr method which
* updates i_size before truncate gets going. By maintaining this
* invariant, we can be sure that it is safe to throw away any buffers
* attached to the current transaction: once the transaction commits,
* we know that the data will not be needed.
- *
+ *
* Note however that we can *not* throw away data belonging to the
- * previous, committing transaction!
+ * previous, committing transaction!
*
* Any disk blocks which *are* part of the previous, committing
* transaction (and which therefore cannot be discarded immediately) are
@@ -1731,7 +1731,7 @@
* don't make guarantees about the order in which data hits disk --- in
* particular we don't guarantee that new dirty data is flushed before
* transaction commit --- so it is always safe just to discard data
- * immediately in that mode. --sct
+ * immediately in that mode. --sct
*/

/*
@@ -1875,9 +1875,9 @@
return may_free;
}

-/**
+/**
* void journal_invalidatepage()
- * @journal: journal to use for flush...
+ * @journal: journal to use for flush...
* @page: page to flush
* @offset: length of page to invalidate.
*
@@ -1885,7 +1885,7 @@
*
*/
void journal_invalidatepage(journal_t *journal,
- struct page *page,
+ struct page *page,
unsigned long offset)
{
struct buffer_head *head, *bh, *next;
@@ -1923,8 +1923,8 @@
}
}

-/*
- * File a buffer on the given transaction list.
+/*
+ * File a buffer on the given transaction list.
*/
void __journal_file_buffer(struct journal_head *jh,
transaction_t *transaction, int jlist)
@@ -1947,7 +1947,7 @@
* with __jbd_unexpected_dirty_buffer()'s handling of dirty
* state. */

- if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
+ if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
jlist == BJ_Shadow || jlist == BJ_Forget) {
if (test_clear_buffer_dirty(bh) ||
test_clear_buffer_jbddirty(bh))
@@ -2007,7 +2007,7 @@
jbd_unlock_bh_state(jh2bh(jh));
}

-/*
+/*
* Remove a buffer from its current buffer list in preparation for
* dropping it from its current transaction entirely. If the buffer has
* already started to be used by a subsequent transaction, refile the
@@ -2059,7 +2059,7 @@
* to the caller to remove the journal_head if necessary. For the
* unlocked journal_refile_buffer call, the caller isn't going to be
* doing anything else to the buffer so we need to do the cleanup
- * ourselves to avoid a jh leak.
+ * ourselves to avoid a jh leak.
*
* *** The journal_head may be freed by this call! ***
*/
diff -urN linux-2.6.18-rc4/include/linux/ext3_jbd.h linux-2.6.18-rc4-ws/include/linux/ext3_jbd.h
--- linux-2.6.18-rc4/include/linux/ext3_jbd.h 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/include/linux/ext3_jbd.h 2006-08-10 22:50:58.738719098 -0700
@@ -23,7 +23,7 @@

/* Define the number of blocks we need to account to a transaction to
* modify one block of data.
- *
+ *
* We may have to touch one inode, one bitmap buffer, up to three
* indirection blocks, the group and superblock summaries, and the data
* block to complete the transaction. */
@@ -88,16 +88,16 @@
#endif

int
-ext3_mark_iloc_dirty(handle_t *handle,
+ext3_mark_iloc_dirty(handle_t *handle,
struct inode *inode,
struct ext3_iloc *iloc);

-/*
+/*
* On success, We end up with an outstanding reference count against
- * iloc->bh. This _must_ be cleaned up later.
+ * iloc->bh. This _must_ be cleaned up later.
*/

-int ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
+int ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
struct ext3_iloc *iloc);

int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode);
diff -urN linux-2.6.18-rc4/include/linux/jbd.h linux-2.6.18-rc4-ws/include/linux/jbd.h
--- linux-2.6.18-rc4/include/linux/jbd.h 2006-08-06 11:20:11.000000000 -0700
+++ linux-2.6.18-rc4-ws/include/linux/jbd.h 2006-08-10 22:50:12.336742882 -0700
@@ -1,6 +1,6 @@
/*
* linux/include/linux/jbd.h
- *
+ *
* Written by Stephen C. Tweedie <[email protected]>
*
* Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved
@@ -94,8 +94,8 @@
* number of outstanding buffers possible at any time. When the
* operation completes, any buffer credits not used are credited back to
* the transaction, so that at all times we know how many buffers the
- * outstanding updates on a transaction might possibly touch.
- *
+ * outstanding updates on a transaction might possibly touch.
+ *
* This is an opaque datatype.
**/
typedef struct handle_s handle_t; /* Atomic operation type */
@@ -105,7 +105,7 @@
* typedef journal_t - The journal_t maintains all of the journaling state information for a single filesystem.
*
* journal_t is linked to from the fs superblock structure.
- *
+ *
* We use the journal_t to keep track of all outstanding transaction
* activity on the filesystem, and to manage the state of the log
* writing process.
@@ -125,7 +125,7 @@
* On-disk structures
*/

-/*
+/*
* Descriptor block types:
*/

@@ -146,8 +146,8 @@
} journal_header_t;


-/*
- * The block tag: used to describe a single buffer in the journal
+/*
+ * The block tag: used to describe a single buffer in the journal
*/
typedef struct journal_block_tag_s
{
@@ -155,9 +155,9 @@
__be32 t_flags; /* See below */
} journal_block_tag_t;

-/*
+/*
* The revoke descriptor: used on disk to describe a series of blocks to
- * be revoked from the log
+ * be revoked from the log
*/
typedef struct journal_revoke_header_s
{
@@ -371,10 +371,10 @@
**/

/* Docbook can't yet cope with the bit fields, but will leave the documentation
- * in so it can be fixed later.
+ * in so it can be fixed later.
*/

-struct handle_s
+struct handle_s
{
/* Which compound transaction is this update a part of? */
transaction_t *h_transaction;
@@ -432,7 +432,7 @@
*
*/

-struct transaction_s
+struct transaction_s
{
/* Pointer to the journal for this transaction. [no locking] */
journal_t *t_journal;
@@ -452,7 +452,7 @@
T_RUNDOWN,
T_FLUSH,
T_COMMIT,
- T_FINISHED
+ T_FINISHED
} t_state;

/*
@@ -566,7 +566,7 @@
* journal_t.
* @j_flags: General journaling state flags
* @j_errno: Is there an outstanding uncleared error on the journal (from a
- * prior abort)?
+ * prior abort)?
* @j_sb_buffer: First part of superblock buffer
* @j_superblock: Second part of superblock buffer
* @j_format_version: Version of the superblock format
@@ -580,7 +580,7 @@
* @j_wait_transaction_locked: Wait queue for waiting for a locked transaction
* to start committing, or for a barrier lock to be released
* @j_wait_logspace: Wait queue for waiting for checkpointing to complete
- * @j_wait_done_commit: Wait queue for waiting for commit to complete
+ * @j_wait_done_commit: Wait queue for waiting for commit to complete
* @j_wait_checkpoint: Wait queue to trigger checkpointing
* @j_wait_commit: Wait queue to trigger commit
* @j_wait_updates: Wait queue to wait for updates to complete
@@ -589,7 +589,7 @@
* @j_tail: Journal tail - identifies the oldest still-used block in the
* journal.
* @j_free: Journal free - how many free blocks are there in the journal?
- * @j_first: The block number of the first usable block
+ * @j_first: The block number of the first usable block
* @j_last: The block number one beyond the last usable block
* @j_dev: Device where we store the journal
* @j_blocksize: blocksize for the location where we store the journal.
@@ -601,12 +601,12 @@
* @j_list_lock: Protects the buffer lists and internal buffer state.
* @j_inode: Optional inode where we store the journal. If present, all journal
* block numbers are mapped into this inode via bmap().
- * @j_tail_sequence: Sequence number of the oldest transaction in the log
+ * @j_tail_sequence: Sequence number of the oldest transaction in the log
* @j_transaction_sequence: Sequence number of the next transaction to grant
* @j_commit_sequence: Sequence number of the most recently committed
* transaction
* @j_commit_request: Sequence number of the most recent transaction wanting
- * commit
+ * commit
* @j_uuid: Uuid of client object.
* @j_task: Pointer to the current commit thread for this journal
* @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a
@@ -820,8 +820,8 @@
void *j_private;
};

-/*
- * Journal flag definitions
+/*
+ * Journal flag definitions
*/
#define JFS_UNMOUNT 0x001 /* Journal thread is being destroyed */
#define JFS_ABORT 0x002 /* Journaling has been aborted for errors. */
@@ -830,7 +830,7 @@
#define JFS_LOADED 0x010 /* The journal superblock has been loaded */
#define JFS_BARRIER 0x020 /* Use IDE barriers */

-/*
+/*
* Function declarations for the journaling transaction and buffer
* management
*/
@@ -859,7 +859,7 @@
void __journal_insert_checkpoint(struct journal_head *, transaction_t *);

/* Buffer IO */
-extern int
+extern int
journal_write_metadata_buffer(transaction_t *transaction,
struct journal_head *jh_in,
struct journal_head **jh_out,
@@ -887,7 +887,7 @@
/* The journaling code user interface:
*
* Create and destroy handles
- * Register buffer modifications against the current transaction.
+ * Register buffer modifications against the current transaction.
*/

extern handle_t *journal_start(journal_t *, int nblocks);
@@ -914,11 +914,11 @@
int start, int len, int bsize);
extern journal_t * journal_init_inode (struct inode *);
extern int journal_update_format (journal_t *);
-extern int journal_check_used_features
+extern int journal_check_used_features
(journal_t *, unsigned long, unsigned long, unsigned long);
-extern int journal_check_available_features
+extern int journal_check_available_features
(journal_t *, unsigned long, unsigned long, unsigned long);
-extern int journal_set_features
+extern int journal_set_features
(journal_t *, unsigned long, unsigned long, unsigned long);
extern int journal_create (journal_t *);
extern int journal_load (journal_t *journal);
@@ -1012,7 +1012,7 @@
* bit, when set, indicates that we have had a fatal error somewhere,
* either inside the journaling layer or indicated to us by the client
* (eg. ext3), and that we and should not commit any further
- * transactions.
+ * transactions.
*/

static inline int is_journal_aborted(journal_t *journal)
@@ -1079,7 +1079,7 @@
#define BJ_Reserved 7 /* Buffer is reserved for access by journal */
#define BJ_Locked 8 /* Locked for I/O during commit */
#define BJ_Types 9
-
+
extern int jbd_blocks_per_page(struct inode *inode);

#ifdef __KERNEL__


2006-08-12 05:59:54

by Randy Dunlap

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

On Fri, 11 Aug 2006 16:00:02 -0700 Andrew Morton wrote:

> On Fri, 11 Aug 2006 13:57:37 -0700
> "Randy.Dunlap" <[email protected]> wrote:
>
> > On Thu, 10 Aug 2006 13:29:56 +0400 Alex Tomas wrote:
> >
> > > AM> - The existing comments could benefit from some rework by a
> > > AM> native English speaker.
> > >
> > > could someone assist here, please?
> >
> > See if this helps.
>
> Thanks, Randy. The Kconfig help text could do with some help too,
> if you're feeling keen..

Uh, yes. Well, I don't really care for the "ext3dev" name, but
I tried to ignore that "feature" and fix it up anyway.
Feel free to ignore any parts that you don't want.


---
From: Randy Dunlap <[email protected]>

Clean up help text and module names in ext4 & jbd2 Kconfig entries.
Add "depends on EXPERIMENTAL".

Signed-off-by: Randy Dunlap <[email protected]>
---
fs/Kconfig | 59 ++++++++++++++++++++++++++++++-----------------------------
1 files changed, 30 insertions(+), 29 deletions(-)

--- linux-2618-rc4-ext4.orig/fs/Kconfig
+++ linux-2618-rc4-ext4/fs/Kconfig
@@ -139,28 +139,29 @@ config EXT3_FS_SECURITY
extended attributes for file security labels, say N.

config EXT3DEV_FS
- tristate "Developmenting extended fs support"
+ tristate "Ext3dev/ext4 extended fs support development"
+ depends on EXPERIMENTAL
select JBD2
help
- Ext3dev is a precede filesystem toward next generation
- of extended fs, based on ext3 filesystem code. It will be
- renamed ext4 fs later once this ext3dev is mature and stabled.
+ Ext3dev is a predecessor filesystem of the next generation
+ extended fs ext4, based on ext3 filesystem code. It will be
+ renamed ext4 fs later, once ext3dev is mature and stabled.

Unlike the change from ext2 filesystem to ext3 filesystem,
the on-disk format of ext3dev is not the same as ext3 any more:
- it is based on extent maps and it support 48 bit physical block
+ it is based on extent maps and it supports 48-bit physical block
numbers. These combined on-disk format changes will allow
- ext3dev/ext4 to handle more than 16TB filesystem volume --
- a hard limit that ext3 can not overcome without changing
+ ext3dev/ext4 to handle more than 16 TB filesystem volumes --
+ a hard limit that ext3 cannot overcome without changing the
on-disk format.

- Other than extent maps and 48 bit block number, ext3dev also is
+ Other than extent maps and 48-bit block number, ext3dev also is
likely to have other new features such as persistent preallocation,
- high resolution time stamps and larger file support etc. These
+ high resolution time stamps, and larger file support etc. These
features will be added to ext3dev gradually.

- To compile this file system support as a module, choose M here: the
- module will be called ext2. Be aware however that the file system
+ To compile this file system support as a module, choose M here. The
+ module will be called ext3dev. Be aware, however, that the filesystem
of your root partition (the one containing the directory /) cannot
be compiled as a module, and so this could be dangerous.

@@ -177,17 +178,17 @@ config EXT3DEV_FS_XATTR

If unsure, say N.

- You need this for POSIX ACL support on ext3.
+ You need this for POSIX ACL support on ext3dev/ext4.

config EXT3DEV_FS_POSIX_ACL
bool "Ext3dev POSIX Access Control Lists"
depends on EXT3DEV_FS_XATTR
select FS_POSIX_ACL
help
- Posix Access Control Lists (ACLs) support permissions for users and
+ POSIX Access Control Lists (ACLs) support permissions for users and
groups beyond the owner/group/world scheme.

- To learn more about Access Control Lists, visit the Posix ACLs for
+ To learn more about Access Control Lists, visit the POSIX ACLs for
Linux website <http://acl.bestbits.at/>.

If you don't know what Access Control Lists are, say N
@@ -199,7 +200,7 @@ config EXT3DEV_FS_SECURITY
Security labels support alternative access control models
implemented by security modules like SELinux. This option
enables an extended attribute handler for file security
- labels in the ext3 filesystem.
+ labels in the ext3dev/ext4 filesystem.

If you are not using a security module that requires using
extended attributes for file security labels, say N.
@@ -240,31 +241,31 @@ config JBD2
tristate
help
This is a generic journaling layer for block devices that support
- both 32 bit and 64 bit block numbers. It is currently used by
- the ext3dev/ext4 file system, but it could also be used to add
+ both 32-bit and 64-bit block numbers. It is currently used by
+ the ext3dev/ext4 filesystem, but it could also be used to add
journal support to other file systems or block devices such
- as RAID or LVM.
+ as RAID or LVM.

- If you are using the ext4, you need to say Y here. If you are not
- using ext4 then you will probably want to say N.
+ If you are using ext3dev/ext4, you need to say Y here. If you are not
+ using ext3dev/ext4 then you will probably want to say N.

- To compile this device as a module, choose M here: the module will be
- called jbd. If you are compiling ext4 into the kernel,
+ To compile this device as a module, choose M here. The module will be
+ called jbd2. If you are compiling ext3dev/ext4 into the kernel,
you cannot compile this code as a module.

config JBD2_DEBUG
- bool "JBD2 (ext4) debugging support"
+ bool "JBD2 (ext3dev/ext4) debugging support"
depends on JBD2
help
- If you are using the ext4 journaled file system (or potentially any
- other file system/device using JBD2), this option allows you to
- enable debugging output while the system is running, in order to
- help track down any problems you are having. By default the
- debugging output will be turned off.
+ If you are using the ext3dev/ext4 journaled file system (or
+ potentially any other filesystem/device using JBD2), this option
+ allows you to enable debugging output while the system is running,
+ in order to help track down any problems you are having.
+ By default the debugging output will be turned off.

If you select Y here, then you will be able to turn on debugging
with "echo N > /proc/sys/fs/jbd2-debug", where N is a number between
- 1 and 5, the higher the number, the more debugging output is
+ 1 and 5. The higher the number, the more debugging output is
generated. To turn debugging off again, do
"echo 0 > /proc/sys/fs/jbd2-debug".

2006-08-12 17:43:11

by djwong

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

Randy.Dunlap wrote:

> Uh, yes. Well, I don't really care for the "ext3dev" name, but
> I tried to ignore that "feature" and fix it up anyway.
> Feel free to ignore any parts that you don't want.

Three nits to pick:

> + renamed ext4 fs later, once ext3dev is mature and stabled.

I think you want "stabilized", not "stabled".

(Until someone writes horsefs, that is. ;))

> + Other than extent maps and 48-bit block number, ext3dev also is

"...48-bit block numbers..."

> + By default the debugging output will be turned off.

"By default, the..."

--D

2006-08-12 18:17:31

by Randy Dunlap

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

On Sat, 12 Aug 2006 10:43:04 -0700 Darrick J. Wong wrote:

> Randy.Dunlap wrote:
>
> > Uh, yes. Well, I don't really care for the "ext3dev" name, but
> > I tried to ignore that "feature" and fix it up anyway.
> > Feel free to ignore any parts that you don't want.
>
> Three nits to pick:
>
> > + renamed ext4 fs later, once ext3dev is mature and
> > stabled.
>
> I think you want "stabilized", not "stabled".
>
> (Until someone writes horsefs, that is. ;))
>
> > + Other than extent maps and 48-bit block number,
> > ext3dev also is
>
> "...48-bit block numbers..."
>
> > + By default the debugging output will be turned off.
>
> "By default, the..."

Thanks, all fixed, although I think that the comma on the last
one is optional. New patch is below, although what I would
really prefer to see is this:

- Drop the "ext3dev" name. Use "ext4dev" temporarily, then
switch to "ext4".

---
From: Randy Dunlap <[email protected]>

Clean up help text and module names in ext4 & jbd2 Kconfig entries.
Add "depends on EXPERIMENTAL".

Signed-off-by: Randy Dunlap <[email protected]>
---
fs/Kconfig | 59 ++++++++++++++++++++++++++++++-----------------------------
1 files changed, 30 insertions(+), 29 deletions(-)

--- linux-2618-rc4-ext4.orig/fs/Kconfig
+++ linux-2618-rc4-ext4/fs/Kconfig
@@ -139,28 +139,29 @@ config EXT3_FS_SECURITY
extended attributes for file security labels, say N.

config EXT3DEV_FS
- tristate "Developmenting extended fs support"
+ tristate "Ext3dev/ext4 extended fs support development (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
select JBD2
help
- Ext3dev is a precede filesystem toward next generation
- of extended fs, based on ext3 filesystem code. It will be
- renamed ext4 fs later once this ext3dev is mature and stabled.
+ Ext3dev is a predecessor filesystem of the next generation
+ extended fs ext4, based on ext3 filesystem code. It will be
+ renamed ext4 fs later, once ext3dev is mature and stabilized.

Unlike the change from ext2 filesystem to ext3 filesystem,
the on-disk format of ext3dev is not the same as ext3 any more:
- it is based on extent maps and it support 48 bit physical block
+ it is based on extent maps and it supports 48-bit physical block
numbers. These combined on-disk format changes will allow
- ext3dev/ext4 to handle more than 16TB filesystem volume --
- a hard limit that ext3 can not overcome without changing
+ ext3dev/ext4 to handle more than 16 TB filesystem volumes --
+ a hard limit that ext3 cannot overcome without changing the
on-disk format.

- Other than extent maps and 48 bit block number, ext3dev also is
+ Other than extent maps and 48-bit block numbers, ext3dev also is
likely to have other new features such as persistent preallocation,
- high resolution time stamps and larger file support etc. These
+ high resolution time stamps, and larger file support etc. These
features will be added to ext3dev gradually.

- To compile this file system support as a module, choose M here: the
- module will be called ext2. Be aware however that the file system
+ To compile this file system support as a module, choose M here. The
+ module will be called ext3dev. Be aware, however, that the filesystem
of your root partition (the one containing the directory /) cannot
be compiled as a module, and so this could be dangerous.

@@ -177,17 +178,17 @@ config EXT3DEV_FS_XATTR

If unsure, say N.

- You need this for POSIX ACL support on ext3.
+ You need this for POSIX ACL support on ext3dev/ext4.

config EXT3DEV_FS_POSIX_ACL
bool "Ext3dev POSIX Access Control Lists"
depends on EXT3DEV_FS_XATTR
select FS_POSIX_ACL
help
- Posix Access Control Lists (ACLs) support permissions for users and
+ POSIX Access Control Lists (ACLs) support permissions for users and
groups beyond the owner/group/world scheme.

- To learn more about Access Control Lists, visit the Posix ACLs for
+ To learn more about Access Control Lists, visit the POSIX ACLs for
Linux website <http://acl.bestbits.at/>.

If you don't know what Access Control Lists are, say N
@@ -199,7 +200,7 @@ config EXT3DEV_FS_SECURITY
Security labels support alternative access control models
implemented by security modules like SELinux. This option
enables an extended attribute handler for file security
- labels in the ext3 filesystem.
+ labels in the ext3dev/ext4 filesystem.

If you are not using a security module that requires using
extended attributes for file security labels, say N.
@@ -240,31 +241,31 @@ config JBD2
tristate
help
This is a generic journaling layer for block devices that support
- both 32 bit and 64 bit block numbers. It is currently used by
- the ext3dev/ext4 file system, but it could also be used to add
+ both 32-bit and 64-bit block numbers. It is currently used by
+ the ext3dev/ext4 filesystem, but it could also be used to add
journal support to other file systems or block devices such
- as RAID or LVM.
+ as RAID or LVM.

- If you are using the ext4, you need to say Y here. If you are not
- using ext4 then you will probably want to say N.
+ If you are using ext3dev/ext4, you need to say Y here. If you are not
+ using ext3dev/ext4 then you will probably want to say N.

- To compile this device as a module, choose M here: the module will be
- called jbd. If you are compiling ext4 into the kernel,
+ To compile this device as a module, choose M here. The module will be
+ called jbd2. If you are compiling ext3dev/ext4 into the kernel,
you cannot compile this code as a module.

config JBD2_DEBUG
- bool "JBD2 (ext4) debugging support"
+ bool "JBD2 (ext3dev/ext4) debugging support"
depends on JBD2
help
- If you are using the ext4 journaled file system (or potentially any
- other file system/device using JBD2), this option allows you to
- enable debugging output while the system is running, in order to
- help track down any problems you are having. By default the
- debugging output will be turned off.
+ If you are using the ext3dev/ext4 journaled file system (or
+ potentially any other filesystem/device using JBD2), this option
+ allows you to enable debugging output while the system is running,
+ in order to help track down any problems you are having.
+ By default, the debugging output will be turned off.

If you select Y here, then you will be able to turn on debugging
with "echo N > /proc/sys/fs/jbd2-debug", where N is a number between
- 1 and 5, the higher the number, the more debugging output is
+ 1 and 5. The higher the number, the more debugging output is
generated. To turn debugging off again, do
"echo 0 > /proc/sys/fs/jbd2-debug".

2006-08-14 16:26:38

by Mingming Cao

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

On Sat, 2006-08-12 at 11:20 -0700, Randy.Dunlap wrote:
> On Sat, 12 Aug 2006 10:43:04 -0700 Darrick J. Wong wrote:
>
> > Randy.Dunlap wrote:
> >
> > > Uh, yes. Well, I don't really care for the "ext3dev" name, but
> > > I tried to ignore that "feature" and fix it up anyway.
> > > Feel free to ignore any parts that you don't want.
> >
> > Three nits to pick:
> >
> > > + renamed ext4 fs later, once ext3dev is mature and
> > > stabled.
> >
> > I think you want "stabilized", not "stabled".
> >
> > (Until someone writes horsefs, that is. ;))
> >
> > > + Other than extent maps and 48-bit block number,
> > > ext3dev also is
> >
> > "...48-bit block numbers..."
> >
> > > + By default the debugging output will be turned off.
> >
> > "By default, the..."
>
> Thanks, all fixed, although I think that the comma on the last
> one is optional.

Thanks, Randy and Darrick.

> New patch is below, although what I would
> really prefer to see is this:
>
> - Drop the "ext3dev" name. Use "ext4dev" temporarily, then
> switch to "ext4".
>

I think ext4dev is a better name too. Would you like to make that
changes as well?

Thanks,

Mingming
> ---
> From: Randy Dunlap <[email protected]>
>
> Clean up help text and module names in ext4 & jbd2 Kconfig entries.
> Add "depends on EXPERIMENTAL".
>
> Signed-off-by: Randy Dunlap <[email protected]>
> ---
> fs/Kconfig | 59 ++++++++++++++++++++++++++++++-----------------------------
> 1 files changed, 30 insertions(+), 29 deletions(-)
>
> --- linux-2618-rc4-ext4.orig/fs/Kconfig
> +++ linux-2618-rc4-ext4/fs/Kconfig
> @@ -139,28 +139,29 @@ config EXT3_FS_SECURITY
> extended attributes for file security labels, say N.
>
> config EXT3DEV_FS
> - tristate "Developmenting extended fs support"
> + tristate "Ext3dev/ext4 extended fs support development (EXPERIMENTAL)"
> + depends on EXPERIMENTAL
> select JBD2
> help
> - Ext3dev is a precede filesystem toward next generation
> - of extended fs, based on ext3 filesystem code. It will be
> - renamed ext4 fs later once this ext3dev is mature and stabled.
> + Ext3dev is a predecessor filesystem of the next generation
> + extended fs ext4, based on ext3 filesystem code. It will be
> + renamed ext4 fs later, once ext3dev is mature and stabilized.
>
> Unlike the change from ext2 filesystem to ext3 filesystem,
> the on-disk format of ext3dev is not the same as ext3 any more:
> - it is based on extent maps and it support 48 bit physical block
> + it is based on extent maps and it supports 48-bit physical block
> numbers. These combined on-disk format changes will allow
> - ext3dev/ext4 to handle more than 16TB filesystem volume --
> - a hard limit that ext3 can not overcome without changing
> + ext3dev/ext4 to handle more than 16 TB filesystem volumes --
> + a hard limit that ext3 cannot overcome without changing the
> on-disk format.
>
> - Other than extent maps and 48 bit block number, ext3dev also is
> + Other than extent maps and 48-bit block numbers, ext3dev also is
> likely to have other new features such as persistent preallocation,
> - high resolution time stamps and larger file support etc. These
> + high resolution time stamps, and larger file support etc. These
> features will be added to ext3dev gradually.
>
> - To compile this file system support as a module, choose M here: the
> - module will be called ext2. Be aware however that the file system
> + To compile this file system support as a module, choose M here. The
> + module will be called ext3dev. Be aware, however, that the filesystem
> of your root partition (the one containing the directory /) cannot
> be compiled as a module, and so this could be dangerous.
>
> @@ -177,17 +178,17 @@ config EXT3DEV_FS_XATTR
>
> If unsure, say N.
>
> - You need this for POSIX ACL support on ext3.
> + You need this for POSIX ACL support on ext3dev/ext4.
>
> config EXT3DEV_FS_POSIX_ACL
> bool "Ext3dev POSIX Access Control Lists"
> depends on EXT3DEV_FS_XATTR
> select FS_POSIX_ACL
> help
> - Posix Access Control Lists (ACLs) support permissions for users and
> + POSIX Access Control Lists (ACLs) support permissions for users and
> groups beyond the owner/group/world scheme.
>
> - To learn more about Access Control Lists, visit the Posix ACLs for
> + To learn more about Access Control Lists, visit the POSIX ACLs for
> Linux website <http://acl.bestbits.at/>.
>
> If you don't know what Access Control Lists are, say N
> @@ -199,7 +200,7 @@ config EXT3DEV_FS_SECURITY
> Security labels support alternative access control models
> implemented by security modules like SELinux. This option
> enables an extended attribute handler for file security
> - labels in the ext3 filesystem.
> + labels in the ext3dev/ext4 filesystem.
>
> If you are not using a security module that requires using
> extended attributes for file security labels, say N.
> @@ -240,31 +241,31 @@ config JBD2
> tristate
> help
> This is a generic journaling layer for block devices that support
> - both 32 bit and 64 bit block numbers. It is currently used by
> - the ext3dev/ext4 file system, but it could also be used to add
> + both 32-bit and 64-bit block numbers. It is currently used by
> + the ext3dev/ext4 filesystem, but it could also be used to add
> journal support to other file systems or block devices such
> - as RAID or LVM.
> + as RAID or LVM.
>
> - If you are using the ext4, you need to say Y here. If you are not
> - using ext4 then you will probably want to say N.
> + If you are using ext3dev/ext4, you need to say Y here. If you are not
> + using ext3dev/ext4 then you will probably want to say N.
>
> - To compile this device as a module, choose M here: the module will be
> - called jbd. If you are compiling ext4 into the kernel,
> + To compile this device as a module, choose M here. The module will be
> + called jbd2. If you are compiling ext3dev/ext4 into the kernel,
> you cannot compile this code as a module.
>
> config JBD2_DEBUG
> - bool "JBD2 (ext4) debugging support"
> + bool "JBD2 (ext3dev/ext4) debugging support"
> depends on JBD2
> help
> - If you are using the ext4 journaled file system (or potentially any
> - other file system/device using JBD2), this option allows you to
> - enable debugging output while the system is running, in order to
> - help track down any problems you are having. By default the
> - debugging output will be turned off.
> + If you are using the ext3dev/ext4 journaled file system (or
> + potentially any other filesystem/device using JBD2), this option
> + allows you to enable debugging output while the system is running,
> + in order to help track down any problems you are having.
> + By default, the debugging output will be turned off.
>
> If you select Y here, then you will be able to turn on debugging
> with "echo N > /proc/sys/fs/jbd2-debug", where N is a number between
> - 1 and 5, the higher the number, the more debugging output is
> + 1 and 5. The higher the number, the more debugging output is
> generated. To turn debugging off again, do
> "echo 0 > /proc/sys/fs/jbd2-debug".
>

2006-08-14 17:19:32

by Randy Dunlap

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

On Mon, 14 Aug 2006 09:26:31 -0700 Mingming Cao wrote:

> On Sat, 2006-08-12 at 11:20 -0700, Randy.Dunlap wrote:
> >
> > New patch is below, although what I would
> > really prefer to see is this:
> >
> > - Drop the "ext3dev" name. Use "ext4dev" temporarily, then
> > switch to "ext4".
>
> I think ext4dev is a better name too. Would you like to make that
> changes as well?

---
From: Randy Dunlap <[email protected]>

Rename ext3dev to ext4dev.

Signed-off-by: Randy Dunlap <[email protected]>
---
fs/Kconfig | 58 +++++++++++++++++++++++-----------------------
fs/Makefile | 2 -
fs/ext4/Makefile | 10 +++----
fs/ext4/acl.h | 6 ++--
fs/ext4/file.c | 2 -
fs/ext4/inode.c | 2 -
fs/ext4/namei.c | 6 ++--
fs/ext4/super.c | 18 +++++++-------
fs/ext4/symlink.c | 4 +--
fs/ext4/xattr.c | 8 +++---
fs/ext4/xattr.h | 8 +++---
include/linux/ext4_fs_i.h | 4 +--
12 files changed, 64 insertions(+), 64 deletions(-)

--- linux-2618-rc4-ext4.orig/fs/Kconfig
+++ linux-2618-rc4-ext4/fs/Kconfig
@@ -138,38 +138,38 @@ config EXT3_FS_SECURITY
If you are not using a security module that requires using
extended attributes for file security labels, say N.

-config EXT3DEV_FS
- tristate "Ext3dev/ext4 extended fs support development (EXPERIMENTAL)"
+config EXT4DEV_FS
+ tristate "Ext4dev extended fs support development (EXPERIMENTAL)"
depends on EXPERIMENTAL
select JBD2
help
- Ext3dev is a predecessor filesystem of the next generation
+ Ext4dev is a predecessor filesystem of the next generation
extended fs ext4, based on ext3 filesystem code. It will be
- renamed ext4 fs later, once ext3dev is mature and stabilized.
+ renamed ext4 fs later, once ext4dev is mature and stabilized.

Unlike the change from ext2 filesystem to ext3 filesystem,
- the on-disk format of ext3dev is not the same as ext3 any more:
+ the on-disk format of ext4dev is not the same as ext3 any more:
it is based on extent maps and it supports 48-bit physical block
numbers. These combined on-disk format changes will allow
- ext3dev/ext4 to handle more than 16 TB filesystem volumes --
+ ext4dev to handle more than 16 TB filesystem volumes --
a hard limit that ext3 cannot overcome without changing the
on-disk format.

- Other than extent maps and 48-bit block numbers, ext3dev also is
+ Other than extent maps and 48-bit block numbers, ext4dev also is
likely to have other new features such as persistent preallocation,
high resolution time stamps, and larger file support etc. These
- features will be added to ext3dev gradually.
+ features will be added to ext4dev gradually.

To compile this file system support as a module, choose M here. The
- module will be called ext3dev. Be aware, however, that the filesystem
+ module will be called ext4dev. Be aware, however, that the filesystem
of your root partition (the one containing the directory /) cannot
be compiled as a module, and so this could be dangerous.

If unsure, say N.

-config EXT3DEV_FS_XATTR
- bool "Ext3dev extended attributes"
- depends on EXT3DEV_FS
+config EXT4DEV_FS_XATTR
+ bool "Ext4dev extended attributes"
+ depends on EXT4DEV_FS
default y
help
Extended attributes are name:value pairs associated with inodes by
@@ -178,11 +178,11 @@ config EXT3DEV_FS_XATTR

If unsure, say N.

- You need this for POSIX ACL support on ext3dev/ext4.
+ You need this for POSIX ACL support on ext4dev.

-config EXT3DEV_FS_POSIX_ACL
- bool "Ext3dev POSIX Access Control Lists"
- depends on EXT3DEV_FS_XATTR
+config EXT4DEV_FS_POSIX_ACL
+ bool "Ext4dev POSIX Access Control Lists"
+ depends on EXT4DEV_FS_XATTR
select FS_POSIX_ACL
help
POSIX Access Control Lists (ACLs) support permissions for users and
@@ -193,14 +193,14 @@ config EXT3DEV_FS_POSIX_ACL

If you don't know what Access Control Lists are, say N

-config EXT3DEV_FS_SECURITY
- bool "Ext3dev Security Labels"
- depends on EXT3DEV_FS_XATTR
+config EXT4DEV_FS_SECURITY
+ bool "Ext4dev Security Labels"
+ depends on EXT4DEV_FS_XATTR
help
Security labels support alternative access control models
implemented by security modules like SELinux. This option
enables an extended attribute handler for file security
- labels in the ext3dev/ext4 filesystem.
+ labels in the ext4dev filesystem.

If you are not using a security module that requires using
extended attributes for file security labels, say N.
@@ -242,22 +242,22 @@ config JBD2
help
This is a generic journaling layer for block devices that support
both 32-bit and 64-bit block numbers. It is currently used by
- the ext3dev/ext4 filesystem, but it could also be used to add
+ the ext4dev filesystem, but it could also be used to add
journal support to other file systems or block devices such
as RAID or LVM.

- If you are using ext3dev/ext4, you need to say Y here. If you are not
- using ext3dev/ext4 then you will probably want to say N.
+ If you are using ext4dev, you need to say Y here. If you are not
+ using ext4dev then you will probably want to say N.

To compile this device as a module, choose M here. The module will be
- called jbd2. If you are compiling ext3dev/ext4 into the kernel,
+ called jbd2. If you are compiling ext4dev into the kernel,
you cannot compile this code as a module.

config JBD2_DEBUG
- bool "JBD2 (ext3dev/ext4) debugging support"
+ bool "JBD2 (ext4dev) debugging support"
depends on JBD2
help
- If you are using the ext3dev/ext4 journaled file system (or
+ If you are using the ext4dev journaled file system (or
potentially any other filesystem/device using JBD2), this option
allows you to enable debugging output while the system is running,
in order to help track down any problems you are having.
@@ -272,9 +272,9 @@ config JBD2_DEBUG
config FS_MBCACHE
# Meta block cache for Extended Attributes (ext2/ext3/ext4)
tristate
- depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT3DEV_FS_XATTR
- default y if EXT2_FS=y || EXT3_FS=y || EXT3DEV_FS=y
- default m if EXT2_FS=m || EXT3_FS=m || EXT3DEV_FS=m
+ depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR
+ default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y
+ default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m

config REISERFS_FS
tristate "Reiserfs support"
--- linux-2618-rc4-ext4.orig/fs/Makefile
+++ linux-2618-rc4-ext4/fs/Makefile
@@ -54,7 +54,7 @@ obj-$(CONFIG_PROFILING) += dcookies.o
# Do not add any filesystems before this line
obj-$(CONFIG_REISERFS_FS) += reiserfs/
obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3
-obj-$(CONFIG_EXT3DEV_FS) += ext4/ # Before ext2 so root fs can be ext3dev
+obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext3dev
obj-$(CONFIG_JBD) += jbd/
obj-$(CONFIG_JBD2) += jbd2/
obj-$(CONFIG_EXT2_FS) += ext2/
--- linux-2618-rc4-ext4.orig/fs/ext4/Makefile
+++ linux-2618-rc4-ext4/fs/ext4/Makefile
@@ -2,11 +2,11 @@
# Makefile for the linux ext4-filesystem routines.
#

-obj-$(CONFIG_EXT3DEV_FS) += ext3dev.o
+obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o

-ext3dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o

-ext3dev-$(CONFIG_EXT3DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
-ext3dev-$(CONFIG_EXT3DEV_FS_POSIX_ACL) += acl.o
-ext3dev-$(CONFIG_EXT3DEV_FS_SECURITY) += xattr_security.o
+ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o
+ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY) += xattr_security.o
--- linux-2618-rc4-ext4.orig/fs/ext4/acl.h
+++ linux-2618-rc4-ext4/fs/ext4/acl.h
@@ -51,7 +51,7 @@ static inline int ext4_acl_count(size_t
}
}

-#ifdef CONFIG_EXT3DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL

/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl
if the ACL has not been cached */
@@ -62,7 +62,7 @@ extern int ext4_permission (struct inode
extern int ext4_acl_chmod (struct inode *);
extern int ext4_init_acl (handle_t *, struct inode *, struct inode *);

-#else /* CONFIG_EXT3DEV_FS_POSIX_ACL */
+#else /* CONFIG_EXT4DEV_FS_POSIX_ACL */
#include <linux/sched.h>
#define ext4_permission NULL

@@ -77,5 +77,5 @@ ext4_init_acl(handle_t *handle, struct i
{
return 0;
}
-#endif /* CONFIG_EXT3DEV_FS_POSIX_ACL */
+#endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */

--- linux-2618-rc4-ext4.orig/fs/ext4/xattr.h
+++ linux-2618-rc4-ext4/fs/ext4/xattr.h
@@ -56,7 +56,7 @@ struct ext4_xattr_entry {
#define EXT4_XATTR_SIZE(size) \
(((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)

-# ifdef CONFIG_EXT3DEV_FS_XATTR
+# ifdef CONFIG_EXT4DEV_FS_XATTR

extern struct xattr_handler ext4_xattr_user_handler;
extern struct xattr_handler ext4_xattr_trusted_handler;
@@ -79,7 +79,7 @@ extern void exit_ext4_xattr(void);

extern struct xattr_handler *ext4_xattr_handlers[];

-# else /* CONFIG_EXT3DEV_FS_XATTR */
+# else /* CONFIG_EXT4DEV_FS_XATTR */

static inline int
ext4_xattr_get(struct inode *inode, int name_index, const char *name,
@@ -131,9 +131,9 @@ exit_ext4_xattr(void)

#define ext4_xattr_handlers NULL

-# endif /* CONFIG_EXT3DEV_FS_XATTR */
+# endif /* CONFIG_EXT4DEV_FS_XATTR */

-#ifdef CONFIG_EXT3DEV_FS_SECURITY
+#ifdef CONFIG_EXT4DEV_FS_SECURITY
extern int ext4_init_security(handle_t *handle, struct inode *inode,
struct inode *dir);
#else
--- linux-2618-rc4-ext4.orig/fs/ext4/file.c
+++ linux-2618-rc4-ext4/fs/ext4/file.c
@@ -126,7 +126,7 @@ const struct file_operations ext4_file_o
struct inode_operations ext4_file_inode_operations = {
.truncate = ext4_truncate,
.setattr = ext4_setattr,
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext4_listxattr,
--- linux-2618-rc4-ext4.orig/fs/ext4/inode.c
+++ linux-2618-rc4-ext4/fs/ext4/inode.c
@@ -2589,7 +2589,7 @@ void ext4_read_inode(struct inode * inod
struct buffer_head *bh;
int block;

-#ifdef CONFIG_EXT3DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
ei->i_acl = EXT4_ACL_NOT_CACHED;
ei->i_default_acl = EXT4_ACL_NOT_CACHED;
#endif
--- linux-2618-rc4-ext4.orig/fs/ext4/namei.c
+++ linux-2618-rc4-ext4/fs/ext4/namei.c
@@ -1689,7 +1689,7 @@ retry:
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
inode->i_op = &ext4_special_inode_operations;
#endif
err = ext4_add_nondir(handle, dentry, inode);
@@ -2364,7 +2364,7 @@ struct inode_operations ext4_dir_inode_o
.mknod = ext4_mknod,
.rename = ext4_rename,
.setattr = ext4_setattr,
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext4_listxattr,
@@ -2375,7 +2375,7 @@ struct inode_operations ext4_dir_inode_o

struct inode_operations ext4_special_inode_operations = {
.setattr = ext4_setattr,
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext4_listxattr,
--- linux-2618-rc4-ext4.orig/fs/ext4/super.c
+++ linux-2618-rc4-ext4/fs/ext4/super.c
@@ -448,7 +448,7 @@ static struct inode *ext4_alloc_inode(st
ei = kmem_cache_alloc(ext4_inode_cachep, SLAB_NOFS);
if (!ei)
return NULL;
-#ifdef CONFIG_EXT3DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
ei->i_acl = EXT4_ACL_NOT_CACHED;
ei->i_default_acl = EXT4_ACL_NOT_CACHED;
#endif
@@ -470,7 +470,7 @@ static void init_once(void * foo, kmem_c
if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
SLAB_CTOR_CONSTRUCTOR) {
INIT_LIST_HEAD(&ei->i_orphan);
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
init_rwsem(&ei->xattr_sem);
#endif
mutex_init(&ei->truncate_mutex);
@@ -499,7 +499,7 @@ static void destroy_inodecache(void)
static void ext4_clear_inode(struct inode *inode)
{
struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info;
-#ifdef CONFIG_EXT3DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
if (EXT4_I(inode)->i_acl &&
EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
posix_acl_release(EXT4_I(inode)->i_acl);
@@ -793,7 +793,7 @@ static int parse_options (char *options,
case Opt_orlov:
clear_opt (sbi->s_mount_opt, OLDALLOC);
break;
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
case Opt_user_xattr:
set_opt (sbi->s_mount_opt, XATTR_USER);
break;
@@ -806,7 +806,7 @@ static int parse_options (char *options,
printk("EXT4 (no)user_xattr options not supported\n");
break;
#endif
-#ifdef CONFIG_EXT3DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
case Opt_acl:
set_opt(sbi->s_mount_opt, POSIX_ACL);
break;
@@ -2683,9 +2683,9 @@ static int ext4_get_sb(struct file_syste
return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
}

-static struct file_system_type ext3dev_fs_type = {
+static struct file_system_type ext4dev_fs_type = {
.owner = THIS_MODULE,
- .name = "ext3dev",
+ .name = "ext4dev",
.get_sb = ext4_get_sb,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
@@ -2699,7 +2699,7 @@ static int __init init_ext4_fs(void)
err = init_inodecache();
if (err)
goto out1;
- err = register_filesystem(&ext3dev_fs_type);
+ err = register_filesystem(&ext4dev_fs_type);
if (err)
goto out;
return 0;
@@ -2712,7 +2712,7 @@ out1:

static void __exit exit_ext4_fs(void)
{
- unregister_filesystem(&ext3dev_fs_type);
+ unregister_filesystem(&ext4dev_fs_type);
destroy_inodecache();
exit_ext4_xattr();
}
--- linux-2618-rc4-ext4.orig/fs/ext4/symlink.c
+++ linux-2618-rc4-ext4/fs/ext4/symlink.c
@@ -34,7 +34,7 @@ struct inode_operations ext4_symlink_ino
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext4_listxattr,
@@ -45,7 +45,7 @@ struct inode_operations ext4_symlink_ino
struct inode_operations ext4_fast_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = ext4_follow_link,
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext4_listxattr,
--- linux-2618-rc4-ext4.orig/fs/ext4/xattr.c
+++ linux-2618-rc4-ext4/fs/ext4/xattr.c
@@ -104,12 +104,12 @@ static struct mb_cache *ext4_xattr_cache

static struct xattr_handler *ext4_xattr_handler_map[] = {
[EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler,
-#ifdef CONFIG_EXT3DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
[EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler,
[EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler,
#endif
[EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler,
-#ifdef CONFIG_EXT3DEV_FS_SECURITY
+#ifdef CONFIG_EXT4DEV_FS_SECURITY
[EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler,
#endif
};
@@ -117,11 +117,11 @@ static struct xattr_handler *ext4_xattr_
struct xattr_handler *ext4_xattr_handlers[] = {
&ext4_xattr_user_handler,
&ext4_xattr_trusted_handler,
-#ifdef CONFIG_EXT3DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
&ext4_xattr_acl_access_handler,
&ext4_xattr_acl_default_handler,
#endif
-#ifdef CONFIG_EXT3DEV_FS_SECURITY
+#ifdef CONFIG_EXT4DEV_FS_SECURITY
&ext4_xattr_security_handler,
#endif
NULL
--- linux-2618-rc4-ext4.orig/include/linux/ext4_fs_i.h
+++ linux-2618-rc4-ext4/include/linux/ext4_fs_i.h
@@ -103,7 +103,7 @@ struct ext4_inode_info {
struct ext4_block_alloc_info *i_block_alloc_info;

__u32 i_dir_start_lookup;
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
/*
* Extended attributes can be read independently of the main file
* data. Taking i_mutex even when reading would cause contention
@@ -113,7 +113,7 @@ struct ext4_inode_info {
*/
struct rw_semaphore xattr_sem;
#endif
-#ifdef CONFIG_EXT3DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
struct posix_acl *i_acl;
struct posix_acl *i_default_acl;
#endif

2006-08-14 17:52:50

by Jeff Garzik

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

Randy.Dunlap wrote:
> @@ -2683,9 +2683,9 @@ static int ext4_get_sb(struct file_syste
> return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
> }
>
> -static struct file_system_type ext3dev_fs_type = {
> +static struct file_system_type ext4dev_fs_type = {
> .owner = THIS_MODULE,
> - .name = "ext3dev",
> + .name = "ext4dev",
> .get_sb = ext4_get_sb,
> .kill_sb = kill_block_super,
> .fs_flags = FS_REQUIRES_DEV,
> @@ -2699,7 +2699,7 @@ static int __init init_ext4_fs(void)
> err = init_inodecache();
> if (err)
> goto out1;
> - err = register_filesystem(&ext3dev_fs_type);
> + err = register_filesystem(&ext4dev_fs_type);
> if (err)
> goto out;
> return 0;
> @@ -2712,7 +2712,7 @@ out1:
>
> static void __exit exit_ext4_fs(void)
> {
> - unregister_filesystem(&ext3dev_fs_type);
> + unregister_filesystem(&ext4dev_fs_type);
> destroy_inodecache();
> exit_ext4_xattr();


IMO these non-CONFIG bits should just be ext4_

Jeff


2006-08-14 18:02:41

by Randy Dunlap

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

On Mon, 14 Aug 2006 13:52:42 -0400 Jeff Garzik wrote:

> IMO these non-CONFIG bits should just be ext4_

Agreed. Replacement patch below.

---
From: Randy Dunlap <[email protected]>

Rename ext3dev to ext4dev.

Signed-off-by: Randy Dunlap <[email protected]>
---
fs/Kconfig | 58 +++++++++++++++++++++++-----------------------
fs/Makefile | 2 -
fs/ext4/Makefile | 10 +++----
fs/ext4/acl.h | 6 ++--
fs/ext4/file.c | 2 -
fs/ext4/inode.c | 2 -
fs/ext4/namei.c | 6 ++--
fs/ext4/super.c | 18 +++++++-------
fs/ext4/symlink.c | 4 +--
fs/ext4/xattr.c | 8 +++---
fs/ext4/xattr.h | 8 +++---
include/linux/ext4_fs_i.h | 4 +--
12 files changed, 64 insertions(+), 64 deletions(-)

--- linux-2618-rc4-ext4.orig/fs/Kconfig
+++ linux-2618-rc4-ext4/fs/Kconfig
@@ -138,38 +138,38 @@ config EXT3_FS_SECURITY
If you are not using a security module that requires using
extended attributes for file security labels, say N.

-config EXT3DEV_FS
- tristate "Ext3dev/ext4 extended fs support development (EXPERIMENTAL)"
+config EXT4DEV_FS
+ tristate "Ext4dev extended fs support development (EXPERIMENTAL)"
depends on EXPERIMENTAL
select JBD2
help
- Ext3dev is a predecessor filesystem of the next generation
+ Ext4dev is a predecessor filesystem of the next generation
extended fs ext4, based on ext3 filesystem code. It will be
- renamed ext4 fs later, once ext3dev is mature and stabilized.
+ renamed ext4 fs later, once ext4dev is mature and stabilized.

Unlike the change from ext2 filesystem to ext3 filesystem,
- the on-disk format of ext3dev is not the same as ext3 any more:
+ the on-disk format of ext4dev is not the same as ext3 any more:
it is based on extent maps and it supports 48-bit physical block
numbers. These combined on-disk format changes will allow
- ext3dev/ext4 to handle more than 16 TB filesystem volumes --
+ ext4dev to handle more than 16 TB filesystem volumes --
a hard limit that ext3 cannot overcome without changing the
on-disk format.

- Other than extent maps and 48-bit block numbers, ext3dev also is
+ Other than extent maps and 48-bit block numbers, ext4dev also is
likely to have other new features such as persistent preallocation,
high resolution time stamps, and larger file support etc. These
- features will be added to ext3dev gradually.
+ features will be added to ext4dev gradually.

To compile this file system support as a module, choose M here. The
- module will be called ext3dev. Be aware, however, that the filesystem
+ module will be called ext4dev. Be aware, however, that the filesystem
of your root partition (the one containing the directory /) cannot
be compiled as a module, and so this could be dangerous.

If unsure, say N.

-config EXT3DEV_FS_XATTR
- bool "Ext3dev extended attributes"
- depends on EXT3DEV_FS
+config EXT4DEV_FS_XATTR
+ bool "Ext4dev extended attributes"
+ depends on EXT4DEV_FS
default y
help
Extended attributes are name:value pairs associated with inodes by
@@ -178,11 +178,11 @@ config EXT3DEV_FS_XATTR

If unsure, say N.

- You need this for POSIX ACL support on ext3dev/ext4.
+ You need this for POSIX ACL support on ext4dev.

-config EXT3DEV_FS_POSIX_ACL
- bool "Ext3dev POSIX Access Control Lists"
- depends on EXT3DEV_FS_XATTR
+config EXT4DEV_FS_POSIX_ACL
+ bool "Ext4dev POSIX Access Control Lists"
+ depends on EXT4DEV_FS_XATTR
select FS_POSIX_ACL
help
POSIX Access Control Lists (ACLs) support permissions for users and
@@ -193,14 +193,14 @@ config EXT3DEV_FS_POSIX_ACL

If you don't know what Access Control Lists are, say N

-config EXT3DEV_FS_SECURITY
- bool "Ext3dev Security Labels"
- depends on EXT3DEV_FS_XATTR
+config EXT4DEV_FS_SECURITY
+ bool "Ext4dev Security Labels"
+ depends on EXT4DEV_FS_XATTR
help
Security labels support alternative access control models
implemented by security modules like SELinux. This option
enables an extended attribute handler for file security
- labels in the ext3dev/ext4 filesystem.
+ labels in the ext4dev filesystem.

If you are not using a security module that requires using
extended attributes for file security labels, say N.
@@ -242,22 +242,22 @@ config JBD2
help
This is a generic journaling layer for block devices that support
both 32-bit and 64-bit block numbers. It is currently used by
- the ext3dev/ext4 filesystem, but it could also be used to add
+ the ext4dev filesystem, but it could also be used to add
journal support to other file systems or block devices such
as RAID or LVM.

- If you are using ext3dev/ext4, you need to say Y here. If you are not
- using ext3dev/ext4 then you will probably want to say N.
+ If you are using ext4dev, you need to say Y here. If you are not
+ using ext4dev then you will probably want to say N.

To compile this device as a module, choose M here. The module will be
- called jbd2. If you are compiling ext3dev/ext4 into the kernel,
+ called jbd2. If you are compiling ext4dev into the kernel,
you cannot compile this code as a module.

config JBD2_DEBUG
- bool "JBD2 (ext3dev/ext4) debugging support"
+ bool "JBD2 (ext4dev) debugging support"
depends on JBD2
help
- If you are using the ext3dev/ext4 journaled file system (or
+ If you are using the ext4dev journaled file system (or
potentially any other filesystem/device using JBD2), this option
allows you to enable debugging output while the system is running,
in order to help track down any problems you are having.
@@ -272,9 +272,9 @@ config JBD2_DEBUG
config FS_MBCACHE
# Meta block cache for Extended Attributes (ext2/ext3/ext4)
tristate
- depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT3DEV_FS_XATTR
- default y if EXT2_FS=y || EXT3_FS=y || EXT3DEV_FS=y
- default m if EXT2_FS=m || EXT3_FS=m || EXT3DEV_FS=m
+ depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR
+ default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y
+ default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m

config REISERFS_FS
tristate "Reiserfs support"
--- linux-2618-rc4-ext4.orig/fs/Makefile
+++ linux-2618-rc4-ext4/fs/Makefile
@@ -54,7 +54,7 @@ obj-$(CONFIG_PROFILING) += dcookies.o
# Do not add any filesystems before this line
obj-$(CONFIG_REISERFS_FS) += reiserfs/
obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3
-obj-$(CONFIG_EXT3DEV_FS) += ext4/ # Before ext2 so root fs can be ext3dev
+obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext3dev
obj-$(CONFIG_JBD) += jbd/
obj-$(CONFIG_JBD2) += jbd2/
obj-$(CONFIG_EXT2_FS) += ext2/
--- linux-2618-rc4-ext4.orig/fs/ext4/Makefile
+++ linux-2618-rc4-ext4/fs/ext4/Makefile
@@ -2,11 +2,11 @@
# Makefile for the linux ext4-filesystem routines.
#

-obj-$(CONFIG_EXT3DEV_FS) += ext3dev.o
+obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o

-ext3dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o

-ext3dev-$(CONFIG_EXT3DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
-ext3dev-$(CONFIG_EXT3DEV_FS_POSIX_ACL) += acl.o
-ext3dev-$(CONFIG_EXT3DEV_FS_SECURITY) += xattr_security.o
+ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o
+ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY) += xattr_security.o
--- linux-2618-rc4-ext4.orig/fs/ext4/acl.h
+++ linux-2618-rc4-ext4/fs/ext4/acl.h
@@ -51,7 +51,7 @@ static inline int ext4_acl_count(size_t
}
}

-#ifdef CONFIG_EXT3DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL

/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl
if the ACL has not been cached */
@@ -62,7 +62,7 @@ extern int ext4_permission (struct inode
extern int ext4_acl_chmod (struct inode *);
extern int ext4_init_acl (handle_t *, struct inode *, struct inode *);

-#else /* CONFIG_EXT3DEV_FS_POSIX_ACL */
+#else /* CONFIG_EXT4DEV_FS_POSIX_ACL */
#include <linux/sched.h>
#define ext4_permission NULL

@@ -77,5 +77,5 @@ ext4_init_acl(handle_t *handle, struct i
{
return 0;
}
-#endif /* CONFIG_EXT3DEV_FS_POSIX_ACL */
+#endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */

--- linux-2618-rc4-ext4.orig/fs/ext4/xattr.h
+++ linux-2618-rc4-ext4/fs/ext4/xattr.h
@@ -56,7 +56,7 @@ struct ext4_xattr_entry {
#define EXT4_XATTR_SIZE(size) \
(((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND)

-# ifdef CONFIG_EXT3DEV_FS_XATTR
+# ifdef CONFIG_EXT4DEV_FS_XATTR

extern struct xattr_handler ext4_xattr_user_handler;
extern struct xattr_handler ext4_xattr_trusted_handler;
@@ -79,7 +79,7 @@ extern void exit_ext4_xattr(void);

extern struct xattr_handler *ext4_xattr_handlers[];

-# else /* CONFIG_EXT3DEV_FS_XATTR */
+# else /* CONFIG_EXT4DEV_FS_XATTR */

static inline int
ext4_xattr_get(struct inode *inode, int name_index, const char *name,
@@ -131,9 +131,9 @@ exit_ext4_xattr(void)

#define ext4_xattr_handlers NULL

-# endif /* CONFIG_EXT3DEV_FS_XATTR */
+# endif /* CONFIG_EXT4DEV_FS_XATTR */

-#ifdef CONFIG_EXT3DEV_FS_SECURITY
+#ifdef CONFIG_EXT4DEV_FS_SECURITY
extern int ext4_init_security(handle_t *handle, struct inode *inode,
struct inode *dir);
#else
--- linux-2618-rc4-ext4.orig/fs/ext4/file.c
+++ linux-2618-rc4-ext4/fs/ext4/file.c
@@ -126,7 +126,7 @@ const struct file_operations ext4_file_o
struct inode_operations ext4_file_inode_operations = {
.truncate = ext4_truncate,
.setattr = ext4_setattr,
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext4_listxattr,
--- linux-2618-rc4-ext4.orig/fs/ext4/inode.c
+++ linux-2618-rc4-ext4/fs/ext4/inode.c
@@ -2589,7 +2589,7 @@ void ext4_read_inode(struct inode * inod
struct buffer_head *bh;
int block;

-#ifdef CONFIG_EXT3DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
ei->i_acl = EXT4_ACL_NOT_CACHED;
ei->i_default_acl = EXT4_ACL_NOT_CACHED;
#endif
--- linux-2618-rc4-ext4.orig/fs/ext4/namei.c
+++ linux-2618-rc4-ext4/fs/ext4/namei.c
@@ -1689,7 +1689,7 @@ retry:
err = PTR_ERR(inode);
if (!IS_ERR(inode)) {
init_special_inode(inode, inode->i_mode, rdev);
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
inode->i_op = &ext4_special_inode_operations;
#endif
err = ext4_add_nondir(handle, dentry, inode);
@@ -2364,7 +2364,7 @@ struct inode_operations ext4_dir_inode_o
.mknod = ext4_mknod,
.rename = ext4_rename,
.setattr = ext4_setattr,
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext4_listxattr,
@@ -2375,7 +2375,7 @@ struct inode_operations ext4_dir_inode_o

struct inode_operations ext4_special_inode_operations = {
.setattr = ext4_setattr,
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext4_listxattr,
--- linux-2618-rc4-ext4.orig/fs/ext4/super.c
+++ linux-2618-rc4-ext4/fs/ext4/super.c
@@ -448,7 +448,7 @@ static struct inode *ext4_alloc_inode(st
ei = kmem_cache_alloc(ext4_inode_cachep, SLAB_NOFS);
if (!ei)
return NULL;
-#ifdef CONFIG_EXT3DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
ei->i_acl = EXT4_ACL_NOT_CACHED;
ei->i_default_acl = EXT4_ACL_NOT_CACHED;
#endif
@@ -470,7 +470,7 @@ static void init_once(void * foo, kmem_c
if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
SLAB_CTOR_CONSTRUCTOR) {
INIT_LIST_HEAD(&ei->i_orphan);
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
init_rwsem(&ei->xattr_sem);
#endif
mutex_init(&ei->truncate_mutex);
@@ -499,7 +499,7 @@ static void destroy_inodecache(void)
static void ext4_clear_inode(struct inode *inode)
{
struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info;
-#ifdef CONFIG_EXT3DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
if (EXT4_I(inode)->i_acl &&
EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
posix_acl_release(EXT4_I(inode)->i_acl);
@@ -793,7 +793,7 @@ static int parse_options (char *options,
case Opt_orlov:
clear_opt (sbi->s_mount_opt, OLDALLOC);
break;
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
case Opt_user_xattr:
set_opt (sbi->s_mount_opt, XATTR_USER);
break;
@@ -806,7 +806,7 @@ static int parse_options (char *options,
printk("EXT4 (no)user_xattr options not supported\n");
break;
#endif
-#ifdef CONFIG_EXT3DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
case Opt_acl:
set_opt(sbi->s_mount_opt, POSIX_ACL);
break;
@@ -2683,9 +2683,9 @@ static int ext4_get_sb(struct file_syste
return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
}

-static struct file_system_type ext3dev_fs_type = {
+static struct file_system_type ext4_fs_type = {
.owner = THIS_MODULE,
- .name = "ext3dev",
+ .name = "ext4dev",
.get_sb = ext4_get_sb,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
@@ -2699,7 +2699,7 @@ static int __init init_ext4_fs(void)
err = init_inodecache();
if (err)
goto out1;
- err = register_filesystem(&ext3dev_fs_type);
+ err = register_filesystem(&ext4_fs_type);
if (err)
goto out;
return 0;
@@ -2712,7 +2712,7 @@ out1:

static void __exit exit_ext4_fs(void)
{
- unregister_filesystem(&ext3dev_fs_type);
+ unregister_filesystem(&ext4_fs_type);
destroy_inodecache();
exit_ext4_xattr();
}
--- linux-2618-rc4-ext4.orig/fs/ext4/symlink.c
+++ linux-2618-rc4-ext4/fs/ext4/symlink.c
@@ -34,7 +34,7 @@ struct inode_operations ext4_symlink_ino
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext4_listxattr,
@@ -45,7 +45,7 @@ struct inode_operations ext4_symlink_ino
struct inode_operations ext4_fast_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = ext4_follow_link,
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = ext4_listxattr,
--- linux-2618-rc4-ext4.orig/fs/ext4/xattr.c
+++ linux-2618-rc4-ext4/fs/ext4/xattr.c
@@ -104,12 +104,12 @@ static struct mb_cache *ext4_xattr_cache

static struct xattr_handler *ext4_xattr_handler_map[] = {
[EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler,
-#ifdef CONFIG_EXT3DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
[EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler,
[EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler,
#endif
[EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler,
-#ifdef CONFIG_EXT3DEV_FS_SECURITY
+#ifdef CONFIG_EXT4DEV_FS_SECURITY
[EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler,
#endif
};
@@ -117,11 +117,11 @@ static struct xattr_handler *ext4_xattr_
struct xattr_handler *ext4_xattr_handlers[] = {
&ext4_xattr_user_handler,
&ext4_xattr_trusted_handler,
-#ifdef CONFIG_EXT3DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
&ext4_xattr_acl_access_handler,
&ext4_xattr_acl_default_handler,
#endif
-#ifdef CONFIG_EXT3DEV_FS_SECURITY
+#ifdef CONFIG_EXT4DEV_FS_SECURITY
&ext4_xattr_security_handler,
#endif
NULL
--- linux-2618-rc4-ext4.orig/include/linux/ext4_fs_i.h
+++ linux-2618-rc4-ext4/include/linux/ext4_fs_i.h
@@ -103,7 +103,7 @@ struct ext4_inode_info {
struct ext4_block_alloc_info *i_block_alloc_info;

__u32 i_dir_start_lookup;
-#ifdef CONFIG_EXT3DEV_FS_XATTR
+#ifdef CONFIG_EXT4DEV_FS_XATTR
/*
* Extended attributes can be read independently of the main file
* data. Taking i_mutex even when reading would cause contention
@@ -113,7 +113,7 @@ struct ext4_inode_info {
*/
struct rw_semaphore xattr_sem;
#endif
-#ifdef CONFIG_EXT3DEV_FS_POSIX_ACL
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
struct posix_acl *i_acl;
struct posix_acl *i_default_acl;
#endif

2006-08-14 18:14:00

by Jeff Garzik

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

Randy.Dunlap wrote:
> On Mon, 14 Aug 2006 13:52:42 -0400 Jeff Garzik wrote:
>
>> IMO these non-CONFIG bits should just be ext4_
>
> Agreed. Replacement patch below.
>
> ---
> From: Randy Dunlap <[email protected]>
>
> Rename ext3dev to ext4dev.
>
> Signed-off-by: Randy Dunlap <[email protected]>

ACK


2006-08-15 18:59:11

by Pavel Machek

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

Hi!

> > AM> - The existing comments could benefit from some rework by a
> > AM> native English speaker.
> >
> > could someone assist here, please?
>
> See if this helps.
> Patch applies on top of all ext4 patches from
> http://ext2.sourceforge.net/48bitext3/patches/latest/.

> --- linux-2618-rc4-ext4.orig/include/linux/ext4_fs_extents.h
> +++ linux-2618-rc4-ext4/include/linux/ext4_fs_extents.h
> @@ -22,29 +22,29 @@
> #include <linux/ext4_fs.h>
>
> /*
> - * with AGRESSIVE_TEST defined capacity of index/leaf blocks
> - * become very little, so index split, in-depth growing and
> - * other hard changes happens much more often
> - * this is for debug purposes only
> + * With AGRESSIVE_TEST defined, the capacity of index/leaf blocks
> + * becomes very small, so index split, in-depth growing and
> + * other hard changes happen much more often.
> + * This is for debug purposes only.
> */
> #define AGRESSIVE_TEST_

Using _ for disabling is unusual/nasty. Can't we simply #undef it?
--
Thanks for all the (sleeping) penguins.

2006-08-18 13:08:12

by Andreas Dilger

[permalink] [raw]
Subject: Re: [Ext2-devel] [PATCH 1/9] extents for ext4

On Aug 15, 2006 15:40 +0000, Pavel Machek wrote:
> > --- linux-2618-rc4-ext4.orig/include/linux/ext4_fs_extents.h
> > +++ linux-2618-rc4-ext4/include/linux/ext4_fs_extents.h
> > @@ -22,29 +22,29 @@
> > #include <linux/ext4_fs.h>
> >
> > /*
> > - * with AGRESSIVE_TEST defined capacity of index/leaf blocks
> > - * become very little, so index split, in-depth growing and
> > - * other hard changes happens much more often
> > - * this is for debug purposes only
> > + * With AGRESSIVE_TEST defined, the capacity of index/leaf blocks
> > + * becomes very small, so index split, in-depth growing and
> > + * other hard changes happen much more often.
> > + * This is for debug purposes only.
> > */
> > #define AGRESSIVE_TEST_
>
> Using _ for disabling is unusual/nasty.

I've always thought the same. I'd prefer just commenting out the whole
line.

> Can't we simply #undef it?

Use of #undef is not so great, since that means it isn't possible to
#define this flag in another header, on the make command-line, etc.

Cheers, Andreas
--
Andreas Dilger
Principal Software Engineer
Cluster File Systems, Inc.