2007-08-01 15:34:23

by Aneesh Kumar K.V

[permalink] [raw]
Subject: e2fsprogs patches

I have updated the patches to take care of comments posted by Andreas.
I also reworked the undoe2fs to use io_channel rather than open coding
open/write.

Regarding the naming of the tdb file, I decided to go with
mke2fs-<device_name> instead of mke2fs-<device_name>-<time-stamp>.
I think having multiple versions of this file in the tdb_dir would
confuse the user.

-aneesh


2007-08-01 15:35:40

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH 2/4] e2fsprogs: Add undoe2fs

From: Aneesh Kumar K.V <[email protected]>

undoe2fs can be used to replay the transaction saved
in the transaction file using undo I/O Manager

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
misc/Makefile.in | 10 ++-
misc/undoe2fs.c | 217 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 225 insertions(+), 2 deletions(-)
create mode 100644 misc/undoe2fs.c

diff --git a/misc/Makefile.in b/misc/Makefile.in
index ccad78c..51bb17a 100644
--- a/misc/Makefile.in
+++ b/misc/Makefile.in
@@ -15,7 +15,7 @@ INSTALL = @INSTALL@
@IMAGER_CMT@E2IMAGE_MAN= e2image.8

SPROGS= mke2fs badblocks tune2fs dumpe2fs blkid logsave \
- $(E2IMAGE_PROG) @FSCK_PROG@
+ $(E2IMAGE_PROG) @FSCK_PROG@ undoe2fs
USPROGS= mklost+found filefrag
SMANPAGES= tune2fs.8 mklost+found.8 mke2fs.8 dumpe2fs.8 badblocks.8 \
e2label.8 findfs.8 blkid.8 $(E2IMAGE_MAN) \
@@ -39,6 +39,7 @@ E2IMAGE_OBJS= e2image.o
FSCK_OBJS= fsck.o base_device.o
BLKID_OBJS= blkid.o
FILEFRAG_OBJS= filefrag.o
+UNDOE2FS_OBJS= undoe2fs.o

XTRA_CFLAGS= -I$(srcdir)/../e2fsck -I.

@@ -47,7 +48,7 @@ SRCS= $(srcdir)/tune2fs.c $(srcdir)/mklost+found.c $(srcdir)/mke2fs.c \
$(srcdir)/badblocks.c $(srcdir)/fsck.c $(srcdir)/util.c \
$(srcdir)/uuidgen.c $(srcdir)/blkid.c $(srcdir)/logsave.c \
$(srcdir)/filefrag.c $(srcdir)/base_device.c \
- $(srcdir)/../e2fsck/profile.c
+ $(srcdir)/../e2fsck/profile.c $(srcdir)/undoe2fs.c

LIBS= $(LIBEXT2FS) $(LIBCOM_ERR)
DEPLIBS= $(LIBEXT2FS) $(LIBCOM_ERR)
@@ -108,6 +109,10 @@ e2image: $(E2IMAGE_OBJS) $(DEPLIBS)
@echo " LD $@"
@$(CC) $(ALL_LDFLAGS) -o e2image $(E2IMAGE_OBJS) $(LIBS) $(LIBINTL)

+# undoe2fs uses the _() translation macro, so it needs $(LIBINTL) at link
+# time just like the e2image rule above.
+undoe2fs: $(UNDOE2FS_OBJS) $(DEPLIBS)
+	@echo " LD $@"
+	@$(CC) $(ALL_LDFLAGS) -o undoe2fs $(UNDOE2FS_OBJS) $(LIBS) $(LIBINTL)
+
base_device: base_device.c
@echo " LD $@"
@$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) $(srcdir)/base_device.c \
@@ -434,3 +439,4 @@ filefrag.o: $(srcdir)/filefrag.c
base_device.o: $(srcdir)/base_device.c $(srcdir)/fsck.h
profile.o: $(srcdir)/../e2fsck/profile.c $(top_srcdir)/lib/et/com_err.h \
$(srcdir)/../e2fsck/profile.h prof_err.h
+undoe2fs.o: $(srcdir)/undoe2fs.c $(top_srcdir)/lib/ext2fs/tdb.h
diff --git a/misc/undoe2fs.c b/misc/undoe2fs.c
new file mode 100644
index 0000000..db76346
--- /dev/null
+++ b/misc/undoe2fs.c
@@ -0,0 +1,217 @@
+/*
+ * Copyright IBM Corporation, 2007
+ * Author Aneesh Kumar K.V <[email protected]>
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Public
+ * License.
+ * %End-Header%
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif
+#include <fcntl.h>
+#if HAVE_ERRNO_H
+#include <errno.h>
+#endif
+#include "ext2fs/tdb.h"
+#include "ext2fs/ext2fs.h"
+#include "nls-enable.h"
+
+
+
+/*
+ * Print the command line synopsis on stderr, with prg_name substituted
+ * as the program name, and terminate the process with exit status 1.
+ */
+static void usage(char *prg_name)
+{
+	const char *synopsis = _("Usage: %s <transaction file> <filesystem>\n");
+
+	fprintf(stderr, synopsis, prg_name);
+	exit(1);
+}
+/*
+ * Check that the file system reachable through "channel" is the one the
+ * transaction file "tdb" was recorded against.
+ *
+ * The superblock is read from the device and its s_mtime and s_uuid
+ * fields are compared, as raw bytes, with the "filesystem MTIME" and
+ * "filesystem UUID" records of the transaction file.
+ *
+ * Returns 0 when both match and a non-zero value otherwise (an error code
+ * for I/O or tdb failures, -1 for a mismatch or a malformed record).
+ *
+ * Side effect: the channel block size is left at SUPERBLOCK_OFFSET.
+ */
+static int check_filesystem(TDB_CONTEXT *tdb, io_channel channel)
+{
+	__u32 s_mtime;
+	errcode_t retval;
+	TDB_DATA tdb_key, tdb_data;
+	struct ext2_super_block super;
+
+	io_channel_set_blksize(channel, SUPERBLOCK_OFFSET);
+	retval = io_channel_read_blk(channel, 1, -SUPERBLOCK_SIZE, &super);
+	if (retval) {
+		com_err(__FUNCTION__,
+			retval, _("Failed to read the file system data \n"));
+		return retval;
+	}
+
+	/* The key includes the terminating NUL, exactly as it was stored */
+	tdb_key.dptr = (unsigned char *) "filesystem MTIME";
+	tdb_key.dsize = sizeof("filesystem MTIME");
+	tdb_data = tdb_fetch(tdb, tdb_key);
+	if (!tdb_data.dptr) {
+		retval = EXT2_ET_TDB_SUCCESS + tdb_error(tdb);
+		com_err(__FUNCTION__, retval,
+			_("Failed tdb_fetch %s\n"), tdb_errorstr(tdb));
+		return retval;
+	}
+	if (tdb_data.dsize != sizeof(s_mtime)) {
+		/* Never read past a truncated or foreign record */
+		free(tdb_data.dptr);
+		com_err(__FUNCTION__, 0,
+			_("Invalid %s record in the transaction file\n"),
+			"filesystem MTIME");
+		return -1;
+	}
+	memcpy(&s_mtime, tdb_data.dptr, sizeof(s_mtime));
+	/* tdb_fetch hands back a malloc'ed copy that the caller owns */
+	free(tdb_data.dptr);
+	if (super.s_mtime != s_mtime) {
+		com_err(__FUNCTION__, 0,
+			_("The file system Mount time didn't match %u\n"),
+			s_mtime);
+		return -1;
+	}
+
+	tdb_key.dptr = (unsigned char *) "filesystem UUID";
+	tdb_key.dsize = sizeof("filesystem UUID");
+	tdb_data = tdb_fetch(tdb, tdb_key);
+	if (!tdb_data.dptr) {
+		retval = EXT2_ET_TDB_SUCCESS + tdb_error(tdb);
+		com_err(__FUNCTION__, retval,
+			_("Failed tdb_fetch %s\n"), tdb_errorstr(tdb));
+		return retval;
+	}
+	if (tdb_data.dsize != sizeof(super.s_uuid)) {
+		free(tdb_data.dptr);
+		com_err(__FUNCTION__, 0,
+			_("Invalid %s record in the transaction file\n"),
+			"filesystem UUID");
+		return -1;
+	}
+	if (memcmp(tdb_data.dptr, super.s_uuid, sizeof(super.s_uuid))) {
+		free(tdb_data.dptr);
+		com_err(__FUNCTION__, 0,
+			_("The file system UUID didn't match \n"));
+		return -1;
+	}
+	free(tdb_data.dptr);
+
+	return 0;
+}
+
+/*
+ * Set the block size of "channel" to the value saved in the
+ * "filesystem BLKSIZE" record of the transaction file "tdb".
+ *
+ * Returns 0 on success and a non-zero value when the record is missing,
+ * has an unexpected size, or the channel rejects the block size.
+ */
+static int set_blk_size(TDB_CONTEXT *tdb, io_channel channel)
+{
+	int block_size;
+	errcode_t retval;
+	TDB_DATA tdb_key, tdb_data;
+
+	tdb_key.dptr = (unsigned char *) "filesystem BLKSIZE";
+	tdb_key.dsize = sizeof("filesystem BLKSIZE");
+	tdb_data = tdb_fetch(tdb, tdb_key);
+	if (!tdb_data.dptr) {
+		retval = EXT2_ET_TDB_SUCCESS + tdb_error(tdb);
+		com_err(__FUNCTION__, retval,
+			_("Failed tdb_fetch %s\n"), tdb_errorstr(tdb));
+		return retval;
+	}
+	if (tdb_data.dsize != sizeof(block_size)) {
+		/* Never read past a truncated or foreign record */
+		free(tdb_data.dptr);
+		com_err(__FUNCTION__, 0,
+			_("Invalid %s record in the transaction file\n"),
+			"filesystem BLKSIZE");
+		return -1;
+	}
+	memcpy(&block_size, tdb_data.dptr, sizeof(block_size));
+	/* tdb_fetch hands back a malloc'ed copy that the caller owns */
+	free(tdb_data.dptr);
+
+	retval = io_channel_set_blksize(channel, block_size);
+	if (retval) {
+		com_err(__FUNCTION__, retval,
+			_("Failed to set the block size to %d\n"), block_size);
+		return retval;
+	}
+
+	return 0;
+}
+
+/*
+ * Return non-zero when "key" is exactly the identity tag "name".  The
+ * identity records are stored with the terminating NUL as part of the
+ * key, while block records use a binary key, so the comparison has to be
+ * length based rather than strcmp() based.
+ */
+static int is_identity_key(TDB_DATA key, const char *name)
+{
+	size_t len = strlen(name) + 1;
+
+	return key.dsize == len && !memcmp(key.dptr, name, len);
+}
+
+/*
+ * undoe2fs <transaction file> <filesystem>
+ *
+ * Write every block saved in the transaction file back to the (unmounted)
+ * file system.  Unless -f/--force is given, the file system identity is
+ * first checked against the identity stored in the transaction file.
+ *
+ * Exits with status 1 on any failure and returns 0 on success.
+ */
+int main(int argc, char *argv[])
+{
+	int c, force = 0;
+	TDB_CONTEXT *tdb;
+	TDB_DATA key, next_key, data;
+	io_channel channel;
+	errcode_t retval;
+	int mount_flags;
+	unsigned long blk_num;
+	char *device_name, *tdb_file, *prg_name;
+	io_manager manager = unix_io_manager;
+
+	static struct option long_opt[] = {
+		{"force", 0, 0, 'f'},
+		{0, 0, 0, 0}
+	};
+
+	prg_name = argv[0];
+	while ((c = getopt_long(argc, argv, "f",
+				long_opt, NULL)) != EOF) {
+		switch (c) {
+		case 'f':
+			force = 1;
+			break;
+		default:
+			usage(prg_name);
+		}
+	}
+
+	if (argc != optind+2)
+		usage(prg_name);
+
+	tdb_file = argv[optind];
+	device_name = argv[optind+1];
+
+	tdb = tdb_open(tdb_file, 0, 0, O_RDONLY, 0600);
+	if (!tdb) {
+		/* No errcode_t is available here; errno is in the message */
+		com_err(prg_name, 0,
+			_("Failed tdb_open %s\n"), strerror(errno));
+		exit(1);
+	}
+
+	retval = ext2fs_check_if_mounted(device_name, &mount_flags);
+	if (retval) {
+		com_err(prg_name, retval, _("Error while determining whether "
+				"%s is mounted.\n"), device_name);
+		exit(1);
+	}
+
+	if (mount_flags & EXT2_MF_MOUNTED) {
+		com_err(prg_name, retval, _("undoe2fs should only be run on "
+				"unmounted file system\n"));
+		exit(1);
+	}
+
+	retval = manager->open(device_name,
+			       IO_FLAG_EXCLUSIVE | IO_FLAG_RW, &channel);
+	if (retval) {
+		com_err(prg_name, retval,
+			_("Failed to open %s\n"), device_name);
+		exit(1);
+	}
+
+	if (!force && check_filesystem(tdb, channel)) {
+		exit(1);
+	}
+
+	if (set_blk_size(tdb, channel)) {
+		exit(1);
+	}
+
+	key = tdb_firstkey(tdb);
+	while (key.dptr) {
+		if (!is_identity_key(key, "filesystem MTIME") &&
+		    !is_identity_key(key, "filesystem UUID") &&
+		    !is_identity_key(key, "filesystem BLKSIZE")) {
+
+			/* Everything else must be keyed by a block number */
+			if (key.dsize != sizeof(blk_num)) {
+				com_err(prg_name, 0,
+					_("Unexpected key of size %d in "
+					  "the transaction file\n"),
+					(int) key.dsize);
+				exit(1);
+			}
+			memcpy(&blk_num, key.dptr, sizeof(blk_num));
+
+			data = tdb_fetch(tdb, key);
+			if (!data.dptr) {
+				com_err(prg_name, 0,
+					_("Failed tdb_fetch %s\n"),
+					tdb_errorstr(tdb));
+				exit(1);
+			}
+
+			/* A negative count asks for a write of that many bytes */
+			retval = io_channel_write_blk(channel, blk_num,
+						      -(int) data.dsize,
+						      data.dptr);
+			if (retval) {
+				com_err(prg_name, retval,
+					_("Failed write %s\n"),
+					strerror(errno));
+				exit(1);
+			}
+			/* Report only once the data really is on disk */
+			printf(_("Replayed transaction of size %d at "
+				 "location %lu\n"),
+			       (int) data.dsize, blk_num);
+			free(data.dptr);
+		}
+
+		/* Keys returned by tdb are malloc'ed and owned by us */
+		next_key = tdb_nextkey(tdb, key);
+		free(key.dptr);
+		key = next_key;
+	}
+
+	retval = io_channel_close(channel);
+	if (retval) {
+		com_err(prg_name, retval,
+			_("Failed to close %s\n"), device_name);
+		exit(1);
+	}
+	tdb_close(tdb);
+
+	return 0;
+}
--
1.5.3.rc2.22.g69a9b-dirty

2007-08-01 15:36:20

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH 1/4] e2fsprogs: Add undo I/O manager

From: Aneesh Kumar K.V <[email protected]>

This I/O manager saves the contents of the location being overwritten
to a tdb database. This helps in undoing the changes done to the
file system.

The call sequence involves

set_undo_io_backing_manager(unix_io_manager);
set_undo_io_backup_file("/tmp/test.tdb");
retval = ext2fs_open2(dev_name, 0, flags,
superblock, block_size, undo_io_manager,
&current_fs);

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
lib/ext2fs/Makefile.in | 7 +-
lib/ext2fs/ext2_io.h | 5 +
lib/ext2fs/undo_io.c | 565 ++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 575 insertions(+), 2 deletions(-)
create mode 100644 lib/ext2fs/undo_io.c

diff --git a/lib/ext2fs/Makefile.in b/lib/ext2fs/Makefile.in
index 70e18e7..7afd5eb 100644
--- a/lib/ext2fs/Makefile.in
+++ b/lib/ext2fs/Makefile.in
@@ -66,7 +66,8 @@ OBJS= $(DEBUGFS_LIB_OBJS) $(RESIZE_LIB_OBJS) $(E2IMAGE_LIB_OBJS) \
unix_io.o \
unlink.o \
valid_blk.o \
- version.o
+ version.o \
+ undo_io.o

SRCS= ext2_err.c \
$(srcdir)/alloc.c \
@@ -132,7 +133,8 @@ SRCS= ext2_err.c \
$(srcdir)/tst_bitops.c \
$(srcdir)/tst_byteswap.c \
$(srcdir)/tst_getsize.c \
- $(srcdir)/tst_iscan.c
+ $(srcdir)/tst_iscan.c \
+ $(srcdir)/undo_io.c

HFILES= bitops.h ext2fs.h ext2_io.h ext2_fs.h ext2_ext_attr.h ext3_extents.h \
tdb.h
@@ -573,3 +575,4 @@ tst_iscan.o: $(srcdir)/tst_iscan.c $(srcdir)/ext2_fs.h \
$(top_builddir)/lib/ext2fs/ext2_types.h $(srcdir)/ext2fs.h \
$(srcdir)/ext2_fs.h $(srcdir)/ext3_extents.h $(top_srcdir)/lib/et/com_err.h \
$(srcdir)/ext2_io.h $(top_builddir)/lib/ext2fs/ext2_err.h $(srcdir)/bitops.h
+undo_io.o: $(srcdir)/undo_io.c $(srcdir)/ext2_fs.h $(srcdir)/ext2fs.h
diff --git a/lib/ext2fs/ext2_io.h b/lib/ext2fs/ext2_io.h
index eada278..476eb4d 100644
--- a/lib/ext2fs/ext2_io.h
+++ b/lib/ext2fs/ext2_io.h
@@ -96,6 +96,11 @@ extern errcode_t io_channel_write_byte(io_channel channel,
/* unix_io.c */
extern io_manager unix_io_manager;

+/* undo_io.c */
+extern io_manager undo_io_manager;
+extern errcode_t set_undo_io_backing_manager(io_manager manager);
+extern errcode_t set_undo_io_backup_file(char *file_name);
+
/* test_io.c */
extern io_manager test_io_manager, test_io_backing_manager;
extern void (*test_io_cb_read_blk)
diff --git a/lib/ext2fs/undo_io.c b/lib/ext2fs/undo_io.c
new file mode 100644
index 0000000..30e2514
--- /dev/null
+++ b/lib/ext2fs/undo_io.c
@@ -0,0 +1,565 @@
+/*
+ * undo_io.c --- This is the undo io manager that copies the old data
+ * being overwritten into a tdb database
+ *
+ * Copyright IBM Corporation, 2007
+ * Author Aneesh Kumar K.V <[email protected]>
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Public
+ * License.
+ * %End-Header%
+ */
+
+#define _LARGEFILE_SOURCE
+#define _LARGEFILE64_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#if HAVE_ERRNO_H
+#include <errno.h>
+#endif
+#include <fcntl.h>
+#include <time.h>
+#ifdef __linux__
+#include <sys/utsname.h>
+#endif
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#if HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+
+#include "tdb.h"
+
+#include "ext2_fs.h"
+#include "ext2fs.h"
+
+/*
+ * For checking structure magic numbers...
+ *
+ * Makes the enclosing function return "code" when the magic field of the
+ * structure pointed to by "struct" is not equal to "code".
+ *
+ * The expansion is a bare "if" statement, so the macro must only be used
+ * as a statement of its own (never directly in front of an "else").
+ */
+
+#define EXT2_CHECK_MAGIC(struct, code) \
+ if ((struct)->magic != (code)) return (code)
+
+
+
+/*
+ * Per-channel state of the undo I/O manager, hung off
+ * io_channel->private_data by undo_open().
+ */
+struct undo_private_data {
+ /* Set to EXT2_ET_MAGIC_UNIX_IO_CHANNEL by undo_open() and verified by
+ * every handler through EXT2_CHECK_MAGIC */
+ int magic;
+ /* Database the overwritten blocks are saved in; NULL disables saving */
+ TDB_CONTEXT *tdb;
+ /* NOTE(review): not referenced anywhere in this file; the file-scope
+ * tdb_file variable is what undo_open() actually uses */
+ char *tdb_file;
+
+ /* The backing io channel */
+ io_channel real;
+
+ /* to support offset in unix I/O manager */
+ ext2_loff_t offset;
+};
+
+static errcode_t undo_open(const char *name, int flags, io_channel *channel);
+static errcode_t undo_close(io_channel channel);
+static errcode_t undo_set_blksize(io_channel channel, int blksize);
+static errcode_t undo_read_blk(io_channel channel, unsigned long block,
+ int count, void *data);
+static errcode_t undo_write_blk(io_channel channel, unsigned long block,
+ int count, const void *data);
+static errcode_t undo_flush(io_channel channel);
+static errcode_t undo_write_byte(io_channel channel, unsigned long offset,
+ int size, const void *data);
+static errcode_t undo_set_option(io_channel channel, const char *option,
+ const char *arg);
+
+/*
+ * Operations table of the undo I/O manager.  This is a positional
+ * initializer; the entries presumably follow the member order of
+ * struct struct_io_manager in ext2_io.h -- confirm when that structure
+ * changes.
+ */
+static struct struct_io_manager struct_undo_manager = {
+ EXT2_ET_MAGIC_IO_MANAGER,
+ "Undo I/O Manager",
+ undo_open,
+ undo_close,
+ undo_set_blksize,
+ undo_read_blk,
+ undo_write_blk,
+ undo_flush,
+ undo_write_byte,
+ undo_set_option
+};
+
+/* Public handle through which callers select the undo I/O manager */
+io_manager undo_io_manager = &struct_undo_manager;
+/* Manager doing the real I/O; set by set_undo_io_backing_manager() */
+static io_manager undo_io_backing_manager ;
+/* Name of the tdb file; set by set_undo_io_backup_file() */
+static char *tdb_file ;
+/*
+ * Size of one saved record.  Set to the channel block size by the first
+ * call of undo_write_tdb() that reaches the save path; nothing in this
+ * file resets it afterwards.
+ */
+static int tdb_data_size = 0;
+
+/*
+ * Select the I/O manager that performs the real device I/O on behalf of
+ * the undo I/O manager.  Must be called before the undo channel is opened.
+ * The manager is stored as is, without validation.  Always returns 0.
+ */
+errcode_t set_undo_io_backing_manager(io_manager manager)
+{
+	/* Validation of the manager may be added here later */
+	undo_io_backing_manager = manager;
+
+	return 0;
+}
+
+/*
+ * Set the name of the tdb file in which the undo I/O manager saves the
+ * data that is about to be overwritten.  A private copy of file_name is
+ * kept, so the caller's buffer may be reused afterwards.
+ *
+ * Returns 0 on success, EXT2_ET_INVALID_ARGUMENT when file_name is NULL
+ * and EXT2_ET_NO_MEMORY when the copy cannot be allocated.  On failure
+ * the previously configured name, if any, is left untouched.
+ */
+errcode_t set_undo_io_backup_file(char *file_name)
+{
+	char *copy;
+
+	if (file_name == NULL)
+		return EXT2_ET_INVALID_ARGUMENT;
+
+	copy = strdup(file_name);
+	if (copy == NULL) {
+		return EXT2_ET_NO_MEMORY;
+	}
+
+	/* Release the name of an earlier call instead of leaking it */
+	free(tdb_file);
+	tdb_file = copy;
+
+	return 0;
+}
+
+/*
+ * Store the identity of the file system in the transaction database so
+ * that the undo tool can verify it is replaying onto the right device.
+ *
+ * Three records are inserted: "filesystem MTIME" (super.s_mtime),
+ * "filesystem UUID" (super.s_uuid) and "filesystem BLKSIZE" (the current
+ * block size of undo_channel).  Each key includes its terminating NUL.
+ *
+ * The superblock is read through the backing channel, whose block size is
+ * changed temporarily and restored before returning.
+ *
+ * Returns 0 on success, the read error, or EXT2_ET_TDB_SUCCESS plus the
+ * tdb error of the store that failed.
+ *
+ * NOTE(review): data->real is dereferenced unconditionally, so this
+ * assumes a backing channel is present.
+ */
+static errcode_t write_file_system_identity(io_channel undo_channel,
+ TDB_CONTEXT *tdb)
+{
+ errcode_t retval;
+ struct ext2_super_block super;
+ TDB_DATA tdb_key, tdb_data;
+ struct undo_private_data *data;
+ io_channel channel;
+ int block_size ;
+
+ data = (struct undo_private_data *) undo_channel->private_data;
+ channel = data->real;
+ /* Remember the backing block size; it is restored at err_out */
+ block_size = channel->block_size;
+
+ /* Block 1 of SUPERBLOCK_OFFSET sized blocks; a negative count is a
+ * length in bytes */
+ io_channel_set_blksize(channel, SUPERBLOCK_OFFSET);
+ retval = io_channel_read_blk(channel, 1, -SUPERBLOCK_SIZE, &super);
+ if (retval)
+ goto err_out;
+
+ /* Write to tdb file in the file system byte order */
+ tdb_key.dptr = "filesystem MTIME";
+ tdb_key.dsize = sizeof("filesystem MTIME");
+ tdb_data.dptr = (unsigned char *) &(super.s_mtime);
+ tdb_data.dsize = sizeof(super.s_mtime);
+
+ retval = tdb_store(tdb, tdb_key, tdb_data, TDB_INSERT);
+ if (retval == -1) {
+ retval = EXT2_ET_TDB_SUCCESS + tdb_error(tdb);
+ goto err_out;
+ }
+
+ tdb_key.dptr = "filesystem UUID";
+ tdb_key.dsize = sizeof("filesystem UUID");
+ tdb_data.dptr = (unsigned char *)&(super.s_uuid);
+ tdb_data.dsize = sizeof(super.s_uuid);
+
+ retval = tdb_store(tdb, tdb_key, tdb_data, TDB_INSERT);
+ if (retval == -1) {
+ retval = EXT2_ET_TDB_SUCCESS + tdb_error(tdb);
+ goto err_out;
+ }
+
+ /* Also store the block size */
+ tdb_key.dptr = "filesystem BLKSIZE";
+ tdb_key.dsize = sizeof("filesystem BLKSIZE");
+ tdb_data.dptr = (unsigned char *)&(undo_channel->block_size);
+ tdb_data.dsize = sizeof(undo_channel->block_size);
+
+ retval = tdb_store(tdb, tdb_key, tdb_data, TDB_INSERT);
+ if (retval == -1) {
+ retval = EXT2_ET_TDB_SUCCESS + tdb_error(tdb);
+ }
+
+err_out:
+ /* Reached on success as well: put the backing block size back */
+ io_channel_set_blksize(channel, block_size);
+ return retval;
+}
+
+/*
+ * Save the current on-disk contents of the region that is about to be
+ * overwritten into the transaction database.
+ *
+ * channel: the undo channel being written to
+ * block:   first block of the write, in units of channel->block_size and
+ *          relative to the channel offset
+ * count:   number of blocks when positive, number of bytes when negative
+ *
+ * The device is divided into records of tdb_data_size bytes, keyed by
+ * their record number.  A record that is already present is left alone,
+ * so the database always holds the oldest contents.
+ *
+ * Returns 0 on success (also when no database is configured), otherwise
+ * the error of the failing allocation or read, or EXT2_ET_TDB_ERR_IO when
+ * the record cannot be stored.
+ */
+static errcode_t undo_write_tdb(io_channel channel,
+				unsigned long block, int count)
+
+{
+	int size, read_len, skip;
+	unsigned long block_num, end_block, backing_blk_num;
+	errcode_t retval = 0;
+	ext2_loff_t start, offset, rel;
+	struct undo_private_data *data;
+	TDB_DATA tdb_key, tdb_data;
+	char *read_ptr;
+
+	data = (struct undo_private_data *) channel->private_data;
+
+	if (data->tdb == NULL) {
+		/*
+		 * Transaction database not initialized
+		 */
+		return 0;
+	}
+
+	/*
+	 * Set the block size used to read for tdb
+	 */
+	if (!tdb_data_size) {
+		tdb_data_size = channel->block_size;
+
+		/*
+		 * First write. Write the file system identity
+		 */
+		retval = write_file_system_identity(channel, data->tdb);
+		if (retval)
+			return retval;
+	}
+
+	if (count < 0)
+		size = -count;
+	else
+		size = count * channel->block_size;
+
+	if (size == 0)
+		return 0;
+
+	/*
+	 * Data is stored in tdb database as blocks of tdb_data_size size
+	 * This helps in efficient lookup further.
+	 *
+	 * We divide the disk to blocks of tdb_data_size.
+	 *
+	 * The arithmetic is done in ext2_loff_t so that it does not wrap
+	 * where unsigned long is 32 bits wide.  The last record is derived
+	 * from the last byte written, because a write that does not start
+	 * on a record boundary touches one record more than its size alone
+	 * suggests.
+	 */
+	start = (ext2_loff_t) block * channel->block_size + data->offset;
+	block_num = start / tdb_data_size;
+	end_block = (start + size - 1) / tdb_data_size;
+
+	tdb_transaction_start(data->tdb);
+	for (; block_num <= end_block; block_num++) {
+
+		tdb_key.dptr = (unsigned char *)&block_num;
+		tdb_key.dsize = sizeof(block_num);
+
+		/*
+		 * Check if we have the record already
+		 */
+		if (tdb_exists(data->tdb, tdb_key)) {
+			/* Try the next block */
+			continue;
+		}
+
+		/*
+		 * Read one block using the backing I/O manager
+		 * The backing I/O manager block size may be
+		 * different from the tdb_data_size.
+		 * Also we need to recalculate the block number with respect
+		 * to the backing I/O manager.
+		 */
+		offset = (ext2_loff_t) block_num * tdb_data_size;
+		rel = offset - data->offset;
+		backing_blk_num = rel / channel->block_size;
+		/* Bytes between the backing block start and the record */
+		skip = rel % channel->block_size;
+		read_len = tdb_data_size + skip;
+
+		retval = ext2fs_get_mem(read_len, &read_ptr);
+		if (retval) {
+			tdb_transaction_cancel(data->tdb);
+			return retval;
+		}
+
+		memset(read_ptr, 0, read_len);
+
+		/* A negative count is a length in bytes */
+		retval = io_channel_read_blk(data->real,
+					     backing_blk_num,
+					     -read_len, read_ptr);
+		if (retval) {
+			ext2fs_free_mem(&read_ptr);
+			tdb_transaction_cancel(data->tdb);
+			return retval;
+		}
+
+		tdb_data.dptr = (unsigned char *) read_ptr + skip;
+		tdb_data.dsize = tdb_data_size;
+
+#ifdef DEBUG
+		printf("Printing with key %lu data %p and size %d\n",
+		       block_num,
+		       (void *) tdb_data.dptr, tdb_data_size);
+#endif
+
+		if (tdb_store(data->tdb, tdb_key, tdb_data,
+			      TDB_INSERT) == -1) {
+			/*
+			 * TDB_ERR_EXISTS cannot happen because we
+			 * have already verified it doesn't exist
+			 */
+			tdb_transaction_cancel(data->tdb);
+			ext2fs_free_mem(&read_ptr);
+			return EXT2_ET_TDB_ERR_IO;
+		}
+		ext2fs_free_mem(&read_ptr);
+	}
+
+	tdb_transaction_commit(data->tdb);
+
+	return 0;
+}
+
+/*
+ * Create the transaction database "tdb_file" and record its handle in
+ * data->tdb.  The open is exclusive (O_CREAT | O_EXCL), so it fails when
+ * the file exists already.
+ *
+ * Returns the database handle, or NULL when no file name was configured
+ * or the database could not be created.
+ */
+static TDB_CONTEXT *undo_setup_tdb(char *tdb_file,
+				   struct undo_private_data *data)
+{
+	if (tdb_file == NULL) {
+		/* set_undo_io_backup_file() was never called */
+		data->tdb = NULL;
+		return NULL;
+	}
+
+	data->tdb = tdb_open(tdb_file, 0, TDB_CLEAR_IF_FIRST,
+			     O_RDWR | O_CREAT | O_TRUNC | O_EXCL, 0600);
+	return data->tdb;
+}
+/*
+ * Open an undo channel for the device "name".
+ *
+ * The device itself is opened through the backing manager, when one was
+ * configured with set_undo_io_backing_manager(), and the transaction
+ * database is created under the name given to set_undo_io_backup_file().
+ *
+ * On success 0 is returned and *channel is set.  On failure an error code
+ * is returned, everything allocated so far is released and *channel is
+ * left untouched.
+ */
+static errcode_t undo_open(const char *name, int flags, io_channel *channel)
+{
+	io_channel io = NULL;
+	struct undo_private_data *data = NULL;
+	errcode_t retval;
+
+	if (name == 0)
+		return EXT2_ET_BAD_DEVICE_NAME;
+	retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
+	if (retval)
+		return retval;
+	memset(io, 0, sizeof(struct struct_io_channel));
+	io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
+	retval = ext2fs_get_mem(sizeof(struct undo_private_data), &data);
+	if (retval)
+		goto cleanup;
+
+	/*
+	 * Clear the private data right away so that the cleanup path never
+	 * looks at uninitialized members
+	 */
+	memset(data, 0, sizeof(struct undo_private_data));
+	data->magic = EXT2_ET_MAGIC_UNIX_IO_CHANNEL;
+
+	io->manager = undo_io_manager;
+	retval = ext2fs_get_mem(strlen(name)+1, &io->name);
+	if (retval)
+		goto cleanup;
+
+	strcpy(io->name, name);
+	io->private_data = data;
+	io->block_size = 1024;
+	io->read_error = 0;
+	io->write_error = 0;
+	io->refcount = 1;
+
+	if (undo_io_backing_manager) {
+		retval = undo_io_backing_manager->open(name, flags,
+						       &data->real);
+		if (retval)
+			goto cleanup;
+	} else {
+		data->real = 0;
+	}
+
+	/* setup the tdb file */
+	if (undo_setup_tdb(tdb_file, data) == NULL) {
+		/*
+		 * This retval results in the below
+		 * string in com_err
+		 * "TDB: Record exists". This helps
+		 * in finding out that the error is
+		 * with respect to TDB
+		 */
+		retval = EXT2_ET_TDB_ERR_EXISTS;
+		goto cleanup;
+	}
+
+	*channel = io;
+	return 0;
+
+cleanup:
+	/* data is still NULL when its own allocation failed */
+	if (data) {
+		if (data->real)
+			io_channel_close(data->real);
+		ext2fs_free_mem(&data);
+	}
+
+	/* io was cleared at allocation, so name is NULL or allocated */
+	if (io->name)
+		ext2fs_free_mem(&io->name);
+	ext2fs_free_mem(&io);
+
+	return retval;
+}
+
+/*
+ * Drop one reference to the undo channel.  When the last reference goes
+ * away the backing channel and the transaction database are closed and
+ * all memory owned by the channel is released.
+ *
+ * Returns the result of closing the backing channel, or 0 when there is
+ * none or when references remain.
+ */
+static errcode_t undo_close(io_channel channel)
+{
+	struct undo_private_data *priv;
+	errcode_t status = 0;
+
+	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+	priv = (struct undo_private_data *) channel->private_data;
+	EXT2_CHECK_MAGIC(priv, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+	channel->refcount--;
+	if (channel->refcount > 0)
+		return 0;
+
+	/* Last reference: tear everything down */
+	if (priv->real)
+		status = io_channel_close(priv->real);
+
+	if (priv->tdb)
+		tdb_close(priv->tdb);
+
+	ext2fs_free_mem(&channel->private_data);
+	if (channel->name)
+		ext2fs_free_mem(&channel->name);
+	ext2fs_free_mem(&channel);
+
+	return status;
+}
+
+/*
+ * Set the block size of the undo channel and of its backing channel.
+ *
+ * Returns the result of the backing channel, or 0 when there is no
+ * backing channel.  The block size of the undo channel itself is updated
+ * in either case.
+ */
+static errcode_t undo_set_blksize(io_channel channel, int blksize)
+{
+	struct undo_private_data *data;
+	/* Must start at 0: it is returned as is without a backing channel */
+	errcode_t retval = 0;
+
+	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+	data = (struct undo_private_data *) channel->private_data;
+	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+	if (data->real)
+		retval = io_channel_set_blksize(data->real, blksize);
+
+	channel->block_size = blksize;
+	return retval;
+}
+
+
+/*
+ * Read "count" blocks starting at "block" into "buf" by handing the
+ * request to the backing channel unchanged.  Reads are never recorded.
+ *
+ * Returns the result of the backing channel, or 0 when there is no
+ * backing channel (buf is not touched in that case).
+ */
+static errcode_t undo_read_blk(io_channel channel, unsigned long block,
+			       int count, void *buf)
+{
+	/* Must start at 0: it is returned as is without a backing channel */
+	errcode_t retval = 0;
+	struct undo_private_data *data;
+
+	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+	data = (struct undo_private_data *) channel->private_data;
+	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+	if (data->real)
+		retval = io_channel_read_blk(data->real, block, count, buf);
+
+	return retval;
+}
+
+/*
+ * Write "count" blocks from "buf" starting at "block".  The contents that
+ * are about to be overwritten are saved in the transaction database
+ * first; when saving fails nothing is written and that error is returned.
+ *
+ * Otherwise returns the result of the backing channel, or 0 when there is
+ * no backing channel.
+ */
+static errcode_t undo_write_blk(io_channel channel, unsigned long block,
+				int count, const void *buf)
+{
+	struct undo_private_data *priv;
+	errcode_t status;
+
+	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+	priv = (struct undo_private_data *) channel->private_data;
+	EXT2_CHECK_MAGIC(priv, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+	/* Preserve the old contents before they are lost */
+	status = undo_write_tdb(channel, block, count);
+	if (status)
+		return status;
+
+	if (!priv->real)
+		return 0;
+
+	return io_channel_write_blk(priv->real, block, count, buf);
+}
+
+/*
+ * Write "size" bytes from "buf" at byte "offset" of the channel.  Every
+ * block touched by the write is saved in the transaction database first;
+ * when saving fails nothing is written and that error is returned.
+ *
+ * Otherwise returns the result of the backing channel, or 0 when there is
+ * no backing channel or it does not implement write_byte.
+ */
+static errcode_t undo_write_byte(io_channel channel, unsigned long offset,
+				 int size, const void *buf)
+{
+	struct undo_private_data *data;
+	errcode_t retval = 0;
+	unsigned long blk_num;
+	int count;
+
+	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+	data = (struct undo_private_data *) channel->private_data;
+	EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+	/*
+	 * undo_write_tdb() takes a block number relative to the channel
+	 * and adds data->offset itself, so the offset must not be added
+	 * here as well
+	 */
+	blk_num = offset / channel->block_size;
+	/*
+	 * the size specified may spread across multiple blocks
+	 * so count every block between the one holding the first
+	 * byte and the one holding the last byte
+	 */
+	count = (int) ((size + (offset % channel->block_size) +
+			channel->block_size - 1) / channel->block_size);
+
+	retval = undo_write_tdb(channel, blk_num, count);
+	if (retval)
+		return retval;
+
+	if (data->real && data->real->manager->write_byte)
+		retval = io_channel_write_byte(data->real, offset, size, buf);
+
+	return retval;
+}
+
+/*
+ * Flush data buffers to disk.
+ */
+/*
+ * Returns the result of flushing the backing channel, or 0 when there is
+ * no backing channel.
+ */
+static errcode_t undo_flush(io_channel channel)
+{
+	struct undo_private_data *priv;
+
+	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+	priv = (struct undo_private_data *) channel->private_data;
+	EXT2_CHECK_MAGIC(priv, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+	if (!priv->real)
+		return 0;
+
+	return io_channel_flush(priv->real);
+}
+
+/*
+ * Pass "option" with its argument "arg" on to the backing channel, when
+ * it has a set_option handler.  If that succeeds, or no handler exists,
+ * and the option is "offset", the numeric value of arg is also remembered
+ * in the private data so that saved blocks are keyed consistently with
+ * the Unix I/O manager.
+ *
+ * Returns the error of the backing channel, EXT2_ET_INVALID_ARGUMENT for
+ * a missing or malformed "offset" argument, and 0 otherwise.
+ */
+static errcode_t undo_set_option(io_channel channel, const char *option,
+				 const char *arg)
+{
+	errcode_t status = 0;
+	struct undo_private_data *priv;
+	unsigned long value;
+	char *rest;
+
+	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+	priv = (struct undo_private_data *) channel->private_data;
+	EXT2_CHECK_MAGIC(priv, EXT2_ET_MAGIC_UNIX_IO_CHANNEL);
+
+	if (priv->real && priv->real->manager->set_option)
+		status = priv->real->manager->set_option(priv->real,
+							 option, arg);
+
+	/* Only a successfully applied "offset" needs tracking here */
+	if (status || strcmp(option, "offset") != 0)
+		return status;
+
+	if (arg == NULL)
+		return EXT2_ET_INVALID_ARGUMENT;
+
+	value = strtoul(arg, &rest, 0);
+	if (*rest != '\0')
+		return EXT2_ET_INVALID_ARGUMENT;
+
+	priv->offset = value;
+	return status;
+}
--
1.5.3.rc2.22.g69a9b-dirty

2007-08-01 15:37:11

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH 3/4] e2fsprogs: Make mke2fs use undo I/O manager.

From: Aneesh Kumar K.V <[email protected]>

When running mke2fs, if a file system is detected
on the device, we use Undo I/O manager as the io manager.
This helps in reverting the changes made to the filesystem
in case we wrongly selected the device.

The environment variable MKE2FS_SCRATCH_DIR
is used to indicate the directory within which the tdb
file needs to be created. The file will be named mke2fs-XXXXXX.
If MKE2FS_SCRATCH_DIR is not set, /var/lib/e2fsprogs is used.


Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
misc/mke2fs.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 108 insertions(+), 1 deletions(-)

diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index 0c6d4f3..3ff4b90 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -44,6 +44,7 @@ extern int optind;
#endif
#include <sys/ioctl.h>
#include <sys/types.h>
+#include <libgen.h>

#include "ext2fs/ext2_fs.h"
#include "et/com_err.h"
@@ -1521,6 +1522,102 @@ static void PRS(int argc, char *argv[])
fs_param.s_blocks_count);
}

+/*
+ * Tell whether the device "name" carries an ext2 superblock magic.
+ *
+ * Returns 1 when the magic is found and 0 otherwise.  A device that
+ * cannot be opened or read is reported as having no file system; the
+ * rest of mke2fs is left to diagnose such errors.
+ */
+static int filesystem_exist(const char *name)
+{
+	int found = 0;
+	io_channel channel;
+	__u16 s_magic;
+	struct ext2_super_block super;
+	io_manager manager = unix_io_manager;
+
+	if (manager->open(name, IO_FLAG_EXCLUSIVE, &channel))
+		return 0;
+
+	/* Block 1 of SUPERBLOCK_OFFSET sized blocks; a negative count is a
+	 * length in bytes */
+	io_channel_set_blksize(channel, SUPERBLOCK_OFFSET);
+	if (!io_channel_read_blk(channel, 1, -SUPERBLOCK_SIZE, &super)) {
+#if defined(WORDS_BIGENDIAN)
+		s_magic = ext2fs_swab16(super.s_magic);
+#else
+		s_magic = super.s_magic;
+#endif
+		if (s_magic == EXT2_SUPER_MAGIC)
+			found = 1;
+	}
+
+	io_channel_close(channel);
+
+	return found;
+}
+
+/*
+ * Choose the transaction file for the device "name" and register it with
+ * the undo I/O manager.
+ *
+ * The file is called mke2fs-<basename of name> and lives in the directory
+ * named by the MKE2FS_SCRATCH_DIR environment variable, or in
+ * /var/lib/e2fsprogs when the variable is not set.  An existing file is
+ * never reused.
+ *
+ * Returns 0 on success and a non-zero error code otherwise; a message has
+ * been printed on stderr in the latter case.
+ */
+static int mke2fs_setup_tdb(const char *name)
+{
+	errcode_t retval = 0;
+	char *tdb_dir, tdb_file[PATH_MAX];
+	char *device_name, *tmp_name;
+	int len;
+
+#if 0 /* FIXME!! */
+	/*
+	 * Configuration via a conf file would be
+	 * nice
+	 */
+	profile_get_string(profile, "scratch_files",
+			   "directory", 0, 0,
+			   &tdb_dir);
+#endif
+	/* basename() may modify its argument, so work on a copy */
+	tmp_name = strdup(name);
+	if (!tmp_name) {
+		fprintf(stderr,
+			_("Out of memory while setting up the undo file\n"));
+		return EXT2_ET_NO_MEMORY;
+	}
+	device_name = basename(tmp_name);
+
+	tdb_dir = getenv("MKE2FS_SCRATCH_DIR");
+	if (!tdb_dir) {
+		printf(_("MKE2FS_SCRATCH_DIR not configured\n"));
+		printf(_("Using /var/lib/e2fsprogs\n"));
+		tdb_dir="/var/lib/e2fsprogs";
+	}
+	if (access(tdb_dir, W_OK)) {
+		fprintf(stderr,
+			_("Cannot create file under %s\n"),
+			tdb_dir);
+		retval = EXT2_ET_INVALID_ARGUMENT;
+		goto err_out;
+	}
+
+	/* The directory comes from the environment: bound the copy */
+	len = snprintf(tdb_file, sizeof(tdb_file),
+		       "%s/mke2fs-%s", tdb_dir, device_name);
+	if (len < 0 || (size_t) len >= sizeof(tdb_file)) {
+		fprintf(stderr,
+			_("File name too long under %s\n"),
+			tdb_dir);
+		retval = EXT2_ET_INVALID_ARGUMENT;
+		goto err_out;
+	}
+
+	if (!access(tdb_file, F_OK)) {
+		fprintf(stderr,
+			_("File exist %s\n"), tdb_file);
+		retval = EXT2_ET_INVALID_ARGUMENT;
+		goto err_out;
+	}
+
+	retval = set_undo_io_backup_file(tdb_file);
+	if (retval) {
+		fprintf(stderr,
+			_("Out of memory while setting up the undo file\n"));
+		goto err_out;
+	}
+	printf(_("previous filesystem detected; to undo "
+		 "the mke2fs operation, please run the "
+		 "command \n'undoe2fs %s %s' in order to recover\n\n"),
+	       tdb_file, name);
+err_out:
+	free(tmp_name);
+	return retval;
+}
+
int main (int argc, char *argv[])
{
errcode_t retval = 0;
@@ -1543,7 +1640,17 @@ int main (int argc, char *argv[])
io_ptr = test_io_manager;
test_io_backing_manager = unix_io_manager;
#else
- io_ptr = unix_io_manager;
+ if (filesystem_exist(device_name)) {
+
+ io_ptr = undo_io_manager;
+ set_undo_io_backing_manager(unix_io_manager);
+ retval = mke2fs_setup_tdb(device_name);
+ if (retval)
+ exit(1);
+
+ } else {
+ io_ptr = unix_io_manager;
+ }
#endif

/*
--
1.5.3.rc2.22.g69a9b-dirty

2007-08-01 15:36:42

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH 4/4] e2fsprogs: Support for large inode migration.

From: Aneesh Kumar K.V <[email protected]>

Add a new option -I <inode_size> to tune2fs.
This is used to change the inode size. The size
needs to be a power of 2, and we don't allow
decreasing the inode size.

As a part of increasing the inode size we increase the
inode table size. We also move the used data blocks around
and update the respective inodes to point to the new block


tune2fs uses the undo I/O manager when migrating to large
inodes. This helps in reverting the changes if the end results
are not correct. The environment variable TUNE2FS_SCRATCH_DIR
is used to indicate the directory within which the tdb
file needs to be created. The file will be named tune2fs-XXXXXX.
If TUNE2FS_SCRATCH_DIR is not set, /var/lib/e2fsprogs is used.

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
misc/tune2fs.c | 535 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 532 insertions(+), 3 deletions(-)

diff --git a/misc/tune2fs.c b/misc/tune2fs.c
index 833b994..88842a2 100644
--- a/misc/tune2fs.c
+++ b/misc/tune2fs.c
@@ -42,6 +42,7 @@ extern int optind;
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
+#include <libgen.h>

#include "ext2fs/ext2_fs.h"
#include "ext2fs/ext2fs.h"
@@ -61,6 +62,7 @@ char * new_label, *new_last_mounted, *new_UUID;
char * io_options;
static int c_flag, C_flag, e_flag, f_flag, g_flag, i_flag, l_flag, L_flag;
static int m_flag, M_flag, r_flag, s_flag = -1, u_flag, U_flag, T_flag;
+static int I_flag;
static time_t last_check_time;
static int print_label;
static int max_mount_count, mount_count, mount_flags;
@@ -71,10 +73,20 @@ static unsigned short errors;
static int open_flag;
static char *features_cmd;
static char *mntopts_cmd;
+static unsigned long int new_inode_size;

int journal_size, journal_flags;
char *journal_device;

+static struct list_head blk_move_list;
+
+/*
+ * One relocated block.  move_block() appends an entry to blk_move_list
+ * for every block it copies to a new location.
+ */
+struct blk_move {
+ /* Linkage in blk_move_list */
+ struct list_head list;
+ /* Block number the data was copied from */
+ blk_t old_loc;
+ /* Block number the data was copied to */
+ blk_t new_loc;
+};
+
+
static const char *please_fsck = N_("Please run e2fsck on the filesystem.\n");

void do_findfs(int argc, char **argv);
@@ -89,7 +101,8 @@ static void usage(void)
"\t[-o [^]mount_options[,...]] [-r reserved_blocks_count]\n"
"\t[-u user] [-C mount_count] [-L volume_label] "
"[-M last_mounted_dir]\n"
- "\t[-O [^]feature[,...]] [-T last_check_time] [-U UUID]"
+ "\t[-O [^]feature[,...]] [-T last_check_time] [-U UUID]\n"
+ "\t[ -I new_inode_size ]"
" device\n"), program_name);
exit (1);
}
@@ -505,7 +518,7 @@ static void parse_tune2fs_options(int argc, char **argv)
struct passwd * pw;

printf("tune2fs %s (%s)\n", E2FSPROGS_VERSION, E2FSPROGS_DATE);
- while ((c = getopt(argc, argv, "c:e:fg:i:jlm:o:r:s:u:C:J:L:M:O:T:U:")) != EOF)
+ while ((c = getopt(argc, argv, "c:e:fg:i:jlm:o:r:s:u:C:J:L:M:O:T:U:I:")) != EOF)
switch (c)
{
case 'c':
@@ -702,6 +715,25 @@ static void parse_tune2fs_options(int argc, char **argv)
open_flag = EXT2_FLAG_RW |
EXT2_FLAG_JOURNAL_DEV_OK;
break;
+ case 'I':
+ new_inode_size = strtoul (optarg, &tmp, 0);
+ if (*tmp) {
+ com_err (program_name, 0,
+ _("bad Inode size - %s"),
+ optarg);
+ usage();
+ }
+ if (!((new_inode_size &
+ (new_inode_size - 1)) == 0)) {
+ com_err (program_name, 0,
+ _("Inode size must be a "
+ "power of two- %s"),
+ optarg);
+ usage();
+ }
+ open_flag = EXT2_FLAG_RW;
+ I_flag = 1;
+ break;
default:
usage();
}
@@ -739,6 +771,469 @@ void do_findfs(int argc, char **argv)
exit(0);
}

+/*
+ * Work out which blocks have to be vacated so that every group's inode
+ * table can grow in place to new_ino_blks_per_grp blocks.
+ *
+ * For each group the blocks that follow the current inode table, up to the
+ * new table length, are examined.  Blocks that are already in use are
+ * flagged in bmap so the caller can relocate them.  Blocks that are free
+ * are marked used in fs->block_map right away, because they are about to
+ * become part of the inode table.
+ *
+ * Returns 0 on success, or ENOSPC when the free block count recorded in
+ * the superblock cannot cover both the blocks claimed for the larger
+ * tables and the blocks that must be relocated.
+ */
+static int get_move_bitmap(ext2_filsys fs, int new_ino_blks_per_grp,
+			   ext2fs_block_bitmap bmap)
+{
+	dgrp_t i;
+	blk_t j, needed_blocks = 0, claimed_blocks = 0;
+	blk_t start_blk, end_blk;
+
+	for (i = 0; i < fs->group_desc_count; i++) {
+		/* First block past the current table ... */
+		start_blk = fs->group_desc[i].bg_inode_table +
+				fs->inode_blocks_per_group;
+		/* ... and first block past the enlarged table. */
+		end_blk = fs->group_desc[i].bg_inode_table +
+				new_ino_blks_per_grp;
+
+		for (j = start_blk; j < end_blk; j++) {
+			if (ext2fs_test_block_bitmap(fs->block_map, j)) {
+				/* FIXME!!
+				 * What happens if the block is marked
+				 * as a bad block
+				 */
+				ext2fs_mark_block_bitmap(bmap, j);
+				needed_blocks++;
+			} else {
+				/*
+				 * We are going to use this block for
+				 * inode table. So mark them used.
+				 */
+				ext2fs_mark_block_bitmap(fs->block_map, j);
+				claimed_blocks++;
+			}
+		}
+	}
+
+	/*
+	 * The free blocks claimed above can no longer serve as relocation
+	 * targets, so they count against the free block total as well.
+	 */
+	if (needed_blocks + claimed_blocks > fs->super->s_free_blocks_count)
+		return ENOSPC;
+
+	return 0;
+}
+
+/*
+ * Copy every block flagged in bmap to a freshly allocated block.
+ *
+ * Each relocation is marked used in fs->block_map and recorded on
+ * blk_move_list (old location -> new location) before the data is copied,
+ * so the list also covers a block whose copy failed half way.
+ *
+ * Returns 0 on success or the first error reported by the allocator, the
+ * memory allocator or the I/O channel.
+ */
+static int move_block(ext2_filsys fs, ext2fs_block_bitmap bmap)
+{
+	struct blk_move *moved;
+	blk_t src, dst;
+	errcode_t err;
+	char *data;
+
+	err = ext2fs_get_mem(fs->blocksize, &data);
+	if (err)
+		return err;
+
+	src = fs->super->s_first_data_block;
+	while (src < fs->super->s_blocks_count) {
+		if (ext2fs_test_block_bitmap(bmap, src)) {
+			/* The old location is handed to the allocator. */
+			err = ext2fs_new_block(fs, src, NULL, &dst);
+			if (err)
+				break;
+
+			/* Mark this block as allocated */
+			ext2fs_mark_block_bitmap(fs->block_map, dst);
+
+			/* Add it to block move list */
+			err = ext2fs_get_mem(sizeof(struct blk_move), &moved);
+			if (err)
+				break;
+
+			moved->old_loc = src;
+			moved->new_loc = dst;
+			list_add(&(moved->list), &blk_move_list);
+
+			err = io_channel_read_blk(fs->io, src, 1, data);
+			if (err)
+				break;
+
+			err = io_channel_write_blk(fs->io, dst, 1, data);
+			if (err)
+				break;
+		}
+		src++;
+	}
+
+	ext2fs_free_mem(&data);
+	return err;
+}
+/*
+ * Look blk up in blk_move_list.
+ *
+ * Returns the new location when blk was relocated, or 0 when no entry on
+ * the list has blk as its old location.
+ */
+static blk_t transalate_block(blk_t blk)
+{
+	struct list_head *pos;
+	struct blk_move *moved;
+
+	for (pos = blk_move_list.next; pos != &blk_move_list;
+	     pos = pos->next) {
+		moved = list_entry(pos, struct blk_move, list);
+		if (moved->old_loc == blk)
+			return moved->new_loc;
+	}
+
+	return 0;
+}
+
+/*
+ * Block iterator callback: rewrite a block pointer that refers to a
+ * relocated block.
+ *
+ * Returns BLOCK_CHANGED when *block_nr was updated, which forces the
+ * ext2fs_write_inode in the iterator, and 0 otherwise.
+ */
+static int process_block(ext2_filsys fs, blk_t *block_nr,
+			 e2_blkcnt_t blockcnt,
+			 blk_t ref_block EXT2FS_ATTR((unused)),
+			 int ref_offset EXT2FS_ATTR((unused)),
+			 void *priv_data EXT2FS_ATTR((unused)))
+{
+	blk_t moved_to = transalate_block(*block_nr);
+
+	/* Not on the move list: leave the pointer alone. */
+	if (!moved_to)
+		return 0;
+
+	*block_nr = moved_to;
+	return BLOCK_CHANGED;
+}
+
+/*
+ * Walk every in-use inode and redirect references to relocated blocks.
+ *
+ * Both the extended attribute block (i_file_acl) and the blocks reached
+ * through ext2fs_block_iterate2() are translated through blk_move_list.
+ *
+ * Returns 0 on success or the first error reported by libext2fs.
+ */
+static int inode_scan_and_fix(ext2_filsys fs)
+{
+	errcode_t retval = 0;
+	ext2_ino_t ino;
+	blk_t blk;
+	char *block_buf = 0;
+	struct ext2_inode inode;
+	ext2_inode_scan scan = NULL;
+
+	retval = ext2fs_get_mem(fs->blocksize * 3, &block_buf);
+	if (retval)
+		return retval;
+
+	retval = ext2fs_open_inode_scan(fs, 0, &scan);
+	if (retval)
+		goto err_out;
+
+	while (1) {
+		retval = ext2fs_get_next_inode(scan, &ino, &inode);
+		if (retval)
+			goto err_out;
+
+		if (!ino)
+			break;
+
+		if (inode.i_links_count == 0)
+			continue; /* inode not in use */
+
+		/* FIXME!!
+		 * If we end up modifying the journal inode
+		 * the sb->s_jnl_blocks will differ. But a
+		 * subsequent e2fsck fixes that.
+		 * Do we need to fix this ??
+		 */
+
+		if (inode.i_file_acl) {
+			blk = transalate_block(inode.i_file_acl);
+			/*
+			 * An ACL block that did not move must not stop us
+			 * from fixing the inode's other blocks below, so
+			 * only the rewrite is conditional here.
+			 */
+			if (blk) {
+				inode.i_file_acl = blk;
+
+				/*
+				 * Write the inode to disk so that inode
+				 * table resizing can work
+				 */
+				retval = ext2fs_write_inode(fs, ino, &inode);
+				if (retval)
+					goto err_out;
+			}
+		}
+
+		if (!ext2fs_inode_has_valid_blocks(&inode))
+			continue;
+
+		retval = ext2fs_block_iterate2(fs, ino, 0,
+					       block_buf, process_block,
+					       0);
+		if (retval)
+			goto err_out;
+	}
+
+err_out:
+	/* Release the scan handle on every path once it has been opened. */
+	if (scan)
+		ext2fs_close_inode_scan(scan);
+	ext2fs_free_mem(&block_buf);
+
+	return retval;
+}
+
+
+/*
+ * Rewrite every group's inode table with inodes of new_inode_size bytes.
+ *
+ * Each group's table is read with the old geometry, every inode is copied
+ * to the start of its new, larger slot, and the table is written back at
+ * the same starting block using the new block count.  The bytes added to
+ * each inode, and any padding after the last inode, are zero.
+ *
+ * On success fs->inode_blocks_per_group and fs->super->s_inode_size are
+ * updated to the new geometry.  The caller must already have made room
+ * for the larger tables and must pass a new_inode_size that is not
+ * smaller than the current inode size.
+ *
+ * Returns 0 on success or the first allocation or I/O error.
+ */
+static int expand_inode_table(ext2_filsys fs, unsigned long int new_inode_size)
+{
+	dgrp_t i;
+	blk_t blk;
+	errcode_t retval;
+	int new_ino_blks_per_grp;
+	unsigned int j;
+	char *old_itable = NULL, *new_itable = NULL;
+	char *src, *dst;
+	unsigned long int old_inode_size;
+	int old_itable_size, new_itable_size;
+
+	old_itable_size = fs->inode_blocks_per_group * fs->blocksize;
+	old_inode_size = EXT2_INODE_SIZE(fs->super);
+
+	new_ino_blks_per_grp = ext2fs_div_ceil(
+					EXT2_INODES_PER_GROUP(fs->super) *
+					new_inode_size,
+					fs->blocksize);
+
+	new_itable_size = new_ino_blks_per_grp * fs->blocksize;
+
+	retval = ext2fs_get_mem(old_itable_size, &old_itable);
+	if (retval)
+		return retval;
+
+	retval = ext2fs_get_mem(new_itable_size, &new_itable);
+	if (retval)
+		goto err_out;
+
+	for (i = 0; i < fs->group_desc_count; i++) {
+		blk = fs->group_desc[i].bg_inode_table;
+		retval = io_channel_read_blk(fs->io, blk,
+				fs->inode_blocks_per_group, old_itable);
+		if (retval)
+			goto err_out;
+
+		/*
+		 * Start from an all-zero table so that the extra bytes of
+		 * every inode and the padding after the last inode never
+		 * carry uninitialised memory to disk.
+		 */
+		memset(new_itable, 0, new_itable_size);
+
+		src = old_itable;
+		dst = new_itable;
+		for (j = 0; j < EXT2_INODES_PER_GROUP(fs->super); j++) {
+			memcpy(dst, src, old_inode_size);
+			dst += new_inode_size;
+			src += old_inode_size;
+		}
+
+		retval = io_channel_write_blk(fs->io, blk,
+				new_ino_blks_per_grp, new_itable);
+		if (retval)
+			goto err_out;
+	}
+
+	/* Update the meta data */
+	fs->inode_blocks_per_group = new_ino_blks_per_grp;
+	fs->super->s_inode_size = new_inode_size;
+
+err_out:
+	if (old_itable)
+		ext2fs_free_mem(&old_itable);
+
+	if (new_itable)
+		ext2fs_free_mem(&new_itable);
+
+	return retval;
+}
+
+/*
+ * Recompute the free block and free inode counters from the in-memory
+ * bitmaps.
+ *
+ * The per-group counts are stored in the group descriptors, the totals in
+ * the superblock, and the superblock is marked dirty.  Always returns 0.
+ */
+static errcode_t ext2fs_calculate_summary_stats(ext2_filsys fs)
+{
+	blk_t		b;
+	ext2_ino_t	n;
+	unsigned int	grp = 0;
+	unsigned int	seen = 0;
+	int		free_in_fs = 0;
+	int		free_in_grp = 0;
+
+	/*
+	 * Pass 1: blocks.  A group is closed either when a full group's
+	 * worth of blocks has been seen or at the last block of the fs.
+	 */
+	for (b = fs->super->s_first_data_block;
+	     b < fs->super->s_blocks_count; b++) {
+		if (!ext2fs_fast_test_block_bitmap(fs->block_map, b)) {
+			free_in_grp++;
+			free_in_fs++;
+		}
+		seen++;
+		if ((seen != fs->super->s_blocks_per_group) &&
+		    (b != fs->super->s_blocks_count-1))
+			continue;
+
+		fs->group_desc[grp++].bg_free_blocks_count = free_in_grp;
+		seen = 0;
+		free_in_grp = 0;
+	}
+	fs->super->s_free_blocks_count = free_in_fs;
+
+	/*
+	 * Pass 2: inodes, using the same grouping scheme.
+	 */
+	free_in_grp = 0;
+	free_in_fs = 0;
+	seen = 0;
+	grp = 0;
+
+	/* Protect loop from wrap-around if s_inodes_count maxed */
+	for (n = 1; n <= fs->super->s_inodes_count && n > 0; n++) {
+		if (!ext2fs_fast_test_inode_bitmap(fs->inode_map, n)) {
+			free_in_grp++;
+			free_in_fs++;
+		}
+		seen++;
+		if ((seen != fs->super->s_inodes_per_group) &&
+		    (n != fs->super->s_inodes_count))
+			continue;
+
+		fs->group_desc[grp++].bg_free_inodes_count = free_in_grp;
+		seen = 0;
+		free_in_grp = 0;
+	}
+	fs->super->s_free_inodes_count = free_in_fs;
+
+	ext2fs_mark_super_dirty(fs);
+	return 0;
+}
+
+/*
+ * Iterate over a list while allowing the current entry to be removed:
+ * the successor is fetched before the loop body runs.
+ */
+#define list_for_each_safe(pos, pnext, head) \
+	for (pos = (head)->next, pnext = pos->next; pos != (head); \
+	     pos = pnext, pnext = pos->next)
+
+/*
+ * Unlink and free every entry on blk_move_list.
+ */
+static void free_blk_move_list()
+{
+	struct list_head *cur, *next;
+	struct blk_move *moved;
+
+	list_for_each_safe(cur, next, &blk_move_list) {
+		list_del(cur);
+		moved = list_entry(cur, struct blk_move, list);
+		ext2fs_free_mem(&moved);
+	}
+}
+/*
+ * Grow the on-disk inode size of fs to new_inode_size bytes.
+ *
+ * Steps: load the bitmaps, find the blocks that sit where the enlarged
+ * inode tables will go, move them elsewhere, fix every inode that points
+ * at a moved block, rewrite the inode tables, then recompute the free
+ * counters.  EXT2_VALID_FS is cleared while the work is in progress and
+ * set again only when everything succeeded, so a failed run leaves the
+ * filesystem flagged for fsck.
+ *
+ * Returns 0 on success, EXT2_ET_INVALID_ARGUMENT when new_inode_size is
+ * not larger than the current inode size, or the first error reported by
+ * one of the steps.
+ */
+static int resize_inode(ext2_filsys fs, unsigned long int new_inode_size)
+{
+	errcode_t retval;
+	int new_ino_blks_per_grp;
+	ext2fs_block_bitmap bmap;
+
+	if (new_inode_size <= EXT2_INODE_SIZE(fs->super)) {
+		fprintf(stderr, _("New Inode size too small\n"));
+		return EXT2_ET_INVALID_ARGUMENT;
+	}
+
+	/*
+	 * Everything below works on fs->block_map and fs->inode_map, so a
+	 * failed bitmap read has to stop the resize here.
+	 */
+	retval = ext2fs_read_inode_bitmap(fs);
+	if (retval)
+		return retval;
+
+	retval = ext2fs_read_block_bitmap(fs);
+	if (retval)
+		return retval;
+
+	INIT_LIST_HEAD(&blk_move_list);
+
+	new_ino_blks_per_grp = ext2fs_div_ceil(
+					EXT2_INODES_PER_GROUP(fs->super)*
+					new_inode_size,
+					fs->blocksize);
+
+	/* We may change the file system.
+	 * Mark the file system as invalid so that
+	 * the user is prompted to run fsck.
+	 */
+	fs->super->s_state &= ~EXT2_VALID_FS;
+
+	retval = ext2fs_allocate_block_bitmap(fs, _("blocks to be moved"),
+						&bmap);
+	if (retval)
+		return retval;
+
+	retval = get_move_bitmap(fs, new_ino_blks_per_grp, bmap);
+	if (retval)
+		goto err_out;
+
+	retval = move_block(fs, bmap);
+	if (retval)
+		goto err_out;
+
+	retval = inode_scan_and_fix(fs);
+	if (retval)
+		goto err_out;
+
+	retval = expand_inode_table(fs, new_inode_size);
+	if (retval)
+		goto err_out;
+
+	ext2fs_calculate_summary_stats(fs);
+
+	fs->super->s_state |= EXT2_VALID_FS;
+	/* mark super block and block bitmap as dirty */
+	ext2fs_mark_super_dirty(fs);
+	ext2fs_mark_bb_dirty(fs);
+
+err_out:
+	free_blk_move_list();
+	ext2fs_free_block_bitmap(bmap);
+
+	return retval;
+}
+
+/*
+ * Choose the undo (tdb) file for this run and register it with the undo
+ * I/O manager.
+ *
+ * The file is named "<dir>/tune2fs-<basename of name>", where <dir> is
+ * taken from the TUNE2FS_SCRATCH_DIR environment variable and falls back
+ * to /var/lib/e2fsprogs.  An existing file is never reused.
+ *
+ * Returns 0 on success, EXT2_ET_NO_MEMORY when the device name cannot be
+ * duplicated, or EXT2_ET_INVALID_ARGUMENT when the directory is not
+ * writable, the resulting path does not fit in PATH_MAX, or the file
+ * already exists.
+ */
+static int tune2fs_setup_tdb(const char *name)
+{
+	errcode_t retval = 0;
+	char *tdb_dir, tdb_file[PATH_MAX];
+	char *device_name, *tmp_name;
+	int len;
+
+#if 0 /* FIXME!! */
+	/*
+	 * Configuration via a conf file would be
+	 * nice
+	 */
+	profile_get_string(profile, "scratch_files",
+					"directory", 0, 0,
+					&tdb_dir);
+#endif
+	/* basename() may modify its argument, so work on a private copy. */
+	tmp_name = strdup(name);
+	if (!tmp_name)
+		return EXT2_ET_NO_MEMORY;
+	device_name = basename(tmp_name);
+
+	tdb_dir = getenv("TUNE2FS_SCRATCH_DIR");
+	if (!tdb_dir) {
+		com_err(__FUNCTION__, 0,
+			_("TUNE2FS_SCRATCH_DIR not configured\n"));
+		printf(_("Using /var/lib/e2fsprogs\n"));
+		tdb_dir="/var/lib/e2fsprogs";
+	}
+
+	if (access(tdb_dir, W_OK)) {
+		fprintf(stderr,
+			_("Cannot create file under %s\n"),
+			tdb_dir);
+		retval = EXT2_ET_INVALID_ARGUMENT;
+		goto err_out;
+	}
+
+	/*
+	 * The directory comes from the environment, so bound the write and
+	 * reject a path that would have been truncated.
+	 */
+	len = snprintf(tdb_file, sizeof(tdb_file), "%s/tune2fs-%s",
+		       tdb_dir, device_name);
+	if (len < 0 || (size_t) len >= sizeof(tdb_file)) {
+		fprintf(stderr,
+			_("Undo file name under %s is too long\n"),
+			tdb_dir);
+		retval = EXT2_ET_INVALID_ARGUMENT;
+		goto err_out;
+	}
+
+	if (!access(tdb_file, F_OK)) {
+		fprintf(stderr,
+			_("File exist %s\n"), tdb_file);
+		retval = EXT2_ET_INVALID_ARGUMENT;
+		goto err_out;
+	}
+
+	set_undo_io_backup_file(tdb_file);
+	printf(_("To undo the tune2fs operations please run "
+		 "the command\nundoe2fs %s %s\n\n"),
+		 tdb_file, name);
+err_out:
+	free(tmp_name);
+	return retval;
+}

int main (int argc, char ** argv)
{
@@ -768,7 +1263,19 @@ int main (int argc, char ** argv)
io_ptr = test_io_manager;
test_io_backing_manager = unix_io_manager;
#else
- io_ptr = unix_io_manager;
+ if (I_flag) {
+ /*
+ * If inode resize is requested use the
+ * Undo I/O manager
+ */
+ io_ptr = undo_io_manager;
+ set_undo_io_backing_manager(unix_io_manager);
+ retval = tune2fs_setup_tdb(device_name);
+ if (retval)
+ exit(1);
+ } else {
+ io_ptr = unix_io_manager;
+ }
#endif
retval = ext2fs_open2(device_name, io_options, open_flag,
0, 0, io_ptr, &fs);
@@ -919,6 +1426,28 @@ int main (int argc, char ** argv)
}
ext2fs_mark_super_dirty(fs);
}
+ if (I_flag) {
+ if (mount_flags & EXT2_MF_MOUNTED) {
+ fputs(_("The Inode size may only be "
+ "changed when the filesystem is "
+ "unmounted.\n"), stderr);
+ exit(1);
+ }
+ /*
+ * We want to update group descriptor also
+ * with the new free inode count
+ */
+ fs->flags &= ~EXT2_FLAG_SUPER_ONLY;
+ if (resize_inode(fs, new_inode_size)) {
+
+ fputs(_("Error in resizing the Inode.\n"
+ "Run undoe2fs to undo the "
+ "file system changes. \n"), stderr);
+ } else {
+ printf (_("Setting Inode size %d\n"),
+ new_inode_size);
+ }
+ }

if (l_flag)
list_super (sb);
--
1.5.3.rc2.22.g69a9b-dirty

2007-08-02 15:09:54

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH 1/4] e2fsprogs: Add undo I/O manager

On Aug 01, 2007 21:04 +0530, Aneesh Kumar K.V wrote:
> +static errcode_t write_file_system_identity(io_channel undo_channel,
> + TDB_CONTEXT *tdb)
> +{
> + /* Write to tdb file in the file system byte order */
> + tdb_key.dptr = "filesystem MTIME";
> + tdb_key.dsize = sizeof("filesystem MTIME");
> + tdb_data.dptr = (unsigned char *) &(super.s_mtime);
> + tdb_data.dsize = sizeof(super.s_mtime);
> +
> + tdb_key.dptr = "filesystem UUID";
> + tdb_key.dsize = sizeof("filesystem UUID");
> + tdb_data.dptr = (unsigned char *)&(super.s_uuid);
> + tdb_data.dsize = sizeof(super.s_uuid);

Is this the mtime and UUID of the new filesystem or the old one? It
should be the UUID and mtime of the new filesystem, so that the
undo file can be verified against the current superblock. This poses
a bit of a problem, because that information isn't saved until after
the mke2fs run is finished.

One possibility is to overwrite this information at the end of mke2fs
after the new UUID and mtime are written?

Cheers, Andreas
--
Andreas Dilger
Principal Software Engineer
Cluster File Systems, Inc.

2007-08-02 15:09:53

by Andreas Dilger

[permalink] [raw]
Subject: Re: e2fsprogs patches

On Aug 01, 2007 21:04 +0530, Aneesh Kumar K.V wrote:
> Regarding the naming of tdb file i decided to go with
> mke2fs-<device_name> instead of mke2fs-<device_name>-<time-stamp>.
> I guess having multiple version of this file in the tdb_dir will confuse
> the user.

I thought about this also - it might make sense to allow multiple undo
files if e.g. e2fsck is run several times in a row, and the user doesn't
like the outcome. I think this also means that we should call the files
"undoe2fs-*" as having undo files for e2fsck is probably desirable.

I guess that also suggests we should save the s_lastcheck (last e2fsck)
time into the tdb, so we can be sure to undo them in the right order.

As for putting them into /var/lib/e2fsprogs, it might make sense to put
them into /var/tmp/e2fsprogs instead, because the files will be obsolete
as soon as the filesystem is mounted... Alternately, e2fsprogs should
install something like /etc/cron.daily/e2fsprogs to clean up
/var/lib/e2fsprogs in a timely manner, like:

tmpwatch -m 128 -d /var/lib/e2fsprogs

I also looked at logrotate, but it uses a fixed filename instead of
a regexp.

Cheers, Andreas
--
Andreas Dilger
Principal Software Engineer
Cluster File Systems, Inc.

2007-08-02 18:34:02

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH 1/4] e2fsprogs: Add undo I/O manager



Andreas Dilger wrote:
> On Aug 01, 2007 21:04 +0530, Aneesh Kumar K.V wrote:
>> +static errcode_t write_file_system_identity(io_channel undo_channel,
>> + TDB_CONTEXT *tdb)
>> +{
>> + /* Write to tdb file in the file system byte order */
>> + tdb_key.dptr = "filesystem MTIME";
>> + tdb_key.dsize = sizeof("filesystem MTIME");
>> + tdb_data.dptr = (unsigned char *) &(super.s_mtime);
>> + tdb_data.dsize = sizeof(super.s_mtime);
>> +
>> + tdb_key.dptr = "filesystem UUID";
>> + tdb_key.dsize = sizeof("filesystem UUID");
>> + tdb_data.dptr = (unsigned char *)&(super.s_uuid);
>> + tdb_data.dsize = sizeof(super.s_uuid);
>
> Is this the mtime and UUID of the new filesystem or the old one? It
> should be the UUID and mtime of the new filesystem, so that the
> undo file can be verified against the current superblock. This poses
> a bit of a problem, because that information isn't saved until after
> the mke2fs run is finished.
>
> One possibility is to overwrite this information at the end of mke2fs
> after the new UUID and mtime are written?
>

This can be done by writing the file system identity during io_channel_close.
How about this patch on top of the last series? I will merge this into the patchset.

diff --git a/lib/ext2fs/undo_io.c b/lib/ext2fs/undo_io.c
index 30e2514..a80bafc 100644
--- a/lib/ext2fs/undo_io.c
+++ b/lib/ext2fs/undo_io.c
@@ -159,8 +159,8 @@ static errcode_t write_file_system_identity(io_channel undo_channel,
/* Also store the block size */
tdb_key.dptr = "filesystem BLKSIZE";
tdb_key.dsize = sizeof("filesystem BLKSIZE");
- tdb_data.dptr = (unsigned char *)&(undo_channel->block_size);
- tdb_data.dsize = sizeof(undo_channel->block_size);
+ tdb_data.dptr = (unsigned char *)&(tdb_data_size);
+ tdb_data.dsize = sizeof(tdb_data_size);

retval = tdb_store(tdb, tdb_key, tdb_data, TDB_INSERT);
if (retval == -1) {
@@ -199,13 +199,6 @@ static errcode_t undo_write_tdb(io_channel channel,
*/
if (!tdb_data_size) {
tdb_data_size = channel->block_size;
-
- /*
- * First write. Write the file system identity
- */
- retval = write_file_system_identity(channel, data->tdb);
- if (retval)
- return retval;
}

if (count == 1)
@@ -405,6 +398,11 @@ static errcode_t undo_close(io_channel channel)
if (--channel->refcount > 0)
return 0;

+ /* Before closing Write the file system identity */
+ retval = write_file_system_identity(channel, data->tdb);
+ if (retval)
+ return retval;
+
if (data->real)
retval = io_channel_close(data->real);


2007-08-02 21:37:39

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH 1/4] e2fsprogs: Add undo I/O manager

On Aug 03, 2007 00:02 +0530, Aneesh Kumar K.V wrote:
> Andreas Dilger wrote:
> >Is this the mtime and UUID of the new filesystem or the old one? It
> >should be the UUID and mtime of the new filesystem, so that the
> >undo file can be verified against the current superblock. This poses
> >a bit of a problem, because that information isn't saved until after
> >the mke2fs run is finished.
> >
> >One possibility is to overwrite this information at the end of mke2fs
> >after the new UUID and mtime are written?
>
> This can be done by writing the file system identity during the the
> io_channel_close.
> How about this patch on top of the last series. I will merge this into the
> patcheset

I thought about this also, but in fact for most uses of the undo manager
we want to save the information at the start instead of the end, so it
is possible to undo e.g. a partial e2fsck that crashes before it finishes.
Only with mke2fs (and, I guess tune2fs -U) does the UUID change at the
end.

Also, can you check if mke2fs does any non-iomanager output? I think
there is code to "zap" the old superblock at the start and old RAID info
at the end of the block device, and I'm not sure if this uses the normal
IO manager or not.

Cheers, Andreas
--
Andreas Dilger
Principal Software Engineer
Cluster File Systems, Inc.

2007-08-03 04:49:55

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH 1/4] e2fsprogs: Add undo I/O manager



Andreas Dilger wrote:
> On Aug 03, 2007 00:02 +0530, Aneesh Kumar K.V wrote:
>> Andreas Dilger wrote:
>>> Is this the mtime and UUID of the new filesystem or the old one? It
>>> should be the UUID and mtime of the new filesystem, so that the
>>> undo file can be verified against the current superblock. This poses
>>> a bit of a problem, because that information isn't saved until after
>>> the mke2fs run is finished.
>>>
>>> One possibility is to overwrite this information at the end of mke2fs
>>> after the new UUID and mtime are written?
>> This can be done by writing the file system identity during the the
>> io_channel_close.
>> How about this patch on top of the last series. I will merge this into the
>> patcheset
>
> I thought about this also, but in fact for most uses of the undo manager
> we want to save the information at the start instead of the end, so it
> is possible to undo e.g. a partial e2fsck that crashes before it finishes.
> Only with mke2fs (and, I guess tune2fs -U) does the UUID change at the
> end.

I am not sure whether saving the information at the start is needed. I understand
that what we are looking for is the case when the application crashes without
doing an io_channel_close. In that case I would say the user can use the
--force option and replay the data from the tdb file. The UUID could very well
have been changed on the disk before the application crashed. So even if we save
the UUID at the start, there are cases where it won't match the disk UUID.


That actually brings me to another change. I will move the block size
recording from write_file_system_identity to a separate function
and call it at the first write. That makes sure we have a record
that carries the block size even when the tdb file does not have one
with the mtime and UUID.


>
> Also, can you check if mke2fs does any non-iomanager output? I think
> there is code to "zap" the old superblock at the start and old RAID info
> at the end of the block device, and I'm not sure if this uses the normal
> IO manager or not.
>


zap_sector and zap_zero use the I/O manager to zero out the blocks, so
they should be OK. I found that when we use -J device=<journal-device>, mke2fs
uses the unix I/O manager to write to the journal superblock. I guess that is OK
because we are not tracking changes to the journal device.

I found that the journal_super_block has space for only 48 s_users
UUID entries, but in ext2fs_add_journal_device we are not checking
the limit. Does that mean repeated mke2fs with -J can lead to corruption?

-aneesh

2007-08-03 18:28:32

by Andreas Dilger

[permalink] [raw]
Subject: Re: [PATCH 1/4] e2fsprogs: Add undo I/O manager

On Aug 03, 2007 10:19 +0530, Aneesh Kumar K.V wrote:
> Andreas Dilger wrote:
> >I thought about this also, but in fact for most uses of the undo manager
> >we want to save the information at the start instead of the end, so it
> >is possible to undo e.g. a partial e2fsck that crashes before it finishes.
> >Only with mke2fs (and, I guess tune2fs -U) does the UUID change at the
> >end.
>
> I am not sure whether saving the information at start is needed. I
> understand that what we are looking for is the case when the application
> crashes without doing a io_channel_close. In that case i would say the
> user can use the --force option and replay the data from the tdb file.
> The UUID could very well be changed on the disk before the application
> crashed. So even if we save UUID at the start, there are cases where it
> won't match with the disk UUID.

While this is true, I don't think it is harmful to save the UUID at the
start. The UUID changing is the rare case, so to make this safer saving
the UUID at the start and the end is best.

> That actually brings me to another change. I would be moving the block size
> recording changes from write_file_system_identity to a separate function
> and will be calling it at the first write.

Definitely, yes.

Cheers, Andreas
--
Andreas Dilger
Principal Software Engineer
Cluster File Systems, Inc.