2007-05-09 08:29:22

by Aneesh Kumar K.V

[permalink] [raw]
Subject: [PATCH 1/2] Add stack I/O manager.


This I/O manager helps in stacking different I/O managers.
For example one can stack the undo I/O manager on top
of Unix I/O manager to achieve the undo functionality.

Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
lib/ext2fs/Makefile.in | 10 +-
lib/ext2fs/ext2_io.h | 4 +
lib/ext2fs/stack_io.c | 419 ++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 431 insertions(+), 2 deletions(-)
create mode 100644 lib/ext2fs/stack_io.c

diff --git a/lib/ext2fs/Makefile.in b/lib/ext2fs/Makefile.in
index 799659e..f5d141a 100644
--- a/lib/ext2fs/Makefile.in
+++ b/lib/ext2fs/Makefile.in
@@ -66,7 +66,8 @@ OBJS= $(DEBUGFS_LIB_OBJS) $(RESIZE_LIB_OBJS) $(E2IMAGE_LIB_OBJS) \
unix_io.o \
unlink.o \
valid_blk.o \
- version.o
+ version.o \
+ stack_io.o

SRCS= ext2_err.c \
$(srcdir)/alloc.c \
@@ -134,7 +135,8 @@ SRCS= ext2_err.c \
$(srcdir)/tst_bitops.c \
$(srcdir)/tst_byteswap.c \
$(srcdir)/tst_getsize.c \
- $(srcdir)/tst_iscan.c
+ $(srcdir)/tst_iscan.c \
+ $(srcdir)/stack_io.c

HFILES= bitops.h ext2fs.h ext2_io.h ext2_fs.h ext2_ext_attr.h ext3_extents.h
HFILES_IN= ext2_err.h ext2_types.h
@@ -550,3 +552,7 @@ tst_iscan.o: $(srcdir)/tst_iscan.c $(srcdir)/ext2_fs.h \
$(top_builddir)/lib/ext2fs/ext2_types.h $(srcdir)/ext2fs.h \
$(srcdir)/ext2_fs.h $(top_srcdir)/lib/et/com_err.h $(srcdir)/ext2_io.h \
$(top_builddir)/lib/ext2fs/ext2_err.h $(srcdir)/bitops.h
+stack_io.o: $(srcdir)/stack_io.c $(srcdir)/ext2_fs.h \
+ $(top_builddir)/lib/ext2fs/ext2_types.h $(srcdir)/ext2fs.h \
+ $(srcdir)/ext2_fs.h $(top_srcdir)/lib/et/com_err.h $(srcdir)/ext2_io.h \
+ $(top_builddir)/lib/ext2fs/ext2_err.h $(srcdir)/bitops.h
diff --git a/lib/ext2fs/ext2_io.h b/lib/ext2fs/ext2_io.h
index eada278..65a294e 100644
--- a/lib/ext2fs/ext2_io.h
+++ b/lib/ext2fs/ext2_io.h
@@ -96,6 +96,10 @@ extern errcode_t io_channel_write_byte(io_channel channel,
/* unix_io.c */
extern io_manager unix_io_manager;

+/* stack_io.c */
+extern struct struct_io_manager *stack_io_manager_init(void);
+extern errcode_t stack_push_io_manager(struct struct_io_manager *stack,
+ struct struct_io_manager *io);
/* test_io.c */
extern io_manager test_io_manager, test_io_backing_manager;
extern void (*test_io_cb_read_blk)
diff --git a/lib/ext2fs/stack_io.c b/lib/ext2fs/stack_io.c
new file mode 100644
index 0000000..5adfe4d
--- /dev/null
+++ b/lib/ext2fs/stack_io.c
@@ -0,0 +1,419 @@
+/*
+ * stack_io.c --- This is the io manager used to stack different io managers.
+ *
+ * Copyright (C) 1996 Theodore Ts'o.
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Public
+ * License.
+ * %End-Header%
+ */
+
+#include <stdio.h>
+#include <string.h>
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <fcntl.h>
+#include <time.h>
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_PRCTL_H
+#include <sys/prctl.h>
+#else
+#define PR_GET_DUMPABLE 3
+#endif
+#if (!defined(HAVE_PRCTL) && defined(linux))
+#include <sys/syscall.h>
+#endif
+
+#include "ext2_fs.h"
+#include "ext2fs.h"
+#include "kernel-list.h"
+
+/*
+ * For checking structure magic numbers...
+ */
+
+#define EXT2_CHECK_MAGIC(struct, code) \
+ if ((struct)->magic != (code)) return (code)
+
+#define EXT2_ET_MAGIC_STACK_IO_CHANNEL (2133571428L)
+
+struct stack_private_data {
+ int magic;
+ struct list_head st_elements;
+};
+
+struct stack_element {
+ struct struct_io_manager *io;
+ struct struct_io_channel *channel;
+ struct list_head list;
+};
+
+
+static errcode_t stack_open(const char *name, int flags, io_channel *channel);
+static errcode_t stack_close(io_channel channel);
+static errcode_t stack_set_blksize(io_channel channel, int blksize);
+static errcode_t stack_read_blk(io_channel channel, unsigned long block,
+ int count, void *data);
+static errcode_t stack_write_blk(io_channel channel, unsigned long block,
+ int count, const void *data);
+static errcode_t stack_flush(io_channel channel);
+static errcode_t stack_write_byte(io_channel channel, unsigned long offset,
+ int count, const void *buf);
+static errcode_t stack_set_option(io_channel channel, const char *option,
+ const char *arg);
+
+static struct struct_io_manager struct_stack_manager = {
+ .magic = EXT2_ET_MAGIC_IO_MANAGER,
+ .name = "Stack I/O Manager",
+ .open = stack_open,
+ .close = stack_close,
+ .set_blksize = stack_set_blksize,
+ .read_blk = stack_read_blk,
+ .write_blk = stack_write_blk,
+ .flush = stack_flush,
+ .write_byte = stack_write_byte,
+ .set_option = stack_set_option,
+};
+
+io_manager stack_io_manager = &struct_stack_manager;
+/* FIXME!! get rid of this. not thread safe */
+static struct stack_private_data private_data;
+struct struct_io_manager *stack_io_manager_init(void)
+{
+
+
+ INIT_LIST_HEAD(&(private_data.st_elements));
+ private_data.magic = EXT2_ET_MAGIC_STACK_IO_CHANNEL;
+
+ return stack_io_manager;
+
+}
+
+errcode_t stack_push_io_manager(struct struct_io_manager *stack, struct struct_io_manager *io)
+{
+ errcode_t retval;
+ (void)(stack);
+ struct stack_element *element;
+ retval = ext2fs_get_mem(sizeof(struct stack_element), &element);
+ if (retval)
+ return retval;
+ element->io = io;
+ list_add(&(element->list), &(private_data.st_elements));
+
+ return 0;
+}
+
+
+static errcode_t stack_open(const char *name, int flags, io_channel *channel)
+{
+ io_channel io = NULL, stack_io_channel = NULL;
+ errcode_t retval;
+ char *value;
+ struct list_head *entry;
+ struct stack_element *element;
+
+ if (name == 0)
+ return EXT2_ET_BAD_DEVICE_NAME;
+ retval = ext2fs_get_mem(sizeof(struct struct_io_channel), &io);
+ if (retval)
+ return retval;
+ memset(io, 0, sizeof(struct struct_io_channel));
+ io->magic = EXT2_ET_MAGIC_IO_CHANNEL;
+
+ io->manager = stack_io_manager;
+ retval = ext2fs_get_mem(strlen(name)+1, &io->name);
+ if (retval)
+ goto cleanup;
+
+ strcpy(io->name, name);
+ io->private_data = &(private_data);
+ io->block_size = 1024;
+ io->read_error = 0;
+ io->write_error = 0;
+ io->refcount = 1;
+
+ /*
+ * Now call all the io managers open routine
+ */
+ list_for_each(entry, &(private_data.st_elements)) {
+
+ element = list_entry(entry, struct stack_element, list);
+ if (element->io) {
+ retval = element->io->open(name, flags,
+ &stack_io_channel);
+ if (retval) {
+ /*
+ * FIXME!! we need cleanup previous
+ * successfull I/O manager open
+ */
+ goto cleanup;
+ }
+ }
+ element->channel = stack_io_channel;
+ }
+
+ *channel = io;
+ return 0;
+
+cleanup:
+ if (io)
+ ext2fs_free_mem(&io);
+ return retval;
+}
+
+static errcode_t stack_io_channel_close(io_channel channel)
+{
+ if (channel && channel->manager->close)
+ return channel->manager->close(channel);
+
+ return 0;
+
+}
+
+static errcode_t stack_close(io_channel channel)
+{
+ struct stack_private_data *data;
+ struct list_head *entry;
+ struct stack_element *element;
+
+ EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+ data = (struct stack_private_data *) channel->private_data;
+ EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_STACK_IO_CHANNEL);
+
+ if (--channel->refcount > 0)
+ return 0;
+
+ list_for_each(entry, &(data->st_elements)) {
+ element = list_entry(entry, struct stack_element, list);
+ stack_io_channel_close(element->channel);
+ }
+
+ if (channel->name)
+ ext2fs_free_mem(&channel->name);
+ ext2fs_free_mem(&channel);
+
+ return 0;
+}
+
+static errcode_t stack_io_channel_set_blksize(io_channel channel, int blksize)
+{
+ if (channel && channel->manager->set_blksize)
+ return channel->manager->set_blksize(channel, blksize);
+
+ return 0;
+
+}
+static errcode_t stack_set_blksize(io_channel channel, int blksize)
+{
+ struct stack_private_data *data;
+ struct list_head *entry;
+ struct stack_element *element;
+ errcode_t retval = 0;
+
+ EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+ data = (struct stack_private_data *) channel->private_data;
+ EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_STACK_IO_CHANNEL);
+
+ list_for_each(entry, &(data->st_elements)) {
+ element = list_entry(entry, struct stack_element, list);
+ retval = stack_io_channel_set_blksize(element->channel, blksize);
+ if (retval)
+ return retval;
+ }
+
+ channel->block_size = blksize;
+ return retval;
+}
+
+#define list_for_each_prev(pos, head) \
+ for (pos = (head)->prev; pos != (head); \
+ pos = pos->prev)
+
+static errcode_t stack_io_channel_read_blk(io_channel channel,
+ unsigned long block, int count, void *buf)
+{
+ if (channel && channel->manager->read_blk)
+ return channel->manager->read_blk(channel,
+ block, count, buf);
+
+ return 0;
+}
+static errcode_t stack_read_blk(io_channel channel, unsigned long block,
+ int count, void *buf)
+{
+ struct stack_private_data *data;
+ struct list_head *entry;
+ struct stack_element *element;
+ errcode_t retval = 0;
+
+ EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+ data = (struct stack_private_data *) channel->private_data;
+ EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_STACK_IO_CHANNEL);
+
+ /*
+ * we should read int the reverse order
+ */
+ list_for_each_prev(entry, &(data->st_elements)) {
+ element = list_entry(entry, struct stack_element, list);
+ retval = stack_io_channel_read_blk(element->channel, block, count, buf);
+ if (retval)
+ return retval;
+ }
+
+ return retval;
+}
+
+static errcode_t stack_io_channel_write_blk(io_channel channel,
+ unsigned long block, int count, const void *buf)
+{
+ if (channel && channel->manager->write_blk)
+ return channel->manager->write_blk(channel,
+ block, count, buf);
+
+ return 0;
+}
+static errcode_t stack_write_blk(io_channel channel, unsigned long block,
+ int count, const void *buf)
+{
+ struct stack_private_data *data;
+ struct list_head *entry;
+ struct stack_element *element;
+ errcode_t retval = 0;
+
+ EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+ data = (struct stack_private_data *) channel->private_data;
+ EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_STACK_IO_CHANNEL);
+
+ list_for_each(entry, &(data->st_elements)) {
+ element = list_entry(entry, struct stack_element, list);
+ retval = stack_io_channel_write_blk(element->channel, block, count, buf);
+ if (retval)
+ return retval;
+ }
+
+ return retval;
+}
+
+static errcode_t stack_io_channel_write_byte(io_channel channel,
+ unsigned long offset, int count, const void *buf)
+{
+ if (channel && channel->manager->write_byte)
+ return channel->manager->write_byte(channel,
+ offset, count, buf);
+
+ return 0;
+}
+
+static errcode_t stack_write_byte(io_channel channel, unsigned long offset,
+ int count, const void *buf)
+{
+ struct stack_private_data *data;
+ struct list_head *entry;
+ struct stack_element *element;
+ errcode_t retval = 0;
+
+ EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+ data = (struct stack_private_data *) channel->private_data;
+ EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_STACK_IO_CHANNEL);
+
+ list_for_each(entry, &(data->st_elements)) {
+ element = list_entry(entry, struct stack_element, list);
+ retval = stack_io_channel_write_byte(element->channel, offset, count, buf);
+ if (retval)
+ return retval;
+ }
+
+ return retval;
+}
+
+static errcode_t stack_io_channel_flush(io_channel channel)
+{
+ if (channel && channel->manager->flush)
+ return channel->manager->flush(channel);
+
+ return 0;
+
+}
+
+/*
+ * Flush data buffers to disk.
+ */
+static errcode_t stack_flush(io_channel channel)
+{
+ struct stack_private_data *data;
+ struct list_head *entry;
+ struct stack_element *element;
+ errcode_t retval = 0;
+
+ EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+ data = (struct stack_private_data *) channel->private_data;
+ EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_STACK_IO_CHANNEL);
+
+ list_for_each(entry, &(data->st_elements)) {
+ element = list_entry(entry, struct stack_element, list);
+ retval = stack_io_channel_flush(element->channel);
+ if (retval)
+ return retval;
+ }
+
+ return retval;
+}
+
+
+static errcode_t stack_io_channel_set_option(io_channel channel,
+ const char *option, const char *arg)
+{
+ if (channel && channel->manager->set_option)
+ return channel->manager->set_option(channel,
+ option, arg);
+
+ return 0;
+
+}
+static errcode_t stack_set_option(io_channel channel, const char *option,
+ const char *arg)
+{
+ struct stack_private_data *data;
+ struct list_head *entry;
+ struct stack_element *element;
+ errcode_t retval = 0;
+ struct struct_io_manager *io_mng;
+ int success;
+
+ EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
+ data = (struct stack_private_data *) channel->private_data;
+ EXT2_CHECK_MAGIC(data, EXT2_ET_MAGIC_STACK_IO_CHANNEL);
+
+ list_for_each(entry, &(data->st_elements)) {
+ element = list_entry(entry, struct stack_element, list);
+ retval = stack_io_channel_set_option(element->channel,
+ option, arg);
+ if (retval == 0) {
+ success = 1;
+ continue;
+ }
+
+ if (retval == EXT2_ET_INVALID_ARGUMENT) {
+ /*
+ * this may be the option for some other io manager
+ * if everybody said EXT2_ET_INVALID_ARGUMENT
+ * then we will return that
+ */
+ continue;
+ } else {
+ return retval;
+ }
+ }
+
+ if (success)
+ return 0;
+
+ return retval;
+}
--
1.5.2.rc0.71.g4342-dirty


2007-05-19 06:16:45

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 1/2] Add stack I/O manager.

On Wed, May 09, 2007 at 01:42:17PM +0530, Aneesh Kumar K.V wrote:
> From: Aneesh Kumar K.V <[email protected]>
>
> This I/O manager helps in stacking different I/O managers.
> For example one can stack the undo I/O manager on top
> of Unix I/O manager to achieve the undo functionality.

This is probably more generality than is strictly necessary; and the
place where the excess generality gets messy is the fact that you make
the stacking layer responsible for calling all of the io manager's
open routines (which is still a FIXME). So I would flush the stack_io
layer entirely.

What I would recommend as the fast and dirty approach. Basically, ape
the approach used by test_io layer _exactly_, except instead of using
a global variable test_io_backing_manager, you provide a function
which sets the static variable, undo_io_backing_manager. This
variable is used only by the subsequent call to the ->open method,
which just like test_io simply passes the name down to the backing
manager specified in the static variable. Then just make the undo_io
manager work the way test_io does, where does its thing, and then it
calls the appropriate function in its private->real io_channel.
Basically, make undo_io responsible for calling the next io_manager
down in the chain, This is workable because we don't need to
initialize the tdb file until we first try to write to the io_channel,
and ext2fs_open() only needs to do read operations, so we can set the
tdb filename via an optoin after ext2fs_open() returns.

I could imagine a more complicated approach where we still wouldn't
have a stacking layer, but have an integrated parsing layer which
would have registered names for each of the io_managers. It would
then take the part of the name before the colon, and treat it as the
type (i.e., "file"), and the part after the colon and pass it to the
relevant's io_manager (i.e., "/dev/sda4"). So you could pass in a
name like "file:/dev/sda4".

For the test_io manager, since it doesn't need any arguments, it could
just take the part after the colon, and use it to pass back to the
parsing layer. So names to test_io manager would look like this:
"test:file:/dev/sda4".

For managers that need two names, we would have to do something a bit
more complicated, like this: "undo:/var/undo/tdbfile#file:/dev/sda4".
But basically, we still make the stacking up to the individual
io_manager. The main advantage with this approach is that all of the
stacking is done via the ASCII string provided by the user, and
there's no need for any special ad-hoc options.

On the other hand, arguably, you want the program decided where to put
the undo file, so that it can be found automatically by e2fsck. So
the fast-and-dirty approach might be the better one anyway --- it
certainly is easier!

Regards,

- Ted

2007-05-21 10:27:41

by Aneesh Kumar K.V

[permalink] [raw]
Subject: Re: [PATCH 1/2] Add stack I/O manager.



Theodore Tso wrote:
> On Wed, May 09, 2007 at 01:42:17PM +0530, Aneesh Kumar K.V wrote:
>> From: Aneesh Kumar K.V <[email protected]>
>>
>> This I/O manager helps in stacking different I/O managers.
>> For example one can stack the undo I/O manager on top
>> of Unix I/O manager to achieve the undo functionality.
>
> This is probably more generality than is strictly necessary; and the
> place where the excess generality gets messy is the fact that you make
> the stacking layer responsible for calling all of the io manager's
> open routines (which is still a FIXME). So I would flush the stack_io
> layer entirely.


Can't we address the FIXME by calling the respective close routine in
the failure case.

One thing i didn't like with the stack_io was, we will be opening the
device at each stacked I/O manager layer; which i also think is okey
provided we expect to use these I/O managers independently

>
> What I would recommend as the fast and dirty approach. Basically, ape
> the approach used by test_io layer _exactly_, except instead of using
> a global variable test_io_backing_manager, you provide a function
> which sets the static variable, undo_io_backing_manager. This
> variable is used only by the subsequent call to the ->open method,
> which just like test_io simply passes the name down to the backing
> manager specified in the static variable. Then just make the undo_io
> manager work the way test_io does, where does its thing, and then it
> calls the appropriate function in its private->real io_channel.
> Basically, make undo_io responsible for calling the next io_manager
> down in the chain, This is workable because we don't need to
> initialize the tdb file until we first try to write to the io_channel,
> and ext2fs_open() only needs to do read operations, so we can set the
> tdb filename via an optoin after ext2fs_open() returns.
>

One thing i was confused about was the usage of read_blk, write_blk etc
pointers in test_private_data. With respect to undo I/O manager do i
need to provide them ?. If we really need them, then i was thinking a
generic stacking layer as i send in the patches would be better. That
means any pluggable functionality that we achieve right now by setting
test_io_cb_read_blk etc will be implemented as a I/O manager that does
the particular task. Later we stack all these I/O manager to get the
full functionality.

-aneesh

2007-05-21 15:12:38

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 1/2] Add stack I/O manager.

On Mon, May 21, 2007 at 03:57:26PM +0530, Aneesh Kumar K.V wrote:
> Can't we address the FIXME by calling the respective close routine in
> the failure case.

You can but it doesn't address the biggest problem, which is if you're
going to add all of this extra complexity, we might as well deal with
the name parsing issue. In your stacking I/O layer, you pass the same
name to all of the stacked modules. That's not necessarily the right
thing to do. It works for test_io because it doesn't need a name
parameter (it just passes the name straight down to the its
lower-layer module). It mostly works for the undo_io layer because
while the tdb pathname could be the passed in open argument, it's
ext2fs_openfs() is openfs, and you don't have to give it its tdb name
until after the ext2fs_openfs() succeeds, and you can pass it down as
an option.

In general, though, you need to worry about what sub-parts of the name
you need to pass down to each of the stacked modules. So the stack_io
layer adds a lot of complexity, but it's not a fully general solution.
So if it's not fully general, maybe something that's simpler is
sufficient, since in truth we probably don't need unlimited levels of
stacking.

> One thing i was confused about was the usage of read_blk, write_blk etc
> pointers in test_private_data. With respect to undo I/O manager do i
> need to provide them ?. If we really need them, then i was thinking a
> generic stacking layer as i send in the patches would be better. That
> means any pluggable functionality that we achieve right now by setting
> test_io_cb_read_blk etc will be implemented as a I/O manager that does
> the particular task. Later we stack all these I/O manager to get the
> full functionality.

The read_blk, write_blk, etc. pointers in test_private_data() are
specific to the test_io layer. That's part of the value-add which the
test_io module provides, and no, the undo I/O manager doesn't need
them. It's just there as part of what the user of the test i/o
manager might want to use, by allowing some arbitrary callback
function to be called for each test i/o. The main place I've used it
is when I want to set effectively a watchpoint on e2fsck, because I'm
trying to figure out which part of e2fsck is reading or writing a
particular block, and I want to dump out the contents of that block
when it reads/writes it. The fastest way to do that is to add a
callback function that tests for the block number, and when it is
triggered, I can have the debugging function print out some or all of
the contents the block. So that's strictly a test i/o manager thing;
the undo i/o manager wouldn't need this at all!

Regards,

- Ted