2011-02-25 12:49:42

by Lukas Czerner

[permalink] [raw]
Subject: [PATCH 1/4] e2image: Add support for qcow2 format

This commit adds support for exporting a filesystem into the QCOW2 image
format. Like the sparse format, this saves space by writing only the
necessary (metadata) blocks into the image. Unlike a sparse image, a QCOW2
image is NOT sparse, hence its size does not change when it is copied with
tools that are not sparse-aware.

A new option, '-Q', has been added to tell e2image to use QCOW2 as the
output image format. QCOW2 supports encryption and compression; however,
e2image does not support these features so far. You can still
scramble filenames with the '-s' option.

Signed-off-by: Lukas Czerner <[email protected]>
---
lib/ext2fs/Makefile.in | 4 +-
lib/ext2fs/bitops.h | 4 +
lib/ext2fs/e2image.h | 21 +-
lib/ext2fs/qcow2.c | 227 +++++++++++++++
lib/ext2fs/qcow2.h | 94 +++++++
misc/e2image.8.in | 47 +++-
misc/e2image.c | 723 ++++++++++++++++++++++++++++++++++++++++++++----
7 files changed, 1048 insertions(+), 72 deletions(-)
create mode 100644 lib/ext2fs/qcow2.c
create mode 100644 lib/ext2fs/qcow2.h

diff --git a/lib/ext2fs/Makefile.in b/lib/ext2fs/Makefile.in
index 763694d..72a5331 100644
--- a/lib/ext2fs/Makefile.in
+++ b/lib/ext2fs/Makefile.in
@@ -70,6 +70,7 @@ OBJS= $(DEBUGFS_LIB_OBJS) $(RESIZE_LIB_OBJS) $(E2IMAGE_LIB_OBJS) \
openfs.o \
progress.o \
punch.o \
+ qcow2.o \
read_bb.o \
read_bb_file.o \
res_gdt.o \
@@ -138,6 +139,7 @@ SRCS= ext2_err.c \
$(srcdir)/openfs.c \
$(srcdir)/progress.c \
$(srcdir)/punch.c \
+ $(srcdir)/qcow2.c \
$(srcdir)/read_bb.c \
$(srcdir)/read_bb_file.c \
$(srcdir)/res_gdt.c \
@@ -158,7 +160,7 @@ SRCS= ext2_err.c \
$(srcdir)/write_bb_file.c

HFILES= bitops.h ext2fs.h ext2_io.h ext2_fs.h ext2_ext_attr.h ext3_extents.h \
- tdb.h
+ tdb.h qcow2.h
HFILES_IN= ext2_err.h ext2_types.h

LIBRARY= libext2fs
diff --git a/lib/ext2fs/bitops.h b/lib/ext2fs/bitops.h
index 3ded002..83a01e4 100644
--- a/lib/ext2fs/bitops.h
+++ b/lib/ext2fs/bitops.h
@@ -31,6 +31,8 @@ extern __u64 ext2fs_swab64(__u64 val);
#define ext2fs_le32_to_cpu(x) ext2fs_swab32((x))
#define ext2fs_cpu_to_le16(x) ext2fs_swab16((x))
#define ext2fs_le16_to_cpu(x) ext2fs_swab16((x))
+#define ext2fs_cpu_to_be64(x) ((__u64)(x))
+#define ext2fs_be64_to_cpu(x) ((__u64)(x))
#define ext2fs_cpu_to_be32(x) ((__u32)(x))
#define ext2fs_be32_to_cpu(x) ((__u32)(x))
#define ext2fs_cpu_to_be16(x) ((__u16)(x))
@@ -42,6 +44,8 @@ extern __u64 ext2fs_swab64(__u64 val);
#define ext2fs_le32_to_cpu(x) ((__u32)(x))
#define ext2fs_cpu_to_le16(x) ((__u16)(x))
#define ext2fs_le16_to_cpu(x) ((__u16)(x))
+#define ext2fs_cpu_to_be64(x) ext2fs_swab64((x))
+#define ext2fs_be64_to_cpu(x) ext2fs_swab64((x))
#define ext2fs_cpu_to_be32(x) ext2fs_swab32((x))
#define ext2fs_be32_to_cpu(x) ext2fs_swab32((x))
#define ext2fs_cpu_to_be16(x) ext2fs_swab16((x))
diff --git a/lib/ext2fs/e2image.h b/lib/ext2fs/e2image.h
index 4de2c8d..a47f9e6 100644
--- a/lib/ext2fs/e2image.h
+++ b/lib/ext2fs/e2image.h
@@ -12,6 +12,14 @@
* %End-Header%
*/

+/* Image types */
+#define IMAGE_RAW	1
+#define IMAGE_QCOW2	2
+
+/*
+ * Image flags.  These are OR-ed together and tested with '&', so every
+ * flag must occupy its own bit.
+ */
+#define INSTALL_FLAG	1
+#define SCRAMBLE_FLAG	2
+#define IS_QCOW2_FLAG	4

struct ext2_image_hdr {
__u32 magic_number; /* This must be EXT2_ET_MAGIC_E2IMAGE */
@@ -36,16 +44,3 @@ struct ext2_image_hdr {
__u32 offset_blockmap; /* Byte offset of the inode bitmaps */
__u32 offset_reserved[8];
};
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/lib/ext2fs/qcow2.c b/lib/ext2fs/qcow2.c
new file mode 100644
index 0000000..17eab38
--- /dev/null
+++ b/lib/ext2fs/qcow2.c
@@ -0,0 +1,227 @@
+/*
+ * qcow2.c --- Set of qcow2 related functions
+ *
+ * Copyright (C) 2010 Red Hat, Inc., Lukas Czerner <[email protected]>
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Public
+ * License.
+ * %End-Header%
+ */
+
+#define _LARGEFILE_SOURCE
+#define _LARGEFILE64_SOURCE
+
+#include <fcntl.h>
+#include <grp.h>
+#include <pwd.h>
+#include <stdio.h>
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <assert.h>
+
+#include "ext2fs/ext2fs.h"
+#include "qcow2.h"
+
+/* Functions for converting qcow2 image into raw image */
+
+/*
+ * Read the qcow2 header from the beginning of the file open on fd.
+ *
+ * Returns a newly allocated header (still in on-disk big-endian byte
+ * order) which the caller must free(), or NULL if the header cannot be
+ * allocated or read, or if its magic number or version is not that of a
+ * version 2 qcow image.  fname is currently unused.
+ */
+struct ext2_qcow2_hdr *qcow2_read_header(int fd, char *fname)
+{
+	struct ext2_qcow2_hdr *hdr;
+	ssize_t size;
+
+	hdr = calloc(1, sizeof(*hdr));
+	if (!hdr)
+		return NULL;
+
+	if (lseek(fd, 0, SEEK_SET) < 0)
+		goto fail;
+
+	size = read(fd, hdr, sizeof(*hdr));
+	if (size != (ssize_t) sizeof(*hdr))
+		goto fail;
+
+	if ((ext2fs_be32_to_cpu(hdr->magic) != QCOW_MAGIC) ||
+	    (ext2fs_be32_to_cpu(hdr->version) != 2))
+		goto fail;
+
+	return hdr;
+
+fail:
+	/* Single exit path so the buffer is released on every failure */
+	free(hdr);
+	return NULL;
+}
+
+/*
+ * Read the L1 table (img->l1_size 64-bit entries located at
+ * img->l1_offset) into a newly allocated buffer and store it in
+ * img->l1_table.  The entries are left in on-disk byte order.
+ *
+ * Returns 0 on success or an errno value on failure; a read that ends
+ * early without an error is reported as EIO.  Nothing is stored in
+ * img->l1_table on failure.
+ */
+static int qcow2_read_l1_table(struct ext2_qcow2_image *img)
+{
+	int fd = img->fd;
+	size_t l1_size = img->l1_size * sizeof(__u64);
+	ssize_t size;
+	__u64 *table;
+	int ret;
+
+	table = calloc(1, l1_size);
+	if (!table)
+		return ENOMEM;
+
+	if (lseek(fd, img->l1_offset, SEEK_SET) < 0) {
+		/* Save errno before free() has a chance to change it */
+		ret = errno;
+		goto fail;
+	}
+
+	size = read(fd, table, l1_size);
+	if (size < 0) {
+		ret = errno;
+		goto fail;
+	}
+	if ((size_t) size != l1_size) {
+		/* Short read: errno is not set, so report a generic I/O error */
+		ret = EIO;
+		goto fail;
+	}
+
+	img->l1_table = table;
+
+	return 0;
+
+fail:
+	free(table);
+	return ret;
+}
+
+/*
+ * Read one L2 table (a single cluster) located at byte offset 'offset'
+ * of the image into the caller supplied buffer *l2_table, which must
+ * already be allocated and hold at least img->cluster_size bytes.
+ *
+ * Returns 0 on success or an errno value on failure; a read that ends
+ * early without an error is reported as EIO.
+ */
+static int qcow2_read_l2_table(struct ext2_qcow2_image *img, off_t offset,
+			       __u64 **l2_table)
+{
+	int fd = img->fd;
+	ssize_t size;
+
+	assert(*l2_table);
+
+	if (lseek(fd, offset, SEEK_SET) < 0)
+		return errno;
+
+	size = read(fd, *l2_table, img->cluster_size);
+	if (size < 0)
+		return errno;
+	/* Short read: errno is not set, so do not return it (it may be 0) */
+	if ((size_t) size != img->cluster_size)
+		return EIO;
+
+	return 0;
+}
+
+/*
+ * Copy 'count' bytes from offset off_in of fdin to offset off_out of
+ * fdout, using buf (at least 'count' bytes, must not be NULL) as the
+ * bounce buffer.
+ *
+ * Returns 0 on success or an errno value on failure; a transfer that
+ * ends early without an error is reported as EIO.
+ */
+static int qcow2_copy_data(int fdin, int fdout, off_t off_in, off_t off_out,
+			   void *buf, size_t count)
+{
+	ssize_t size;
+
+	assert(buf);
+
+	if (lseek(fdout, off_out, SEEK_SET) < 0)
+		return errno;
+
+	if (lseek(fdin, off_in, SEEK_SET) < 0)
+		return errno;
+
+	size = read(fdin, buf, count);
+	if (size < 0)
+		return errno;
+	/* Short transfers do not set errno, so it must not be returned */
+	if ((size_t) size != count)
+		return EIO;
+
+	size = write(fdout, buf, count);
+	if (size < 0)
+		return errno;
+	if ((size_t) size != count)
+		return EIO;
+
+	return 0;
+}
+
+
+/*
+ * Convert the qcow2 image open on qcow2_fd into a raw image written to
+ * raw_fd.  hdr is the header previously read from the image, still in
+ * on-disk byte order.
+ *
+ * Every cluster referenced from the L1/L2 tables is copied to its
+ * position in the raw image; afterwards the raw image is extended to the
+ * size recorded in the header if it is still shorter than that.
+ *
+ * Returns 0 on success or an errno value on failure.  All buffers
+ * allocated here are released on every path.
+ */
+int qcow2_write_raw_image(int qcow2_fd, int raw_fd,
+			  struct ext2_qcow2_hdr *hdr)
+{
+	struct ext2_qcow2_image img;
+	int ret = 0;
+	unsigned int l1_index, l2_index;
+	off_t offset, end;
+	__u64 *l1_table, *l2_table = NULL;
+	void *copy_buf = NULL;
+	char zero = 0;
+
+	img.fd = qcow2_fd;
+	img.hdr = hdr;
+	img.l2_cache = NULL;
+	img.l1_table = NULL;
+	img.cluster_bits = ext2fs_be32_to_cpu(hdr->cluster_bits);
+	/*
+	 * cluster_bits comes straight from the file and is used as a shift
+	 * count below.  The qcow2 format requires at least 9; anything above
+	 * 30 would overflow the int shifts.
+	 */
+	if (img.cluster_bits < 9 || img.cluster_bits > 30)
+		return EINVAL;
+	img.cluster_size = 1 << img.cluster_bits;
+	img.l1_size = ext2fs_be32_to_cpu(hdr->l1_size);
+	img.l1_offset = ext2fs_be64_to_cpu(hdr->l1_table_offset);
+	img.l2_size = 1 << (img.cluster_bits - 3);
+	img.image_size = ext2fs_be64_to_cpu(hdr->size);
+
+	l2_table = calloc(1, img.cluster_size);
+	if (!l2_table) {
+		ret = ENOMEM;
+		goto out;
+	}
+
+	copy_buf = calloc(1, img.cluster_size);
+	if (!copy_buf) {
+		ret = ENOMEM;
+		goto out;
+	}
+
+	if (lseek(raw_fd, 0, SEEK_SET) < 0) {
+		ret = errno;
+		goto out;
+	}
+
+	ret = qcow2_read_l1_table(&img);
+	if (ret)
+		goto out;
+
+	l1_table = img.l1_table;
+	/* Walk through l1 table */
+	for (l1_index = 0; l1_index < img.l1_size; l1_index++) {
+		off_t off_out;
+
+		offset = ext2fs_be64_to_cpu(l1_table[l1_index]) &
+			 ~QCOW_OFLAG_COPIED;
+
+		/* Skip unallocated or out of range l2 tables */
+		if ((offset > img.image_size) ||
+		    (offset <= 0))
+			continue;
+
+		ret = qcow2_read_l2_table(&img, offset, &l2_table);
+		if (ret)
+			goto out;
+
+		/* Walk through l2 table and copy data blocks into raw image */
+		for (l2_index = 0; l2_index < img.l2_size; l2_index++) {
+			offset = ext2fs_be64_to_cpu(l2_table[l2_index]) &
+				 ~QCOW_OFLAG_COPIED;
+
+			if (offset == 0)
+				continue;
+
+			/* Widen first so the cluster index cannot wrap */
+			off_out = ((off_t) l1_index * img.l2_size) +
+				  l2_index;
+			off_out <<= img.cluster_bits;
+			ret = qcow2_copy_data(qcow2_fd, raw_fd, offset,
+					      off_out, copy_buf,
+					      img.cluster_size);
+			if (ret)
+				goto out;
+		}
+	}
+
+	/*
+	 * Resize the output image to the filesystem size.  Only extend it,
+	 * and do so by writing a zero at the last byte (image_size - 1), so
+	 * that the file is exactly image_size bytes long and no byte copied
+	 * above is overwritten.
+	 */
+	end = lseek(raw_fd, 0, SEEK_END);
+	if (end < 0) {
+		ret = errno;
+		goto out;
+	}
+	if (img.image_size > 0 && (__u64) end < img.image_size) {
+		ssize_t size;
+
+		if (lseek(raw_fd, img.image_size - 1, SEEK_SET) < 0) {
+			ret = errno;
+			goto out;
+		}
+
+		size = write(raw_fd, &zero, 1);
+		if (size != 1) {
+			ret = (size < 0) ? errno : EIO;
+			goto out;
+		}
+	}
+
+out:
+	free(copy_buf);
+	free(img.l1_table);
+	free(l2_table);
+	return ret;
+}
diff --git a/lib/ext2fs/qcow2.h b/lib/ext2fs/qcow2.h
new file mode 100644
index 0000000..28eaac5
--- /dev/null
+++ b/lib/ext2fs/qcow2.h
@@ -0,0 +1,94 @@
+/*
+ * qcow2.h --- structures and prototypes used to read and write QCOW2 images
+ *
+ * Copyright
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Public
+ * License.
+ * %End-Header%
+ */
+
+#ifndef EXT2FS_QCOW2_H
+#define EXT2FS_QCOW2_H
+
+/* Number of l2 tables in memory before writeback */
+#define L2_CACHE_PREALLOC	512
+
+
+#define QCOW_MAGIC		(('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
+#define QCOW_VERSION		2
+/*
+ * Top bit of an L1/L2 table entry.
+ * NOTE(review): shifting the signed constant 1LL by 63 overflows a signed
+ * long long; an unsigned constant (1ULL) would avoid that -- confirm and
+ * change together with any other definition of this macro.
+ */
+#define QCOW_OFLAG_COPIED	(1LL << 63)
+
+/* On-disk image header; the fields are stored in big-endian byte order */
+struct ext2_qcow2_hdr {
+	__u32	magic;
+	__u32	version;
+
+	__u64	backing_file_offset;
+	__u32	backing_file_size;
+
+	__u32	cluster_bits;
+	__u64	size;
+	__u32	crypt_method;
+
+	__u32	l1_size;
+	__u64	l1_table_offset;
+
+	__u64	refcount_table_offset;
+	__u32	refcount_table_clusters;
+
+	__u32	nb_snapshots;
+	__u64	snapshots_offset;
+};
+
+typedef struct ext2_qcow2_l2_table L2_CACHE_HEAD;
+
+/* One cached l2 table, linked into either the used or the free list */
+struct ext2_qcow2_l2_table {
+	__u32		l1_index;	/* l1 table slot pointing at this table */
+	__u64		offset;		/* byte offset of the table in the image */
+	__u64		*data;		/* the l2 entries themselves */
+	L2_CACHE_HEAD	*next;
+};
+
+/* Cache of l2 tables kept in memory until they are written back */
+struct ext2_qcow2_l2_cache {
+	L2_CACHE_HEAD	*used_head;
+	L2_CACHE_HEAD	*used_tail;
+	L2_CACHE_HEAD	*free_head;
+	__u32		free;		/* number of tables on the free list */
+	__u32		count;		/* total number of tables in the cache */
+	__u64		next_offset;	/* image offset given to the next new table */
+};
+
+/* State of the refcount table and of the refcount block being filled */
+struct ext2_qcow2_refcount {
+	__u64	*refcount_table;
+	__u64	refcount_table_offset;
+	__u64	refcount_block_offset;
+
+	__u32	refcount_table_clusters;
+	__u32	refcount_table_index;
+	__u32	refcount_block_index;
+
+	__u16	*refcount_block;
+};
+
+/* In-memory description of a qcow2 image being read or written */
+struct ext2_qcow2_image {
+	int	fd;
+	struct	ext2_qcow2_hdr		*hdr;
+	struct	ext2_qcow2_l2_cache	*l2_cache;
+	struct	ext2_qcow2_refcount	refcount;
+	__u32	cluster_size;
+	__u32	cluster_bits;
+	__u32	l1_size;
+	__u32	l2_size;
+
+	__u64	*l1_table;
+	__u64	l2_offset;
+	__u64	l1_offset;
+	__u64	image_size;
+};
+
+/* Function prototypes */
+
+/* qcow2.c */
+
+/* Functions for converting qcow2 image into raw image */
+struct ext2_qcow2_hdr *qcow2_read_header(int, char *);
+int qcow2_write_raw_image(int, int, struct ext2_qcow2_hdr *);
+
+#endif /* EXT2FS_QCOW2_H */
diff --git a/misc/e2image.8.in b/misc/e2image.8.in
index e18a30b..4a28580 100644
--- a/misc/e2image.8.in
+++ b/misc/e2image.8.in
@@ -40,7 +40,8 @@ another program, such as
(Note that this is currently only supported when
creating a raw image file using the
.B \-r
-option, since the process of creating a normal image file currently
+option, since the process of creating a normal image file, or QCOW2
+image currently
requires random access to the file, which cannot be done using a
pipe. This restriction will hopefully be lifted in a future version of
.BR e2image .)
@@ -56,13 +57,14 @@ accessible in the case where the filesystem has been badly damaged.
.PP
To save disk space,
.B e2image
-creates the image file as a sparse file.
-Hence, if the image file
+creates the image file as a sparse file, or in QCOW2 format.
+Hence, if the sparse image file
needs to be copied to another location, it should
either be compressed first or copied using the
.B \-\-sparse=always
option to the GNU version of
-.BR cp .
+.BR cp .
+This does not apply to the QCOW2 image, which is not sparse.
.PP
The size of an ext2 image file depends primarily on the size of the
filesystems and how many inodes are in use. For a typical 10 gigabyte
@@ -129,6 +131,43 @@ the
option will prevent analysis of problems related to hash-tree indexed
directories.
.PP
+.SH QCOW2 IMAGE FILES
+The
+.B \-Q
+option will create a QCOW2 image file instead of a normal, or raw image file.
+A QCOW2 image contains all the information the raw image does, however unlike
+the raw image it is not sparse. The QCOW2 image minimizes the amount of disk
+space used by storing data in a special format which packs the data closely
+together, hence avoiding holes while still minimizing size.
+.PP
+In order to send a filesystem to the maintainer as part of a bug report to
+e2fsprogs, use the following commands (replace hda1 with the appropriate device):
+.PP
+.br
+\ \fBe2image \-Q /dev/hda1 hda1.qcow2\fR
+.br
+\ \fBbzip2 -z hda1.qcow2\fR
+.PP
+This will only send the metadata information, without any data blocks.
+However, the filenames in the directory blocks can still reveal
+information about the contents of the filesystem that the bug reporter
+may wish to keep confidential. To address this concern, the
+.B \-s
+option can be specified. This will cause
+.B e2image
+to scramble directory entries and zero out any unused portions
+of the directory blocks before writing the image file. However,
+the
+.B \-s
+option will prevent analysis of problems related to hash-tree indexed
+directories.
+.PP
+Note that a QCOW2 image created by
+.B e2image
+is a regular QCOW2 image and can be processed by tools aware of the QCOW2
+format, such as
+.BR qemu-img .
+.PP
.SH AUTHOR
.B e2image
was written by Theodore Ts'o ([email protected]).
diff --git a/misc/e2image.c b/misc/e2image.c
index 003ac5a..6dc78d3 100644
--- a/misc/e2image.c
+++ b/misc/e2image.c
@@ -33,6 +33,7 @@ extern int optind;
#include <errno.h>
#include <sys/stat.h>
#include <sys/types.h>
+#include <assert.h>

#include "ext2fs/ext2_fs.h"
#include "ext2fs/ext2fs.h"
@@ -40,26 +41,96 @@ extern int optind;
#include "uuid/uuid.h"
#include "e2p/e2p.h"
#include "ext2fs/e2image.h"
+#include "ext2fs/qcow2.h"

#include "../version.h"
#include "nls-enable.h"

+/* QCOW_OFLAG_COPIED is provided by ext2fs/qcow2.h, included above */
+
+
const char * program_name = "e2image";
char * device_name = NULL;

+/*
+ * Round offset up to the next multiple of n.  The mask arithmetic only
+ * gives a meaningful result when n is a power of two.
+ */
+static blk64_t align_offset(blk64_t offset, int n)
+{
+	const int low_bits = n - 1;
+
+	return (offset + low_bits) & ~low_bits;
+}
+
+/*
+ * Return log2(size), or -1 when size is zero or not a power of two.
+ */
+static int get_bits_from_size(size_t size)
+{
+	int bits;
+
+	if (!size)
+		return -1;
+
+	for (bits = 0; size > 1; size >>= 1, bits++) {
+		/* A set low bit while more bits remain: not a power of two */
+		if (size & 1)
+			return -1;
+	}
+
+	return bits;
+}
+
static void usage(void)
{
- fprintf(stderr, _("Usage: %s [-rsI] device image_file\n"),
+ fprintf(stderr, _("Usage: %s [-rsIQ] device image_file\n"),
program_name);
exit (1);
}

-static void write_header(int fd, struct ext2_image_hdr *hdr, int blocksize)
+/*
+ * Write blocksize bytes from buf to fd at the current file position.
+ *
+ * A zero blocksize is a no-op.  A NULL buf means "write zeroes": a
+ * temporary zero-filled buffer is used instead.  Any failed or short
+ * write is fatal; 'block' is only used to pick the error message (a
+ * non-zero value is reported as the block number being written).
+ */
+static void generic_write(int fd, char *buf, int blocksize, blk64_t block)
+{
+	char *zeroes = NULL;
+	int written;
+
+	if (blocksize == 0)
+		return;
+
+	if (buf == NULL) {
+		zeroes = calloc(1, blocksize);
+		if (zeroes == NULL) {
+			com_err(program_name, ENOMEM, "while allocating buffer");
+			exit(1);
+		}
+		buf = zeroes;
+	}
+
+	written = write(fd, buf, blocksize);
+	if (written != blocksize) {
+		/* errno is only meaningful when write() itself failed */
+		errcode_t err = (written == -1) ? errno : 0;
+
+		if (block != 0)
+			com_err(program_name, err, "error writing block %llu",
+				block);
+		else
+			com_err(program_name, err, "error in write()");
+
+		exit(1);
+	}
+
+	/* NULL unless the zero buffer was allocated above */
+	free(zeroes);
+}
+
+static void write_header(int fd, void *hdr, int hdr_size, int wrt_size)
{
char *header_buf;
int actual;

- header_buf = malloc(blocksize);
+	/*
+	 * Sanity check: hdr_size bytes are copied into a wrt_size byte
+	 * buffer below, so a larger header must not be allowed through.
+	 */
+	if (hdr_size > wrt_size) {
+		fprintf(stderr, _("Error: header size is bigger than "
+				  "wrt_size\n"));
+		exit(1);
+	}
+
+ header_buf = malloc(wrt_size);
if (!header_buf) {
fputs(_("Couldn't allocate header buffer\n"), stderr);
exit(1);
@@ -69,21 +140,13 @@ static void write_header(int fd, struct ext2_image_hdr *hdr, int blocksize)
perror("lseek while writing header");
exit(1);
}
- memset(header_buf, 0, blocksize);
+ memset(header_buf, 0, wrt_size);

if (hdr)
- memcpy(header_buf, hdr, sizeof(struct ext2_image_hdr));
+ memcpy(header_buf, hdr, hdr_size);
+
+ generic_write(fd, header_buf, wrt_size, 0);

- actual = write(fd, header_buf, blocksize);
- if (actual < 0) {
- perror("write header");
- exit(1);
- }
- if (actual != blocksize) {
- fprintf(stderr, _("short write (only %d bytes) for "
- "writing image header"), actual);
- exit(1);
- }
free(header_buf);
}

@@ -93,7 +156,7 @@ static void write_image_file(ext2_filsys fs, int fd)
struct stat st;
errcode_t retval;

- write_header(fd, NULL, fs->blocksize);
+ write_header(fd, NULL, fs->blocksize, fs->blocksize);
memset(&hdr, 0, sizeof(struct ext2_image_hdr));

hdr.offset_super = lseek(fd, 0, SEEK_CUR);
@@ -142,7 +205,7 @@ static void write_image_file(ext2_filsys fs, int fd)
memcpy(hdr.fs_uuid, fs->super->s_uuid, sizeof(hdr.fs_uuid));

hdr.image_time = time(0);
- write_header(fd, &hdr, fs->blocksize);
+ write_header(fd, &hdr, fs->blocksize, fs->blocksize);
}

/*
@@ -150,6 +213,7 @@ static void write_image_file(ext2_filsys fs, int fd)
*/
ext2fs_block_bitmap meta_block_map;
ext2fs_block_bitmap scramble_block_map; /* Directory blocks to be scrambled */
+blk64_t meta_blocks_count;

struct process_block_struct {
ext2_ino_t ino;
@@ -226,6 +290,7 @@ static int process_dir_block(ext2_filsys fs EXT2FS_ATTR((unused)),
p = (struct process_block_struct *) priv_data;

ext2fs_mark_block_bitmap2(meta_block_map, *block_nr);
+ meta_blocks_count++;
if (scramble_block_map && p->is_dir && blockcnt >= 0)
ext2fs_mark_block_bitmap2(scramble_block_map, *block_nr);
return 0;
@@ -240,6 +305,7 @@ static int process_file_block(ext2_filsys fs EXT2FS_ATTR((unused)),
{
if (blockcnt < 0) {
ext2fs_mark_block_bitmap2(meta_block_map, *block_nr);
+ meta_blocks_count++;
}
return 0;
}
@@ -254,6 +320,7 @@ static void mark_table_blocks(ext2_filsys fs)
* Mark primary superblock
*/
ext2fs_mark_block_bitmap2(meta_block_map, first_block);
+ meta_blocks_count++;

/*
* Mark the primary superblock descriptors
@@ -262,6 +329,7 @@ static void mark_table_blocks(ext2_filsys fs)
ext2fs_mark_block_bitmap2(meta_block_map,
ext2fs_descriptor_block_loc2(fs, first_block, j));
}
+ meta_blocks_count += fs->desc_blocks;

for (i = 0; i < fs->group_desc_count; i++) {
/*
@@ -272,6 +340,7 @@ static void mark_table_blocks(ext2_filsys fs)
j < (unsigned) fs->inode_blocks_per_group;
j++, b++)
ext2fs_mark_block_bitmap2(meta_block_map, b);
+ meta_blocks_count += fs->inode_blocks_per_group;
}

/*
@@ -280,6 +349,7 @@ static void mark_table_blocks(ext2_filsys fs)
if (ext2fs_block_bitmap_loc(fs, i)) {
ext2fs_mark_block_bitmap2(meta_block_map,
ext2fs_block_bitmap_loc(fs, i));
+ meta_blocks_count++;
}

/*
@@ -288,6 +358,7 @@ static void mark_table_blocks(ext2_filsys fs)
if (ext2fs_inode_bitmap_loc(fs, i)) {
ext2fs_mark_block_bitmap2(meta_block_map,
ext2fs_inode_bitmap_loc(fs, i));
+ meta_blocks_count++;
}
}
}
@@ -311,30 +382,20 @@ static int check_zero_block(char *buf, int blocksize)
static void write_block(int fd, char *buf, int sparse_offset,
int blocksize, blk64_t block)
{
- int count;
- errcode_t err;
+ off_t ret = 0;

if (sparse_offset) {
#ifdef HAVE_LSEEK64
- if (lseek64(fd, sparse_offset, SEEK_CUR) < 0)
- perror("lseek");
+ ret = lseek64(fd, sparse_offset, SEEK_CUR);
#else
- if (lseek(fd, sparse_offset, SEEK_CUR) < 0)
- perror("lseek");
+ ret = lseek(fd, sparse_offset, SEEK_CUR);
#endif
}
- if (blocksize) {
- count = write(fd, buf, blocksize);
- if (count != blocksize) {
- if (count == -1)
- err = errno;
- else
- err = 0;
- com_err(program_name, err, "error writing block %llu",
- block);
- exit(1);
- }
+	if (ret < 0) {
+		/* strerror() only returns the text; perror() prints it */
+		perror("lseek");
+		exit(1);
+ generic_write(fd, buf, blocksize, block);
}

int name_id[256];
@@ -445,6 +506,7 @@ static void output_meta_data_blocks(ext2_filsys fs, int fd)
}
sparse += fs->blocksize;
if (sparse >= 1024*1024) {
+
write_block(fd, 0, sparse, 0, 0);
sparse = 0;
}
@@ -456,7 +518,538 @@ static void output_meta_data_blocks(ext2_filsys fs, int fd)
free(buf);
}

-static void write_raw_image_file(ext2_filsys fs, int fd, int scramble_flag)
+/*
+ * Allocate a zero-filled l1 table with image->l1_size entries and attach
+ * it to the image.  Allocation failure is fatal.  sb is currently unused.
+ */
+static void init_l1_table(struct ext2_super_block *sb, struct ext2_qcow2_image *image)
+{
+	__u64 *l1_table;
+
+	l1_table = calloc(image->l1_size, sizeof(__u64));
+	if (!l1_table) {
+		com_err(program_name, ENOMEM, "while allocating l1 table");
+		exit(1);
+	}
+
+	image->l1_table = l1_table;
+}
+
+/*
+ * Create the l2 table cache for the image.  The cache holds one table
+ * per l1 entry, capped at L2_CACHE_PREALLOC, all of them zero-filled and
+ * placed on the free list.  Allocation failure is fatal.
+ */
+static void init_l2_cache(struct ext2_qcow2_image *image)
+{
+	struct ext2_qcow2_l2_cache *cache;
+	struct ext2_qcow2_l2_table *entry;
+	unsigned int n, tables;
+
+	cache = calloc(1, sizeof(*cache));
+	if (cache == NULL)
+		goto alloc_err;
+
+	tables = image->l1_size;
+	if (tables > L2_CACHE_PREALLOC)
+		tables = L2_CACHE_PREALLOC;
+
+	cache->count = tables;
+	cache->free = tables;
+	cache->next_offset = image->l2_offset;
+
+	for (n = 0; n < tables; n++) {
+		entry = calloc(1, sizeof(*entry));
+		if (entry == NULL)
+			goto alloc_err;
+
+		entry->data = calloc(image->l2_size, sizeof(__u64));
+		if (entry->data == NULL)
+			goto alloc_err;
+
+		/* Push onto the head of the free list */
+		entry->next = cache->free_head;
+		cache->free_head = entry;
+	}
+
+	image->l2_cache = cache;
+	return;
+
+alloc_err:
+	com_err(program_name, ENOMEM, "while allocating l2 cache");
+	exit(1);
+}
+
+/*
+ * Free the l2 cache of the image together with every table it owns.
+ *
+ * Tables still on the used list have not been written out; they are
+ * freed as well, after warning that their contents are lost.
+ */
+static void put_l2_cache(struct ext2_qcow2_image *image)
+{
+	struct ext2_qcow2_l2_cache *cache = image->l2_cache;
+	struct ext2_qcow2_l2_table *tmp, *table;
+
+	if (!cache)
+		return;
+
+	if (cache->free < cache->count) {
+		__u32 used = cache->count - cache->free;
+
+		fprintf(stderr, "Warning: There are still tables in the "
+				"cache while putting the cache, data will "
+				"be lost so the image may not be valid.\n");
+		table = cache->used_head;
+		cache->used_head = NULL;
+		cache->used_tail = NULL;
+		/*
+		 * Free exactly the tables accounted as used.  Bounding the
+		 * walk by the counter (instead of looping until the counters
+		 * match, which never happens here) guarantees termination
+		 * and keeps the walk from running past the last used table.
+		 */
+		while (table && used) {
+			tmp = table;
+			table = table->next;
+			free(tmp->data);
+			free(tmp);
+			used--;
+		}
+	}
+
+	table = cache->free_head;
+	cache->free_head = NULL;
+	while (table) {
+		tmp = table;
+		table = table->next;
+		free(tmp->data);
+		free(tmp);
+	}
+
+	free(cache);
+	image->l2_cache = NULL;
+}
+
+/*
+ * Set up the refcount bookkeeping of the image: size the refcount table
+ * that will be stored at table_offset and allocate the in-memory
+ * refcount table and the first refcount block, both zero-filled.
+ *
+ * Returns 0 on success or ENOMEM if an allocation fails; nothing is left
+ * allocated in that case.
+ */
+static int init_refcount(struct ext2_qcow2_image *img, blk64_t table_offset)
+{
+	struct ext2_qcow2_refcount *ref;
+	blk64_t table_clusters;
+
+	ref = &(img->refcount);
+
+	/*
+	 * One refcount block addresses 2048 clusters, one refcount table
+	 * addresses cluster/sizeof(__u64) refcount blocks, and we need
+	 * to address meta_blocks_count clusters + qcow2 metadata clusters
+	 * in the worst case.
+	 */
+	table_clusters = meta_blocks_count + (table_offset >> img->cluster_bits);
+	table_clusters >>= (img->cluster_bits + 6 - 1);
+	table_clusters = (table_clusters == 0) ? 1 : table_clusters;
+
+	ref->refcount_table_offset = table_offset;
+	ref->refcount_table_clusters = table_clusters;
+	ref->refcount_table_index = 0;
+	ref->refcount_block_index = 0;
+
+	/* Allocate refcount table */
+	ref->refcount_table = calloc(ref->refcount_table_clusters,
+				     img->cluster_size);
+	if (!ref->refcount_table)
+		return ENOMEM;
+
+	/* Allocate refcount block */
+	ref->refcount_block = calloc(1, img->cluster_size);
+	if (!ref->refcount_block) {
+		free(ref->refcount_table);
+		ref->refcount_table = NULL;
+		return ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill in 'image' and a newly allocated qcow2 header for exporting fs to
+ * the file open on fd.  The filesystem block size is used as the qcow2
+ * cluster size.  The image is laid out as: header, l1 table, refcount
+ * table, first l2 table, first refcount block.
+ *
+ * Returns 0 on success or an errno value on failure.
+ */
+static int initialize_qcow2_image(int fd, ext2_filsys fs,
+				  struct ext2_qcow2_image *image)
+{
+	struct ext2_qcow2_hdr *header;
+	blk64_t total_size, offset;
+	int shift, l2_bits, header_size, l1_size, ret;
+	int cluster_bits = get_bits_from_size(fs->blocksize);
+	struct ext2_super_block *sb = fs->super;
+
+	/*
+	 * cluster_bits is used as a shift count and l2_size is
+	 * 1 << (cluster_bits - 3), so reject anything below 3, including the
+	 * -1 returned for a block size that is not a power of two.
+	 */
+	if (cluster_bits < 3)
+		return EINVAL;
+
+	/* Allocate header */
+	header = calloc(1, sizeof(struct ext2_qcow2_hdr));
+	if (!header)
+		return ENOMEM;
+
+	total_size = ext2fs_blocks_count(sb) << cluster_bits;
+	image->cluster_size = 1 << cluster_bits;
+	image->l2_size = 1 << (cluster_bits - 3);
+	image->cluster_bits = cluster_bits;
+	image->fd = fd;
+
+	header->magic = ext2fs_cpu_to_be32(QCOW_MAGIC);
+	header->version = ext2fs_cpu_to_be32(QCOW_VERSION);
+	header->size = ext2fs_cpu_to_be64(total_size);
+	header->cluster_bits = ext2fs_cpu_to_be32(cluster_bits);
+
+	header_size = (sizeof(struct ext2_qcow2_hdr) + 7) & ~7;
+	offset = align_offset(header_size, image->cluster_size);
+
+	header->l1_table_offset = ext2fs_cpu_to_be64(offset);
+	image->l1_offset = offset;
+
+	/* One l1 entry covers l2_size clusters, i.e. 1 << shift bytes */
+	l2_bits = cluster_bits - 3;
+	shift = cluster_bits + l2_bits;
+	l1_size = ((total_size + (1LL << shift) - 1) >> shift);
+	header->l1_size = ext2fs_cpu_to_be32(l1_size);
+	image->l1_size = l1_size;
+
+	/* Make space for L1 table */
+	offset += align_offset(l1_size * sizeof(blk64_t), image->cluster_size);
+
+	/* Initialize refcounting */
+	ret = init_refcount(image, offset);
+	if (ret) {
+		/* The header is not attached to the image yet */
+		free(header);
+		return ret;
+	}
+	header->refcount_table_offset = ext2fs_cpu_to_be64(offset);
+	header->refcount_table_clusters =
+		ext2fs_cpu_to_be32(image->refcount.refcount_table_clusters);
+	offset += image->cluster_size;
+	offset += image->refcount.refcount_table_clusters << image->cluster_bits;
+
+	/* Make space for L2 tables */
+	image->l2_offset = offset;
+	offset += image->cluster_size;
+
+	/* Make space for first refcount block */
+	image->refcount.refcount_block_offset = offset;
+
+	image->hdr = header;
+	/* Initialize l1 and l2 tables */
+	init_l1_table(sb, image);
+	init_l2_cache(image);
+
+	return 0;
+}
+
+/*
+ * Release an image descriptor: its header, l1 table, refcount table and
+ * block, the l2 cache, and finally the descriptor itself.  NULL is
+ * accepted and ignored.
+ */
+static void free_qcow2_image(struct ext2_qcow2_image *img)
+{
+	if (img == NULL)
+		return;
+
+	/* free() ignores NULL, so unset members need no special casing */
+	free(img->hdr);
+	free(img->l1_table);
+	free(img->refcount.refcount_table);
+	free(img->refcount.refcount_block);
+
+	put_l2_cache(img);
+
+	free(img);
+}
+
+/**
+ * Put table from used list (used_head) into free list (free_head).
+ * The table is zeroed before it is put on the free list.
+ * l2_table is used to return pointer to the next used table (used_head).
+ * The used list must not be empty.
+ */
+static void put_used_table(struct ext2_qcow2_image *img,
+			   struct ext2_qcow2_l2_table **l2_table)
+{
+	struct ext2_qcow2_l2_cache *cache = img->l2_cache;
+	struct ext2_qcow2_l2_table *table;
+
+	table = cache->used_head;
+	/* Check before the first dereference, not after it */
+	assert(table);
+
+	cache->used_head = table->next;
+	if (!table->next)
+		cache->used_tail = NULL;
+
+	/* Clean the table for case we will need to use it again */
+	memset(table->data, 0, img->cluster_size);
+	table->next = cache->free_head;
+	cache->free_head = table;
+
+	cache->free++;
+
+	*l2_table = cache->used_head;
+}
+
+/*
+ * Write every table on the used list of the l2 cache to its offset in
+ * the image and move it to the free list.  The file position of the
+ * image is saved on entry and restored before returning.  Any seek
+ * failure is fatal.
+ */
+static void flush_l2_cache(struct ext2_qcow2_image *image)
+{
+	/* Signed, so that a failing lseek() can actually be detected */
+	off_t saved_pos;
+	blk64_t seek = 0;
+	struct ext2_qcow2_l2_cache *cache = image->l2_cache;
+	struct ext2_qcow2_l2_table *table = cache->used_head;
+	int fd = image->fd;
+
+	/* Store current position */
+	saved_pos = lseek(fd, 0, SEEK_CUR);
+	if (saved_pos < 0) {
+		perror("lseek");
+		exit(1);
+	}
+
+	while (cache->free < cache->count) {
+		assert(table);
+
+		/* Only seek when the table does not follow the previous one */
+		if (seek != table->offset) {
+			if (lseek(fd, table->offset, SEEK_SET) < 0) {
+				perror("lseek");
+				exit(1);
+			}
+			seek = table->offset;
+		}
+
+		generic_write(fd, (char *)table->data, image->cluster_size , 0);
+		put_used_table(image, &table);
+		seek += image->cluster_size;
+	}
+
+	/* Restore previous position */
+	if (lseek(fd, saved_pos, SEEK_SET) < 0) {
+		perror("lseek");
+		exit(1);
+	}
+}
+
+/**
+ * Get first free table (from free_head) and put it into tail of used list
+ * (to used_tail).  If no table is free, the cache is flushed first.
+ * l2_table is used to return pointer to moved table.
+ */
+static void get_free_table(struct ext2_qcow2_image *image,
+			   struct ext2_qcow2_l2_table **l2_table)
+{
+	struct ext2_qcow2_l2_table *table;
+	struct ext2_qcow2_l2_cache *cache = image->l2_cache;
+
+	if (0 == cache->free)
+		flush_l2_cache(image);
+
+	table = cache->free_head;
+	assert(table);
+	cache->free_head = table->next;
+	/*
+	 * The table becomes the tail of the used list; drop the stale link
+	 * so that the used list does not run on into the free list.
+	 */
+	table->next = NULL;
+
+	if (cache->used_tail)
+		cache->used_tail->next = table;
+	else
+		/* First item in the used list */
+		cache->used_head = table;
+
+	cache->used_tail = table;
+	cache->free--;
+
+	*l2_table = table;
+}
+
+/*
+ * Record in the l2 tables that filesystem block 'blk' is stored at image
+ * offset 'data'.
+ *
+ * When blk does not belong to the table at the tail of the used list, a
+ * new table is taken from the cache: it is placed at the cache's
+ * next_offset, hooked into the l1 table, and 'next' becomes the offset
+ * reserved for the table after it.
+ *
+ * Returns 1 if a new l2 table was started, 0 otherwise.
+ */
+static int add_l2_item(struct ext2_qcow2_image *img, blk64_t blk,
+			blk64_t data, blk64_t next)
+{
+	struct ext2_qcow2_l2_cache *cache = img->l2_cache;
+	struct ext2_qcow2_l2_table *cur = cache->used_tail;
+	const blk64_t l1_slot = blk / img->l2_size;
+	const blk64_t l2_slot = blk & (img->l2_size - 1);
+	int started_table = 0;
+
+	if (cur == NULL || cur->l1_index != l1_slot) {
+		get_free_table(img, &cur);
+		cur->l1_index = l1_slot;
+		cur->offset = cache->next_offset;
+		cache->next_offset = next;
+		img->l1_table[l1_slot] =
+			ext2fs_cpu_to_be64(cur->offset | QCOW_OFLAG_COPIED);
+		started_table = 1;
+	}
+
+	cur->data[l2_slot] = ext2fs_cpu_to_be64(data | QCOW_OFLAG_COPIED);
+
+	return started_table;
+}
+
+/*
+ * Account one reference for the cluster at image offset 'offset'.
+ *
+ * When the offset falls under a different refcount table entry than the
+ * current one, the current refcount block is written to the image at its
+ * recorded offset, registered in the refcount table, and a new, empty
+ * block is started whose image offset will be rfblk_pos.  Note that this
+ * moves the file position of fd.
+ *
+ * Returns 1 if a new refcount block was started, 0 otherwise.
+ */
+static int update_refcount(int fd, struct ext2_qcow2_image *img,
+			   blk64_t offset, blk64_t rfblk_pos)
+{
+	struct ext2_qcow2_refcount *ref;
+	__u32 table_index;
+	int ret = 0;
+
+	ref = &(img->refcount);
+	table_index = offset >> (2 * img->cluster_bits - 1);
+
+	/*
+	 * Need to create new refcount block when the offset addresses
+	 * another item in the refcount table
+	 */
+	if (table_index != ref->refcount_table_index) {
+
+		if (lseek(fd, ref->refcount_block_offset, SEEK_SET) < 0) {
+			/* strerror() only returns the text; print it */
+			perror("lseek");
+			exit(1);
+		}
+
+		generic_write(fd, (char *)ref->refcount_block,
+			      img->cluster_size, 0);
+		memset((char *)ref->refcount_block, 0, img->cluster_size);
+
+		ref->refcount_table[ref->refcount_table_index] =
+			ext2fs_cpu_to_be64(ref->refcount_block_offset);
+		ref->refcount_block_offset = rfblk_pos;
+		ref->refcount_block_index = 0;
+		ref->refcount_table_index = table_index;
+		ret++;
+	}
+
+	/*
+	 * We are relying on the fact that we are creating the qcow2
+	 * image sequentially, hence we will always allocate refcount
+	 * block items sequentially.
+	 */
+	ref->refcount_block[ref->refcount_block_index] = ext2fs_cpu_to_be16(1);
+	ref->refcount_block_index++;
+	return ret;
+}
+
+/*
+ * Write the refcount structures to the image: register the current
+ * refcount block in the refcount table, then write the whole table and
+ * the current block at their recorded offsets.  Seek failures are fatal.
+ *
+ * Always returns 0.
+ */
+static int sync_refcount(int fd, struct ext2_qcow2_image *img)
+{
+	struct ext2_qcow2_refcount *ref;
+
+	ref = &(img->refcount);
+
+	ref->refcount_table[ref->refcount_table_index] =
+		ext2fs_cpu_to_be64(ref->refcount_block_offset);
+	if (lseek(fd, ref->refcount_table_offset, SEEK_SET) < 0) {
+		/* strerror() only returns the text; print it */
+		perror("lseek");
+		exit(1);
+	}
+	generic_write(fd, (char *)ref->refcount_table,
+		      ref->refcount_table_clusters << img->cluster_bits, 0);
+
+	if (lseek(fd, ref->refcount_block_offset, SEEK_SET) < 0) {
+		perror("lseek");
+		exit(1);
+	}
+	generic_write(fd, (char *)ref->refcount_block, img->cluster_size, 0);
+	return 0;
+}
+
+/*
+ * Write the metadata blocks of fs (those marked in meta_block_map) to fd
+ * as a qcow2 image.
+ *
+ * The qcow2 header is written first and the qcow2 metadata clusters are
+ * refcounted.  Then every marked, non-zero block is appended to the
+ * image, its refcount and l2 entry being recorded on the way; blocks
+ * marked in scramble_block_map are scrambled first.  Finally the cached
+ * l2 tables, the refcount structures and the l1 table are written out.
+ * Allocation, seek and write failures are fatal.
+ */
+static void output_qcow2_meta_data_blocks(ext2_filsys fs, int fd)
+{
+	errcode_t retval;
+	blk64_t blk, offset, size, end;
+	char *buf;
+	struct ext2_qcow2_image *img;
+	unsigned int header_size;
+
+	/* allocate struct ext2_qcow2_image */
+	img = malloc(sizeof(struct ext2_qcow2_image));
+	if (!img) {
+		com_err(program_name, ENOMEM, "while allocating "
+			"ext2_qcow2_image");
+		exit(1);
+	}
+
+	retval = initialize_qcow2_image(fd, fs, img);
+	if (retval) {
+		com_err(program_name, retval, "while initializing "
+			"ext2_qcow2_image");
+		exit(1);
+	}
+	header_size = align_offset(sizeof(struct ext2_qcow2_hdr),
+				   img->cluster_size);
+	write_header(fd, img->hdr, sizeof(struct ext2_qcow2_hdr), header_size);
+
+	/* Refcount all qcow2 related metadata up to refcount_block_offset */
+	end = img->refcount.refcount_block_offset;
+	if (lseek(fd, end, SEEK_SET) < 0) {
+		perror("lseek");
+		exit(1);
+	}
+	blk = end + img->cluster_size;
+	for (offset = 0; offset <= end; offset += img->cluster_size) {
+		if (update_refcount(fd, img, offset, blk)) {
+			blk += img->cluster_size;
+			/*
+			 * If we create new refcount block, we need to refcount
+			 * it as well.
+			 */
+			end += img->cluster_size;
+		}
+	}
+	if (lseek(fd, offset, SEEK_SET) < 0) {
+		perror("lseek");
+		exit(1);
+	}
+
+	buf = malloc(fs->blocksize);
+	if (!buf) {
+		com_err(program_name, errno, "while allocating buffer");
+		exit(1);
+	}
+	/* Write qcow2 data blocks */
+	for (blk = 0; blk < ext2fs_blocks_count(fs->super); blk++) {
+		if ((blk >= fs->super->s_first_data_block) &&
+		    ext2fs_test_block_bitmap2(meta_block_map, blk)) {
+			retval = io_channel_read_blk64(fs->io, blk, 1, buf);
+			if (retval) {
+				com_err(program_name, retval,
+					"error reading block %llu", blk);
+			}
+			if (scramble_block_map &&
+			    ext2fs_test_block_bitmap2(scramble_block_map, blk))
+				scramble_dir_block(fs, blk, buf);
+			/* All-zero blocks are not stored in the image */
+			if (check_zero_block(buf, fs->blocksize))
+				continue;
+
+			if (update_refcount(fd, img, offset, offset)) {
+				/* Make space for another refcount block */
+				offset += img->cluster_size;
+				if (lseek(fd, offset, SEEK_SET) < 0) {
+					perror("lseek");
+					exit(1);
+				}
+				/*
+				 * We have created the new refcount block, this
+				 * means that we need to refcount it as well.
+				 * So the previous update_refcount refcounted
+				 * the block itself and now we are going to
+				 * create refcount for data. New refcount block
+				 * should not be created!
+				 */
+				if (update_refcount(fd, img, offset, offset)) {
+					fprintf(stderr, "Programming error\n");
+					exit(1);
+				}
+			}
+
+			generic_write(fd, buf, fs->blocksize, 0);
+
+			if (add_l2_item(img, blk, offset,
+					offset + img->cluster_size)) {
+				/*
+				 * A new l2 table was started; the cluster
+				 * following the data is reserved for the next
+				 * table and has to be refcounted as well.
+				 */
+				offset += img->cluster_size;
+				if (update_refcount(fd, img, offset,
+						    offset + img->cluster_size)) {
+					offset += img->cluster_size;
+					if (update_refcount(fd, img, offset,
+							    offset)) {
+						fprintf(stderr, "Programming "
+							"error\n");
+						exit(1);
+					}
+				}
+				offset += img->cluster_size;
+				if (lseek(fd, offset, SEEK_SET) < 0) {
+					perror("lseek");
+					exit(1);
+				}
+				continue;
+			}
+
+			offset += img->cluster_size;
+		}
+	}
+	update_refcount(fd, img, offset, offset);
+	flush_l2_cache(img);
+	sync_refcount(fd, img);
+
+	/* Write l1_table*/
+	if (lseek(fd, img->l1_offset, SEEK_SET) < 0) {
+		perror("lseek");
+		exit(1);
+	}
+	size = img->l1_size * sizeof(__u64);
+	generic_write(fd, (char *)img->l1_table, size, 0);
+
+	free(buf);
+	free_qcow2_image(img);
+}
+
+static void write_raw_image_file(ext2_filsys fs, int fd, int type, int flags)
{
struct process_block_struct pb;
struct ext2_inode inode;
@@ -465,6 +1058,7 @@ static void write_raw_image_file(ext2_filsys fs, int fd, int scramble_flag)
errcode_t retval;
char * block_buf;

+ meta_blocks_count = 0;
retval = ext2fs_allocate_block_bitmap(fs, "in-use block map",
&meta_block_map);
if (retval) {
@@ -472,7 +1066,7 @@ static void write_raw_image_file(ext2_filsys fs, int fd, int scramble_flag)
exit(1);
}

- if (scramble_flag) {
+ if (flags & SCRAMBLE_FLAG) {
retval = ext2fs_allocate_block_bitmap(fs, "scramble block map",
&scramble_block_map);
if (retval) {
@@ -514,6 +1108,7 @@ static void write_raw_image_file(ext2_filsys fs, int fd, int scramble_flag)
if (ext2fs_file_acl_block(&inode)) {
ext2fs_mark_block_bitmap2(meta_block_map,
ext2fs_file_acl_block(&inode));
+ meta_blocks_count++;
}
if (!ext2fs_inode_has_valid_blocks(&inode))
continue;
@@ -551,21 +1146,31 @@ static void write_raw_image_file(ext2_filsys fs, int fd, int scramble_flag)
}
}
use_inode_shortcuts(fs, 0);
- output_meta_data_blocks(fs, fd);
+
+ if (type & IMAGE_QCOW2)
+ output_qcow2_meta_data_blocks(fs, fd);
+ else
+ output_meta_data_blocks(fs, fd);
+
free(block_buf);
+ ext2fs_close_inode_scan(scan);
+ ext2fs_free_block_bitmap(meta_block_map);
+ if (type & SCRAMBLE_FLAG)
+ ext2fs_free_block_bitmap(scramble_block_map);
}

-static void install_image(char *device, char *image_fn, int raw_flag)
+static void install_image(char *device, char *image_fn, int type)
{
errcode_t retval;
ext2_filsys fs;
int open_flag = EXT2_FLAG_IMAGE_FILE;
int fd = 0;
io_manager io_ptr;
- io_channel io, image_io;
+ io_channel io;

- if (raw_flag) {
- com_err(program_name, 0, "Raw images cannot be installed");
+ if (type) {
+ com_err(program_name, 0, "Raw and qcow2 images cannot"
+ "be installed");
exit(1);
}

@@ -607,8 +1212,6 @@ static void install_image(char *device, char *image_fn, int raw_flag)
exit(1);
}

- image_io = fs->io;
-
ext2fs_rewrite_to_io(fs, io);

if (lseek(fd, fs->image_header->offset_inode, SEEK_SET) < 0) {
@@ -633,9 +1236,8 @@ int main (int argc, char ** argv)
ext2_filsys fs;
char *image_fn;
int open_flag = EXT2_FLAG_64BITS;
- int raw_flag = 0;
- int install_flag = 0;
- int scramble_flag = 0;
+ int img_type = 0;
+ int flags = 0;
int fd = 0;

#ifdef ENABLE_NLS
@@ -649,16 +1251,23 @@ int main (int argc, char ** argv)
if (argc && *argv)
program_name = *argv;
add_error_table(&et_ext2_error_table);
- while ((c = getopt (argc, argv, "rsI")) != EOF)
+ while ((c = getopt(argc, argv, "rsIQ")) != EOF)
switch (c) {
case 'r':
- raw_flag++;
+ if (img_type)
+ usage();
+ img_type |= IMAGE_RAW;
break;
case 's':
- scramble_flag++;
+ flags |= SCRAMBLE_FLAG;
break;
case 'I':
- install_flag++;
+ flags |= INSTALL_FLAG;
+ break;
+ case 'Q':
+ if (img_type)
+ usage();
+ img_type |= IMAGE_QCOW2;
break;
default:
usage();
@@ -668,8 +1277,8 @@ int main (int argc, char ** argv)
device_name = argv[optind];
image_fn = argv[optind+1];

- if (install_flag) {
- install_image(device_name, image_fn, raw_flag);
+ if (flags & INSTALL_FLAG) {
+ install_image(device_name, image_fn, img_type);
exit (0);
}

@@ -697,8 +1306,14 @@ int main (int argc, char ** argv)
}
}

- if (raw_flag)
- write_raw_image_file(fs, fd, scramble_flag);
+ if ((img_type & IMAGE_QCOW2) && (fd == 1)) {
+ com_err(program_name, 0, "QCOW2 image can not be written to "
+ "the stdout!\n");
+ exit(1);
+ }
+
+ if (img_type)
+ write_raw_image_file(fs, fd, img_type, flags);
else
write_image_file(fs, fd);

--
1.7.4



2011-02-25 12:49:44

by Lukas Czerner

[permalink] [raw]
Subject: [PATCH 2/4] e2image: Support for conversion QCOW2 image into raw

This commit adds support for converting a QCOW2 image previously created
with e2image into a raw image. The QCOW2 image is detected automatically,
so there is no new option. Just use the following command:

e2image -r image.qcow image.raw

Note that this tool is aimed at quickly converting a qcow2 image created
with e2image into a raw image. In order to improve speed we are making some
assumptions that I believe might not be true for regular qcow2 images. So it
was not tested with regular QCOW2 images and it might not work with
them. The intention of this tool is only to convert images previously
created by e2image.

Note that there is nothing special about QCOW2 images created by e2image
and they can be used with tools like qemu-img or qemu-nbd without any
problems.

Signed-off-by: Lukas Czerner <[email protected]>
---
lib/ext2fs/qcow2.c | 3 ++-
misc/e2image.8.in | 4 ++++
misc/e2image.c | 43 ++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/lib/ext2fs/qcow2.c b/lib/ext2fs/qcow2.c
index 17eab38..9ac050b 100644
--- a/lib/ext2fs/qcow2.c
+++ b/lib/ext2fs/qcow2.c
@@ -209,9 +209,10 @@ int qcow2_write_raw_image(int qcow2_fd, int raw_fd,
}

/* Resize the output image to the filesystem size */
- if (lseek(raw_fd, img.image_size, SEEK_SET) < 0)
+ if (lseek(raw_fd, img.image_size - 1, SEEK_SET) < 0)
return errno;

+ memset(copy_buf, 0, 1);
size = write(raw_fd, copy_buf, 1);
if (size != 1)
return errno;
diff --git a/misc/e2image.8.in b/misc/e2image.8.in
index 4a28580..6f31dd1 100644
--- a/misc/e2image.8.in
+++ b/misc/e2image.8.in
@@ -131,6 +131,10 @@ the
option will prevent analysis of problems related to hash-tree indexed
directories.
.PP
+Note that this will work even if you substitute "/dev/hda1" for another raw
+disk image, or QCOW2 image previously created by
+.BR e2image .
+.PP
.SH QCOW2 IMAGE FILES
The
.B \-Q
diff --git a/misc/e2image.c b/misc/e2image.c
index 6dc78d3..d749981 100644
--- a/misc/e2image.c
+++ b/misc/e2image.c
@@ -1229,16 +1229,33 @@ static void install_image(char *device, char *image_fn, int type)
exit (0);
}

+static struct ext2_qcow2_hdr *check_qcow2_image(int *fd, char *name)
+{
+
+#ifdef HAVE_OPEN64
+ *fd = open64(name, O_RDONLY, 0600);
+#else
+ *fd = open(name, O_RDONLY, 0600);
+#endif
+ if (*fd < 0)
+ return NULL;
+
+ return qcow2_read_header(*fd, name);
+}
+
int main (int argc, char ** argv)
{
int c;
errcode_t retval;
ext2_filsys fs;
char *image_fn;
+ struct ext2_qcow2_hdr *header = NULL;
int open_flag = EXT2_FLAG_64BITS;
int img_type = 0;
int flags = 0;
+ int qcow2_fd = 0;
int fd = 0;
+ int ret = 0;

#ifdef ENABLE_NLS
setlocale(LC_MESSAGES, "");
@@ -1282,6 +1299,14 @@ int main (int argc, char ** argv)
exit (0);
}

+ if (img_type & IMAGE_RAW) {
+ header = check_qcow2_image(&qcow2_fd, device_name);
+ if (header) {
+ flags |= IS_QCOW2_FLAG;
+ goto skip_device;
+ }
+ }
+
retval = ext2fs_open (device_name, open_flag, 0, 0,
unix_io_manager, &fs);
if (retval) {
@@ -1291,6 +1316,7 @@ int main (int argc, char ** argv)
exit(1);
}

+skip_device:
if (strcmp(image_fn, "-") == 0)
fd = 1;
else {
@@ -1306,6 +1332,16 @@ int main (int argc, char ** argv)
}
}

+ if (flags & IS_QCOW2_FLAG) {
+ ret = qcow2_write_raw_image(qcow2_fd, fd, header);
+ if (ret)
+ com_err(program_name, errno,
+ _("while trying to convert qcow2 image"
+ " (%s) into raw image (%s)"),
+ device_name, image_fn);
+ goto out;
+ }
+
if ((img_type & IMAGE_QCOW2) && (fd == 1)) {
com_err(program_name, 0, "QCOW2 image can not be written to "
"the stdout!\n");
@@ -1318,6 +1354,11 @@ int main (int argc, char ** argv)
write_image_file(fs, fd);

ext2fs_close (fs);
+out:
+ if (header)
+ free(header);
+ if (qcow2_fd)
+ close(qcow2_fd);
remove_error_table(&et_ext2_error_table);
- exit (0);
+ return ret;
}
--
1.7.4


2011-02-25 12:49:58

by Lukas Czerner

[permalink] [raw]
Subject: [PATCH 4/4] e2fsck: Add QCOW2 support

This commit adds QCOW2 support for e2fsck. In order to avoid creating
real QCOW2 image support, which would require creating a lot of code, we
simply bypass the problem by converting the QCOW2 image into a raw image
and then let e2fsck work with the raw image. Conversion itself can be quite
fast, so it should not be a serious slowdown.

Add '-Q' option to specify the path for the raw image. If not specified, the
raw image will be saved in the /tmp directory in the format
<qcow2_filename>.raw.XXXXXX, where each X is chosen randomly.

Signed-off-by: Lukas Czerner <[email protected]>
---
e2fsck/e2fsck.8.in | 8 +++++-
e2fsck/unix.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/e2fsck/e2fsck.8.in b/e2fsck/e2fsck.8.in
index 3fb15e6..36d1492 100644
--- a/e2fsck/e2fsck.8.in
+++ b/e2fsck/e2fsck.8.in
@@ -8,7 +8,7 @@ e2fsck \- check a Linux ext2/ext3/ext4 file system
.SH SYNOPSIS
.B e2fsck
[
-.B \-pacnyrdfkvtDFV
+.B \-pacnyrdfkvtDFVQ
]
[
.B \-b
@@ -263,6 +263,12 @@ will print a description of the problem and then exit with the value 4
logically or'ed into the exit code. (See the \fBEXIT CODE\fR section.)
This option is normally used by the system's boot scripts. It may not
be specified at the same time as the
+.TP
+.BI \-Q " filename"
+When e2fsck is attempting to check a QCOW2 image, it has to convert the QCOW2
+image into a raw image. This option specifies the filename for the raw image.
+If this option is omitted, the raw image will be created in the /tmp directory.
+.TP
.B \-n
or
.B \-y
diff --git a/e2fsck/unix.c b/e2fsck/unix.c
index 7eb269c..acfff47 100644
--- a/e2fsck/unix.c
+++ b/e2fsck/unix.c
@@ -19,6 +19,7 @@
#include <fcntl.h>
#include <ctype.h>
#include <time.h>
+#include <limits.h>
#ifdef HAVE_SIGNAL_H
#include <signal.h>
#endif
@@ -53,6 +54,7 @@ extern int optind;
#include "e2p/e2p.h"
#include "e2fsck.h"
#include "problem.h"
+#include "ext2fs/qcow2.h"
#include "../version.h"

/* Command line options */
@@ -626,8 +628,10 @@ static const char *config_fn[] = { ROOT_SYSCONFDIR "/e2fsck.conf", 0 };

static errcode_t PRS(int argc, char *argv[], e2fsck_t *ret_ctx)
{
- int flush = 0;
- int c, fd;
+ int flush = 0, raw_name_set = 0;
+ int c, fd, qcow2_fd;
+ struct ext2_qcow2_hdr *header = NULL;
+ char *d_name, raw_name[PATH_MAX];
#ifdef MTRACE
extern void *mallwatch;
#endif
@@ -667,7 +671,7 @@ static errcode_t PRS(int argc, char *argv[], e2fsck_t *ret_ctx)
ctx->program_name = *argv;
else
ctx->program_name = "e2fsck";
- while ((c = getopt (argc, argv, "panyrcC:B:dE:fvtFVM:b:I:j:P:l:L:N:SsDk")) != EOF)
+ while ((c = getopt (argc, argv, "panyrcC:B:dE:fvtFVM:b:I:j:P:l:L:N:SsDkQ:")) != EOF)
switch (c) {
case 'C':
ctx->progress = e2fsck_update_progress;
@@ -790,6 +794,10 @@ static errcode_t PRS(int argc, char *argv[], e2fsck_t *ret_ctx)
case 'k':
keep_bad_blocks++;
break;
+ case 'Q':
+ raw_name_set++;
+ snprintf(raw_name, PATH_MAX, "%s", optarg);
+ break;
default:
usage(ctx);
}
@@ -819,10 +827,66 @@ static errcode_t PRS(int argc, char *argv[], e2fsck_t *ret_ctx)
ctx->io_options = strchr(argv[optind], '?');
if (ctx->io_options)
*ctx->io_options++ = 0;
- ctx->filesystem_name = blkid_get_devname(ctx->blkid, argv[optind], 0);
+
+ d_name = argv[optind];
+
+ /* Check whether the device, of image is QCOW2 */
+#ifdef HAVE_OPEN64
+ qcow2_fd = open64(d_name, O_RDONLY);
+#else
+ qcow2_fd = open(d_name, O_RDONLY);
+#endif
+ if (qcow2_fd < 0)
+ goto skip_qcow2;
+
+ header = qcow2_read_header(qcow2_fd, d_name);
+ if (header) {
+ int raw_fd;
+ char *path;
+ /*
+ * We have qcow2 image, so need to convert it into raw
+ * image, then pass its filename into further e2fsck code.
+ */
+ if (!raw_name_set) {
+ if (!(path = strdup(d_name)))
+ fatal_error(ctx, "Could not allocate path");
+ snprintf(raw_name, PATH_MAX, "/tmp/%s.raw.XXXXXX",
+ basename(path));
+ free(path);
+ raw_fd = mkstemp(raw_name);
+ printf(_("QCOW2 image detected! Converting into raw"
+ " image = %s\n"), raw_name);
+ } else {
+#ifdef HAVE_OPEN64
+ raw_fd = open64(raw_name, O_CREAT|O_TRUNC|O_WRONLY, 0600);
+#else
+ raw_fd = open(raw_name, O_CREAT|O_TRUNC|O_WRONLY, 0600);
+#endif
+ }
+
+ if (raw_fd < 0) {
+ com_err(ctx->program_name, errno,
+ _("while opening raw image file %s"),raw_name);
+ fatal_error(ctx, 0);
+ }
+
+ retval = qcow2_write_raw_image(qcow2_fd, raw_fd, header);
+ if (retval) {
+ com_err(ctx->program_name, retval,
+ _("while converting qcow image %s into "
+ "raw image %s"),d_name, raw_name);
+ fatal_error(ctx, 0);
+ }
+ close(raw_fd);
+ d_name = raw_name;
+ }
+ close(qcow2_fd);
+
+skip_qcow2:
+ ctx->filesystem_name = blkid_get_devname(ctx->blkid, d_name, 0);
if (!ctx->filesystem_name) {
com_err(ctx->program_name, 0, _("Unable to resolve '%s'"),
- argv[optind]);
+ d_name);
fatal_error(ctx, 0);
}
if (extended_opts)
--
1.7.4


2011-02-26 16:28:30

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 1/4] e2image: Add support for qcow2 format

On Fri, Feb 25, 2011 at 01:49:30PM +0100, Lukas Czerner wrote:
> This commit adds support for exporting filesystem into QCOW2 image
> format. Like sparse format this saves space, by writing only necessary
> (metadata blocks) into image. Unlike sparse image, QCOW2 image is NOT
> sparse, hence does not change its size by copying with not-sparse-aware
> tools.
>
> New options '-Q' has been added to tell the e2image to use QCOW2 as an
> output image format. QCOW2 supports encryption and compression, however
> e2image so far does no support such features, however you can still
> scramble filenames with '-s' option.
>
> Signed-off-by: Lukas Czerner <[email protected]>

I noticed that the qcow2 functions have a qcow2_ prefix and aren't
prefixed with ext2_. That's a namespace leakage, although I
understand why adding ext2_ to functions that really have nothing to
do with ext2_ might not make sense.

What this *does* make me wonder though, is whether or not we should
split off qcow2 into its own separate library...

- Ted

2011-02-26 16:44:45

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 4/4] e2fsck: Add QCOW2 support

On Fri, Feb 25, 2011 at 01:49:33PM +0100, Lukas Czerner wrote:
> This commit adds QCOW2 support for e2fsck. In order to avoid creating
> real QCOW2 image support, which would require creating a lot of code, we
> simply bypass the problem by converting the QCOW2 image into raw image
> and than let e2fsck work with raw image. Conversion itself can be quite
> fast, so it should not be a serious slowdown.
>
> Add '-Q' option to specify path for the raw image. It not specified the
> raw image will be saved in /tmp direcotry in format
> <qcow2_filename>.raw.XXXXXX, where X chosen randomly.
>
> Signed-off-by: Lukas Czerner <[email protected]>

If we're just going to convert the qcow2 image into a raw image, that
means that if someone sends us a N gigabyte QCOW2 image, it will take lots
of time (I'm not sure I agree with the "quite fast" part), and consume
an extra N gigabytes of free space to create the raw image.

In that case, I'm not so sure we really want to have a -Q option to
e2fsck. We might be better off simply forcing the use of e2image to
convert the image back.

Note that the other reason why it's a lot better to allow e2fsck to
work on the qcow2 image directly is that if a
customer sends a qcow2's metadata-only image from their 3TB raid
array, we won't be able to expand that to a raw image because of
ext2/3/4's 2TB maximum file size limit. The qcow2 image might be only
a few hundreds of megabytes, so being able to have e2fsck operate on
that image directly would be a huge win.

Adding iomanager support would also allow debugfs to access the qcow2
image directly --- also a win.

Whether or not we add the io_manager support right away (eventually I
think it's a must have feature), I don't think having a "decompress a
qcow2 image to a sparse raw image" makes sense as an explicit e2fsck
option. It just clutters up the e2fsck option space, and people might
be confused because now e2fsck could break because there wasn't enough
free space to decompress the raw image. Also, e2fsck doesn't delete
the /tmp file afterwards, which is bad --- but if it takes a large
amount of time to create the raw image, deleting afterwards is a bit
of waste as well. Probably better to force the user to manage the
converted raw file system image.

- Ted

2011-02-28 09:44:46

by Rogier Wolff

[permalink] [raw]
Subject: Re: [PATCH 4/4] e2fsck: Add QCOW2 support

On Sat, Feb 26, 2011 at 11:44:42AM -0500, Ted Ts'o wrote:
> ext2/3/4's 2TB maximum file size limit. The qcow2 image might be only
> a few hundreds of megabytes, so being able to have e2fsck operate on
> that image directly would be a huge win.

driepoot:~> ls -ls /mnt/md3.img
61558920 -rw------- 1 root root 2937535070208 Feb 26 00:36 /mnt/md3.img

61 Gigabytes in my case... (and my system finished counting: I have
8.9M directories on there...)

Roger.

--
** [email protected] ** http://www.BitWizard.nl/ ** +31-15-2600998 **
** Delftechpark 26 2628 XH Delft, The Netherlands. KVK: 27239233 **
*-- BitWizard writes Linux device drivers for any device you may have! --*
Q: It doesn't work. A: Look buddy, doesn't work is an ambiguous statement.
Does it sit on the couch all day? Is it unemployed? Please be specific!
Define 'it' and what it isn't doing. --------- Adapted from lxrbot FAQ

2011-03-01 11:42:08

by Lukas Czerner

[permalink] [raw]
Subject: Re: [PATCH 4/4] e2fsck: Add QCOW2 support

On Sat, 26 Feb 2011, Ted Ts'o wrote:

> On Fri, Feb 25, 2011 at 01:49:33PM +0100, Lukas Czerner wrote:
> > This commit adds QCOW2 support for e2fsck. In order to avoid creating
> > real QCOW2 image support, which would require creating a lot of code, we
> > simply bypass the problem by converting the QCOW2 image into raw image
> > and than let e2fsck work with raw image. Conversion itself can be quite
> > fast, so it should not be a serious slowdown.
> >
> > Add '-Q' option to specify path for the raw image. It not specified the
> > raw image will be saved in /tmp direcotry in format
> > <qcow2_filename>.raw.XXXXXX, where X chosen randomly.
> >
> > Signed-off-by: Lukas Czerner <[email protected]>
>
> If we're just going to convert the qcow2 image into a raw image, that
> means that if someone sends us a N gigabyte QCOW2 image, it will lots
> of time (I'm not sure I agree with the "quite fast part"), and consume
> an extra N gigabytes of free space to create the raw image.
>
> In that case, I'm not so sure we really want to have a -Q option to
> e2fsck. We might be better off simply forcing the use of e2image to
> convert the image back.
>
> Note that the other reason why it's a lot better to be able to allow
> e2fsck to be able to work on the raw image directly is that if a
> customer sends a qcow2's metadata-only image from their 3TB raid
> array, we won't be able to expand that to a raw image because of
> ext2/3/4's 2TB maximum file size limit. The qcow2 image might be only
> a few hundreds of megabytes, so being able to have e2fsck operate on
> that image directly would be a huge win.
>
> Adding iomanager support would also allow debugfs to access the qcow2
> image directly --- also a win.
>
> Whether or not we add the io_manager support right away (eventually I
> think it's a must have feature), I don't think having a "decompress a
> qcow2 image to a sparse raw image" makes sense as an explicit e2fsck
> option. It just clutters up the e2fsck option space, and people might
> be confused because now e2fsck could break because there wasn't enough
> free space to decompress the raw image. Also, e2fsck doesn't delete
> the /tmp file afterwards, which is bad --- but if it takes a large
> amount of time to create the raw image, deleting afterwards is a bit
> of waste as well. Probably better to force the user to manage the
> converted raw file system image.
>
> - Ted
>

Hi Ted,

sorry for the late answer, but I was running some benchmarks to have some
numbers to throw at you :). Now let's see how "quite fast" it actually is
in comparison:

I have a 6TB raid composed of four drives and I flooded it with lots and
lots of files (copying /usr/share over and over again) and even created
some big files (1M, 20M, 1G, 10G) so the number of used inodes on the
filesystem is 10928139. I am using e2fsck from the top of the master branch.

Before each step I run:
sync; echo 3 > /proc/sys/vm/drop_caches

exporting raw image:
time .//misc/e2image -r /dev/mapper/vg_raid-lv_stripe image.raw

real 12m3.798s
user 2m53.116s
sys 3m38.430s

6,0G image.raw

exporting qcow2 image
time .//misc/e2image -Q /dev/mapper/vg_raid-lv_stripe image.qcow2
e2image 1.41.14 (22-Dec-2010)

real 11m55.574s
user 2m50.521s
sys 3m41.515s

6,1G image.qcow2

So we can see that the running time is essentially the same, so there is
no crazy overhead in creating a qcow2 image. Note that the qcow2 image is
slightly bigger because of all the qcow2 related metadata, and its size
really depends on the size of the device. Also I tried to see how long
it takes to export a bzip2-compressed raw image, but it has been running
for almost one day now, so it is not even comparable.

e2fsck on the device:
time .//e2fsck/e2fsck -fn /dev/mapper/vg_raid-lv_stripe

real 3m9.400s
user 0m47.558s
sys 0m15.098s

e2fsck on the raw image:
time .//e2fsck/e2fsck -fn image.raw

real 2m36.767s
user 0m47.613s
sys 0m8.403s

We can see that e2fsck on the raw image is a bit faster, but that is
obvious since the drive does not have to seek so much (right?).

Now converting qcow2 image into raw image:
time .//misc/e2image -r image.qcow2 image.qcow2.raw

real 1m23.486s
user 0m0.704s
sys 0m22.574s

It is hard to say if it is "quite fast" or not. But I would say it is
not terribly slow either. Just out of curiosity, I have tried to convert
qcow2->raw with the qemu-img convert tool:

time qemu-img convert -O raw image.qcow2 image.qemu.raw
...it has been running for almost an hour now, so it is not comparable either :)

e2fsck on the qcow2 image.
time .//e2fsck/e2fsck -fn -Q ./image.qcow2.img.tmp image.qcow2

real 2m47.256s
user 0m41.646s
sys 0m28.618s

Now that is surprising. Well, not so much actually... We can see that
an e2fsck check on the qcow2 image, including the qcow2->raw conversion, is a
bit slower than checking the raw image (by 7%, which is not much) but it is
still faster than checking the device itself. Now, the reason is probably
that the raw image we are creating is partially loaded into memory, hence
accelerating e2fsck. So I do not think that converting the image before the
check is such a bad idea (especially when you have enough memory :)).

I completely agree that having an io_manager for the qcow2 format would be
cool, if someone is willing to do that, but I am not convinced that it
is worth it. Your concerns are all valid and I agree, however I do not
think e2image is used by regular inexperienced users, so it should not
confuse them, but that is just a stupid assumption :).

Also, remember that if you really do not want to convert the image
because of file size limit, or whatever, you can always use qemu-nbd to
attach qcow2 image into nbd block device and use that as regular device.

Regarding e2fsck and the qcow2 support (or -Q option), I think it is
useful, but I do not really insist on keeping it and, as you said, we can
always force the user to use e2image for conversion. It is just that this way
it seems easier to do it automatically. Maybe we can ask the user whether he
wants to keep the raw image after the check or not?

Regarding the separate qcow2.h file and "qcow2_" prefix: I have done this
because I am using this code from e2image and e2fsck so it seemed
convenient to have it in a separate header, however I guess I can move it
into e2image.c and e2image.h if you want.

So what do you think?

Thanks!
-Lukas

2011-03-07 10:40:03

by Amir Goldstein

[permalink] [raw]
Subject: Re: [PATCH 4/4] e2fsck: Add QCOW2 support

On Tue, Mar 1, 2011 at 1:42 PM, Lukas Czerner <[email protected]> wrote:
> On Sat, 26 Feb 2011, Ted Ts'o wrote:
>
>> On Fri, Feb 25, 2011 at 01:49:33PM +0100, Lukas Czerner wrote:
>> > This commit adds QCOW2 support for e2fsck. In order to avoid creating
>> > real QCOW2 image support, which would require creating a lot of code, we
>> > simply bypass the problem by converting the QCOW2 image into raw image
>> > and than let e2fsck work with raw image. Conversion itself can be quite
>> > fast, so it should not be a serious slowdown.
>> >
>> > Add '-Q' option to specify path for the raw image. It not specified the
>> > raw image will be saved in /tmp direcotry in format
>> > <qcow2_filename>.raw.XXXXXX, where X chosen randomly.
>> >
>> > Signed-off-by: Lukas Czerner <[email protected]>
>>
>> If we're just going to convert the qcow2 image into a raw image, that
>> means that if someone sends us a N gigabyte QCOW2 image, it will lots
>> of time (I'm not sure I agree with the "quite fast part"), and consume
>> an extra N gigabytes of free space to create the raw image.
>>
>> In that case, I'm not so sure we really want to have a -Q option to
>> e2fsck.  We might be better off simply forcing the use of e2image to
>> convert the image back.
>>
>> Note that the other reason why it's a lot better to be able to allow
>> e2fsck to be able to work on the raw image directly is that if a
>> customer sends a qcow2's metadata-only image from their 3TB raid
>> array, we won't be able to expand that to a raw image because of
>> ext2/3/4's 2TB maximum file size limit.  The qcow2 image might be only
>> a few hundreds of megabytes, so being able to have e2fsck operate on
>> that image directly would be a huge win.
>>
>> Adding iomanager support would also allow debugfs to access the qcow2
>> image directly --- also a win.
>>
>> Whether or not we add the io_manager support right away (eventually I
>> think it's a must have feature), I don't think having a "decompress a
>> qcow2 image to a sparse raw image" makes sense as an explicit e2fsck
>> option.  It just clutters up the e2fsck option space, and people might
>> be confused because now e2fsck could break because there wasn't enough
>> free space to decompress the raw image.  Also, e2fsck doesn't delete
>> the /tmp file afterwards, which is bad --- but if it takes a large
>> amount of time to create the raw image, deleting afterwards is a bit
>> of waste as well.  Probably better to force the user to manage the
>> converted raw file system image.
>>
>>                                       - Ted
>>
>
> Hi Ted,
>
> sorry for late answer, but I was running some benchmarks to have some
> numbers to throw at you :). Now let's see how "qite fast" it actually is
> in comparison:
>
> I have 6TB raid composed of four drives and I flooded it with lots and
> lots of files (copying /usr/share over and over again) and even created
> some big files (1M, 20M, 1G, 10G) so the number of used inodes on the
> filesystem is 10928139. I am using e2fsck form top of the master branch.
>
> Before each step I run:
> sync; echo 3 > /proc/sys/vm/drop_caches
>
> exporting raw image:
> time .//misc/e2image -r /dev/mapper/vg_raid-lv_stripe image.raw
>
>        real    12m3.798s
>        user    2m53.116s
>        sys     3m38.430s
>
>        6,0G    image.raw
>
> exporting qcow2 image
> time .//misc/e2image -Q /dev/mapper/vg_raid-lv_stripe image.qcow2
> e2image 1.41.14 (22-Dec-2010)
>
>        real    11m55.574s
>        user    2m50.521s
>        sys     3m41.515s
>
>        6,1G    image.qcow2
>
> So we can see that the running time is essentially the same, so there is
> no crazy overhead in creating qcow2 image. Note that qcow2 image is
> slightly bigger because of all the qcow2 related metadata and it's size
> really depends on the size of the device. Also I tried to see how long
> does it take to export bzipped2 raw image, but it is running almost one
> day now, so it is not even comparable.
>
> e2fsck on the device:
> time .//e2fsck/e2fsck -fn /dev/mapper/vg_raid-lv_stripe
>
>        real    3m9.400s
>        user    0m47.558s
>        sys     0m15.098s
>
> e2fsck on the raw image:
> time .//e2fsck/e2fsck -fn image.raw
>
>        real    2m36.767s
>        user    0m47.613s
>        sys     0m8.403s
>
> We can see that e2fsck on the raw image is a bit faster, but that is
> obvious since the drive does not have to seek so much (right?).
>
> Now converting qcow2 image into raw image:
> time .//misc/e2image -r image.qcow2 image.qcow2.raw
>
>        real    1m23.486s
>        user    0m0.704s
>        sys     0m22.574s
>
> It is hard to say if it is "quite fast" or not. But I would say it is
> not terribly slow either. Just out of curiosity, I have tried to convert
> raw->qcow2 with qemu-img convert tool:
>
> time qemu-img convert -O raw image.qcow2 image.qemu.raw
> ..it is running almost an hour now, so it is not comparable as well :)
>
> e2fsck on the qcow2 image.
> time .//e2fsck/e2fsck -fn -Q ./image.qcow2.img.tmp image.qcow2
>
>        real    2m47.256s
>        user    0m41.646s
>        sys     0m28.618s
>
> Now that is surprising. Well, not so much actually.. We can see that
> e2fsck check on the qcow2 image, including qcow2->raw conversion is a
> bit slower than checking raw image (by 7% which is not much) but it is
> still faster than checking device itself. Now, the reason is probably
> that the raw image we are creating is partially loaded into memory, hence
> accelerate e2fsck. So I do not think that converting image before check
> is such a bad idea (especially when you have enough memory:)).
>
> I completely agree that having io_manager for the qcow2 format would be
> cool, if someone is willing to do that, but I am not convinced that it
> is worth it. Your concerns are all valid and I agree, however I do not
> think e2image is used by regular unexperienced users, so it should not
> confuse them, but that is just stupid assumption :).
>
> Also, remember that if you really do not want to convert the image
> because of file size limit, or whatever, you can always use qemu-nbd to
> attach qcow2 image into nbd block device and use that as regular device.

Did you consider the possibility of using the QCOW2 format for doing a
"tryout" fsck on the filesystem, with the option to roll back?

If QCOW2 image is created with the 'backing_file' option set to the origin
block device (and 'backing_fmt' is set to 'host_device'), then qemu-nbd
will be able to see the exported image metadata as well as the filesystem
data.

You can then do an "intrusive" fsck run on the NBD, mount your filesystem
(from the NBD) and view the results.

If you are satisfied with the results, you can apply the fsck changes to the
origin block device (there is probably a qemu-img command to do that).
If you are unsatisfied with the results, you can simply discard the image
or better yet, revert to a QCOW2 snapshot, which you created just before
running fsck.

Can you provide the performance figures for running fsck over NBD?

>
> Regarding the e2fsck and the qcow2 support (or -Q option), I think it is
> useful, but I do not really insist on keeping it and as you said we can
> always force user to use e2image for conversion. It is just, this way it
> seems easier to do it automatically. Maybe we can ask user whether he
> wants to keep the raw image after the check or not ?
>
> Regaring separate qcow2.h file and "qcow2_" prefix. I have done this
> because I am using this code from e2image and e2fsck so it seemed
> convenient to have it in separate header, however I guess I can move it
> into e2image.c and e2image.h if you want.
>
> So what do you think.
>
> Thanks!
> -Lukas
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

2011-03-07 12:40:38

by Lukas Czerner

[permalink] [raw]
Subject: Re: [PATCH 4/4] e2fsck: Add QCOW2 support

On Mon, 7 Mar 2011, Amir Goldstein wrote:

> On Tue, Mar 1, 2011 at 1:42 PM, Lukas Czerner <[email protected]> wrote:
> > On Sat, 26 Feb 2011, Ted Ts'o wrote:
> >
> >> On Fri, Feb 25, 2011 at 01:49:33PM +0100, Lukas Czerner wrote:
> >> > This commit adds QCOW2 support for e2fsck. In order to avoid creating
> >> > real QCOW2 image support, which would require creating a lot of code, we
> >> > simply bypass the problem by converting the QCOW2 image into raw image
> >> > and than let e2fsck work with raw image. Conversion itself can be quite
> >> > fast, so it should not be a serious slowdown.
> >> >
> >> > Add '-Q' option to specify path for the raw image. It not specified the
> >> > raw image will be saved in /tmp direcotry in format
> >> > <qcow2_filename>.raw.XXXXXX, where X chosen randomly.
> >> >
> >> > Signed-off-by: Lukas Czerner <[email protected]>
> >>
> >> If we're just going to convert the qcow2 image into a raw image, that
> >> means that if someone sends us a N gigabyte QCOW2 image, it will take lots
> >> of time (I'm not sure I agree with the "quite fast part"), and consume
> >> an extra N gigabytes of free space to create the raw image.
> >>
> >> In that case, I'm not so sure we really want to have a -Q option to
> >> e2fsck.  We might be better off simply forcing the use of e2image to
> >> convert the image back.
> >>
> >> Note that the other reason why it's a lot better to be able to allow
> >> e2fsck to be able to work on the raw image directly is that if a
> >> customer sends a qcow2's metadata-only image from their 3TB raid
> >> array, we won't be able to expand that to a raw image because of
> >> ext2/3/4's 2TB maximum file size limit.  The qcow2 image might be only
> >> a few hundreds of megabytes, so being able to have e2fsck operate on
> >> that image directly would be a huge win.
> >>
> >> Adding iomanager support would also allow debugfs to access the qcow2
> >> image directly --- also a win.
> >>
> >> Whether or not we add the io_manager support right away (eventually I
> >> think it's a must have feature), I don't think having a "decompress a
> >> qcow2 image to a sparse raw image" makes sense as an explicit e2fsck
> >> option.  It just clutters up the e2fsck option space, and people might
> >> be confused because now e2fsck could break because there wasn't enough
> >> free space to decompress the raw image.  Also, e2fsck doesn't delete
> >> the /tmp file afterwards, which is bad --- but if it takes a large
> >> amount of time to create the raw image, deleting afterwards is a bit
> >> of waste as well.  Probably better to force the user to manage the
> >> converted raw file system image.
> >>
> >>                                               - Ted
> >>
> >
> > Hi Ted,
> >
> > sorry for late answer, but I was running some benchmarks to have some
> > numbers to throw at you :). Now let's see how "quite fast" it actually is
> > in comparison:
> >
> > I have 6TB raid composed of four drives and I flooded it with lots and
> > lots of files (copying /usr/share over and over again) and even created
> > some big files (1M, 20M, 1G, 10G) so the number of used inodes on the
> > filesystem is 10928139. I am using e2fsck from the top of the master branch.
> >
> > Before each step I run:
> > sync; echo 3 > /proc/sys/vm/drop_caches
> >
> > exporting raw image:
> > time .//misc/e2image -r /dev/mapper/vg_raid-lv_stripe image.raw
> >
> >         real    12m3.798s
> >         user    2m53.116s
> >         sys     3m38.430s
> >
> >         6,0G    image.raw
> >
> > exporting qcow2 image
> > time .//misc/e2image -Q /dev/mapper/vg_raid-lv_stripe image.qcow2
> > e2image 1.41.14 (22-Dec-2010)
> >
> >         real    11m55.574s
> >         user    2m50.521s
> >         sys     3m41.515s
> >
> >         6,1G    image.qcow2
> >
> > So we can see that the running time is essentially the same, so there is
> > no crazy overhead in creating qcow2 image. Note that qcow2 image is
> > slightly bigger because of all the qcow2 related metadata and its size
> > really depends on the size of the device. Also I tried to see how long
> > does it take to export bzipped2 raw image, but it is running almost one
> > day now, so it is not even comparable.
> >
> > e2fsck on the device:
> > time .//e2fsck/e2fsck -fn /dev/mapper/vg_raid-lv_stripe
> >
> >         real    3m9.400s
> >         user    0m47.558s
> >         sys     0m15.098s
> >
> > e2fsck on the raw image:
> > time .//e2fsck/e2fsck -fn image.raw
> >
> >         real    2m36.767s
> >         user    0m47.613s
> >         sys     0m8.403s
> >
> > We can see that e2fsck on the raw image is a bit faster, but that is
> > obvious since the drive does not have to seek so much (right?).
> >
> > Now converting qcow2 image into raw image:
> > time .//misc/e2image -r image.qcow2 image.qcow2.raw
> >
> >         real    1m23.486s
> >         user    0m0.704s
> >         sys     0m22.574s
> >
> > It is hard to say if it is "quite fast" or not. But I would say it is
> > not terribly slow either. Just out of curiosity, I have tried to convert
> > qcow2->raw with the qemu-img convert tool:
> >
> > time qemu-img convert -O raw image.qcow2 image.qemu.raw
> > ..it is running almost an hour now, so it is not comparable as well :)
> >
> > e2fsck on the qcow2 image.
> > time .//e2fsck/e2fsck -fn -Q ./image.qcow2.img.tmp image.qcow2
> >
> >         real    2m47.256s
> >         user    0m41.646s
> >         sys     0m28.618s
> >
> > Now that is surprising. Well, not so much actually.. We can see that
> > e2fsck check on the qcow2 image, including qcow2->raw conversion is a
> > bit slower than checking raw image (by 7% which is not much) but it is
> > still faster than checking device itself. Now, the reason is probably
> > that the raw image we are creating is partially loaded into memory, hence
> > accelerate e2fsck. So I do not think that converting image before check
> > is such a bad idea (especially when you have enough memory:)).
> >
> > I completely agree that having io_manager for the qcow2 format would be
> > cool, if someone is willing to do that, but I am not convinced that it
> > is worth it. Your concerns are all valid and I agree, however I do not
> > think e2image is used by regular inexperienced users, so it should not
> > confuse them, but that is just stupid assumption :).
> >
> > Also, remember that if you really do not want to convert the image
> > because of file size limit, or whatever, you can always use qemu-nbd to
> > attach qcow2 image into nbd block device and use that as regular device.
>
> Did you consider the possibility to use QCOW2 format for doing a "tryout"
> fsck on the filesystem with the option to rollback?
>
> If QCOW2 image is created with the 'backing_file' option set to the origin
> block device (and 'backing_fmt' is set to 'host_device'), then qemu-nbd
> will be able to see the exported image metadata as well as the filesystem
> data.
>
> You can then do an "intrusive" fsck run on the NBD, mount your filesystem
> (from the NBD) and view the results.
>
> If you are satisfied with the results, you can apply the fsck changes to the
> origin block device (there is probably a qemu-img command to do that).
> If you are unsatisfied with the results, you can simply discard the image
> or better yet, revert to a QCOW2 snapshot, which you created just before
> running fsck.

But this is something you can do even now. You can mount the qcow2
metadata image without any problems, you just will not see any data. But
I can take a look at this functionality, it seems simple enough.

>
> Can you provide the performance figures for running fsck over NBD?

Well, unfortunately I do not have access to the same machine anymore,
but I have some simple results which were obtained elsewhere; due to lack
of proper storage this was done on a loop device (should not affect
raw and qcow2 results).

[+] fsck raw image
real 0m30.176s
user 0m22.397s
sys 0m2.289s

[+] fsck NBD exported qcow2 image
real 0m31.667s
user 0m21.561s
sys 0m3.293s

So you can see that performance here is a bit worse (5%).

Thanks!
-Lukas

>
> >
> > Regarding the e2fsck and the qcow2 support (or -Q option), I think it is
> > useful, but I do not really insist on keeping it and as you said we can
> > always force user to use e2image for conversion. It is just, this way it
> > seems easier to do it automatically. Maybe we can ask user whether he
> > wants to keep the raw image after the check or not ?
> >
> > Regarding separate qcow2.h file and "qcow2_" prefix. I have done this
> > because I am using this code from e2image and e2fsck so it seemed
> > convenient to have it in separate header, however I guess I can move it
> > into e2image.c and e2image.h if you want.
> >
> > So what do you think.
> >
> > Thanks!
> > -Lukas
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> > the body of a message to [email protected]
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
> >
>

--

2011-03-09 16:30:29

by Lukas Czerner

[permalink] [raw]
Subject: Re: [PATCH 4/4] e2fsck: Add QCOW2 support


--snip--
> >
> > Did you consider the possibility to use QCOW2 format for doing a "tryout"
> > fsck on the filesystem with the option to rollback?
> >
> > If QCOW2 image is created with the 'backing_file' option set to the origin
> > block device (and 'backing_fmt' is set to 'host_device'), then qemu-nbd
> > will be able to see the exported image metadata as well as the filesystem
> > data.
> >
> > You can then do an "intrusive" fsck run on the NBD, mount your filesystem
> > (from the NBD) and view the results.
> >
> > If you are satisfied with the results, you can apply the fsck changes to the
> > origin block device (there is probably a qemu-img command to do that).
> > If you are unsatisfied with the results, you can simply discard the image
> > or better yet, revert to a QCOW2 snapshot, which you created just before
> > running fsck.
>
> But this is something you can do even now. You can mount the qcow2
> metadata image without any problems, you just will not see any data. But
> I can take a look at this functionality, it seems simple enough.

So I have done this and it works as expected as long as the device
you've created the image from is present in the system, which might not
be true, especially in the case you are transferring the image to
another machine (bug report).

If the device with the same name as the original does not exist in the
system qemu-nbd is not smart enough to just ignore that fact and mount
the image anyway. And looking at the man page there is no way to do it.

So, the result is I am not going to include this into my patches (if
someone does not change my mind:)) as I do not want to create just-another
switch for e2image. Also I fail to see the benefit of it anyway:).

Thanks!
-Lukas


>
> >
> > Can you provide the performance figures for running fsck over NBD?
>
> Well, unfortunately I do not have access to the same machine anymore,
> but I have some simple results which were obtained elsewhere; due to lack
> of proper storage this was done on a loop device (should not affect
> raw and qcow2 results).
>
> [+] fsck raw image
> real 0m30.176s
> user 0m22.397s
> sys 0m2.289s
>
> [+] fsck NBD exported qcow2 image
> real 0m31.667s
> user 0m21.561s
> sys 0m3.293s
>
> So you can see that performance here is a bit worse (5%).
>
> Thanks!
> -Lukas
>
--snip--

2011-03-09 17:52:52

by Amir Goldstein

[permalink] [raw]
Subject: Re: [PATCH 4/4] e2fsck: Add QCOW2 support

On Wed, Mar 9, 2011 at 6:30 PM, Lukas Czerner <[email protected]> wrote:
>
> --snip--
>> >
>> > Did you consider the possibility to use QCOW2 format for doing a "tryout"
>> > fsck on the filesystem with the option to rollback?
>> >
>> > If QCOW2 image is created with the 'backing_file' option set to the origin
>> > block device (and 'backing_fmt' is set to 'host_device'), then qemu-nbd
>> > will be able to see the exported image metadata as well as the filesystem
>> > data.
>> >
>> > You can then do an "intrusive" fsck run on the NBD, mount your filesystem
>> > (from the NBD) and view the results.
>> >
>> > If you are satisfied with the results, you can apply the fsck changes to the
>> > origin block device (there is probably a qemu-img command to do that).
>> > If you are unsatisfied with the results, you can simply discard the image
>> > or better yet, revert to a QCOW2 snapshot, which you created just before
>> > running fsck.
>>
>> But this is something you can do even now. You can mount the qcow2
>> metadata image without any problems, you just will not see any data. But
>> I can take a look at this functionality, it seems simple enough.
>
> So I have done this and it works as expected as long as the device
> you've created the image from is present in the system, which might not
> be true, especially in the case you are transferring the image to
> another machine (bug report).
>
> If the device with the same name as the original does not exist in the
> system qemu-nbd is not smart enough to just ignore that fact and mount
> the image anyway. And looking at the man page there is no way to do it.
>
> So, the result is I am not going to include this into my patches (if
> someone does not change my mind:)) as I do not want to create just-another
> switch for e2image. Also I fail to see the benefit of it anyway:).
>

The benefit, as I see it, is the following capability:
A user with a corrupted fs, sends an e2image to an expert,
having him examine the file system (so far we already have).
Then the expert can fix the fs image (say using hard core debugfs'ing) and
send it back to the user.
The user can then "test mount" the fixed fs and if his valuable data is back,
send the other half of the payment to the expert, apply the fix to the origin
device and go on with his life.

It's a shame that qemu-nbd doesn't play along with that plan, but you can't
blame it, can you...

Anyway, thanks for testing my idea and thanks for QCOW2 e2image :-)
This is just one example of the nice things that the new e2image format
can be leveraged for.

Amir.

2011-03-17 13:05:21

by Lukas Czerner

[permalink] [raw]
Subject: Re: [PATCH 4/4] e2fsck: Add QCOW2 support

Hi Ted,

any comment on this ?

Thanks!
-Lukas

On Tue, 1 Mar 2011, Lukas Czerner wrote:

> On Sat, 26 Feb 2011, Ted Ts'o wrote:
>
> > On Fri, Feb 25, 2011 at 01:49:33PM +0100, Lukas Czerner wrote:
> > > This commit adds QCOW2 support for e2fsck. In order to avoid creating
> > > real QCOW2 image support, which would require creating a lot of code, we
> > > simply bypass the problem by converting the QCOW2 image into raw image
> > > and then let e2fsck work with raw image. Conversion itself can be quite
> > > fast, so it should not be a serious slowdown.
> > >
> > > Add '-Q' option to specify path for the raw image. If not specified, the
> > > raw image will be saved in /tmp directory in format
> > > <qcow2_filename>.raw.XXXXXX, where X chosen randomly.
> > >
> > > Signed-off-by: Lukas Czerner <[email protected]>
> >
> > If we're just going to convert the qcow2 image into a raw image, that
> > means that if someone sends us a N gigabyte QCOW2 image, it will take lots
> > of time (I'm not sure I agree with the "quite fast part"), and consume
> > an extra N gigabytes of free space to create the raw image.
> >
> > In that case, I'm not so sure we really want to have a -Q option to
> > e2fsck. We might be better off simply forcing the use of e2image to
> > convert the image back.
> >
> > Note that the other reason why it's a lot better to be able to allow
> > e2fsck to be able to work on the raw image directly is that if a
> > customer sends a qcow2's metadata-only image from their 3TB raid
> > array, we won't be able to expand that to a raw image because of
> > ext2/3/4's 2TB maximum file size limit. The qcow2 image might be only
> > a few hundreds of megabytes, so being able to have e2fsck operate on
> > that image directly would be a huge win.
> >
> > Adding iomanager support would also allow debugfs to access the qcow2
> > image directly --- also a win.
> >
> > Whether or not we add the io_manager support right away (eventually I
> > think it's a must have feature), I don't think having a "decompress a
> > qcow2 image to a sparse raw image" makes sense as an explicit e2fsck
> > option. It just clutters up the e2fsck option space, and people might
> > be confused because now e2fsck could break because there wasn't enough
> > free space to decompress the raw image. Also, e2fsck doesn't delete
> > the /tmp file afterwards, which is bad --- but if it takes a large
> > amount of time to create the raw image, deleting afterwards is a bit
> > of waste as well. Probably better to force the user to manage the
> > converted raw file system image.
> >
> > - Ted
> >
>
> Hi Ted,
>
> sorry for late answer, but I was running some benchmarks to have some
> numbers to throw at you :). Now let's see how "quite fast" it actually is
> in comparison:
>
> I have 6TB raid composed of four drives and I flooded it with lots and
> lots of files (copying /usr/share over and over again) and even created
> some big files (1M, 20M, 1G, 10G) so the number of used inodes on the
> filesystem is 10928139. I am using e2fsck from the top of the master branch.
>
> Before each step I run:
> sync; echo 3 > /proc/sys/vm/drop_caches
>
> exporting raw image:
> time .//misc/e2image -r /dev/mapper/vg_raid-lv_stripe image.raw
>
> real 12m3.798s
> user 2m53.116s
> sys 3m38.430s
>
> 6,0G image.raw
>
> exporting qcow2 image
> time .//misc/e2image -Q /dev/mapper/vg_raid-lv_stripe image.qcow2
> e2image 1.41.14 (22-Dec-2010)
>
> real 11m55.574s
> user 2m50.521s
> sys 3m41.515s
>
> 6,1G image.qcow2
>
> So we can see that the running time is essentially the same, so there is
> no crazy overhead in creating qcow2 image. Note that qcow2 image is
> slightly bigger because of all the qcow2 related metadata and its size
> really depends on the size of the device. Also I tried to see how long
> does it take to export bzipped2 raw image, but it is running almost one
> day now, so it is not even comparable.
>
> e2fsck on the device:
> time .//e2fsck/e2fsck -fn /dev/mapper/vg_raid-lv_stripe
>
> real 3m9.400s
> user 0m47.558s
> sys 0m15.098s
>
> e2fsck on the raw image:
> time .//e2fsck/e2fsck -fn image.raw
>
> real 2m36.767s
> user 0m47.613s
> sys 0m8.403s
>
> We can see that e2fsck on the raw image is a bit faster, but that is
> obvious since the drive does not have to seek so much (right?).
>
> Now converting qcow2 image into raw image:
> time .//misc/e2image -r image.qcow2 image.qcow2.raw
>
> real 1m23.486s
> user 0m0.704s
> sys 0m22.574s
>
> It is hard to say if it is "quite fast" or not. But I would say it is
> not terribly slow either. Just out of curiosity, I have tried to convert
> qcow2->raw with the qemu-img convert tool:
>
> time qemu-img convert -O raw image.qcow2 image.qemu.raw
> ..it is running almost an hour now, so it is not comparable as well :)
>
> e2fsck on the qcow2 image.
> time .//e2fsck/e2fsck -fn -Q ./image.qcow2.img.tmp image.qcow2
>
> real 2m47.256s
> user 0m41.646s
> sys 0m28.618s
>
> Now that is surprising. Well, not so much actually.. We can see that
> e2fsck check on the qcow2 image, including qcow2->raw conversion is a
> bit slower than checking raw image (by 7% which is not much) but it is
> still faster than checking device itself. Now, the reason is probably
> that the raw image we are creating is partially loaded into memory, hence
> accelerate e2fsck. So I do not think that converting image before check
> is such a bad idea (especially when you have enough memory:)).
>
> I completely agree that having io_manager for the qcow2 format would be
> cool, if someone is willing to do that, but I am not convinced that it
> is worth it. Your concerns are all valid and I agree, however I do not
> think e2image is used by regular inexperienced users, so it should not
> confuse them, but that is just stupid assumption :).
>
> Also, remember that if you really do not want to convert the image
> because of file size limit, or whatever, you can always use qemu-nbd to
> attach qcow2 image into nbd block device and use that as regular device.
>
> Regarding the e2fsck and the qcow2 support (or -Q option), I think it is
> useful, but I do not really insist on keeping it and as you said we can
> always force user to use e2image for conversion. It is just, this way it
> seems easier to do it automatically. Maybe we can ask user whether he
> wants to keep the raw image after the check or not ?
>
> Regarding separate qcow2.h file and "qcow2_" prefix. I have done this
> because I am using this code from e2image and e2fsck so it seemed
> convenient to have it in separate header, however I guess I can move it
> into e2image.c and e2image.h if you want.
>
> So what do you think.
>
> Thanks!
> -Lukas
>