2011-02-28 02:35:25

by Robin Dong

[permalink] [raw]
Subject: [PATCH 1/2] mksparse: build sparse file from compressed e2image file.

From: Robin Dong <[email protected]>

[Purpose]
After we make an image file with e2image like:
#e2image -r /dev/hda1 - | bzip2 > hda1.bz2
we copy the bz2 file to a remote host and extract it:
#bunzip2 hda1.bz2
the unzipped hda1 file will not be a sparse file and the space occupied
by it is as large as the real /dev/hda1 filesystem.

Therefore, a tool to transform a raw file into a sparse file is necessary.
This patch is a first attempt to provide such a tool, which is called
'mksparse' for now.

[Example]
Extract hda1.bz2 by:
#bunzip2 -c hda1.bz2 | mksparse hda1
the hda1 file will be a sparse file.

Reviewed-by: Coly Li <[email protected]>
Signed-off-by: Robin Dong <[email protected]>
---
misc/Makefile.in | 21 ++++-
misc/mksparse.c | 268 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 285 insertions(+), 4 deletions(-)
create mode 100644 misc/mksparse.c

diff --git a/misc/Makefile.in b/misc/Makefile.in
index 86ee53f..fcd316e 100644
--- a/misc/Makefile.in
+++ b/misc/Makefile.in
@@ -17,6 +17,8 @@ INSTALL = @INSTALL@
@IMAGER_CMT@E2IMAGE_PROG= e2image
@IMAGER_CMT@E2IMAGE_MAN= e2image.8

+@IMAGER_CMT@MKSPARSE_PROG= mksparse
+
@UUIDD_CMT@UUIDD_PROG= uuidd
@UUIDD_CMT@UUIDD_MAN= uuidd.8

@@ -27,7 +29,7 @@ INSTALL = @INSTALL@
@BLKID_CMT@FINDFS_MAN= findfs.8

SPROGS= mke2fs badblocks tune2fs dumpe2fs $(BLKID_PROG) logsave \
- $(E2IMAGE_PROG) @FSCK_PROG@ e2undo
+ $(E2IMAGE_PROG) $(MKSPARSE_PROG) @FSCK_PROG@ e2undo
USPROGS= mklost+found filefrag e2freefrag $(UUIDD_PROG) $(E4DEFRAG_PROG)
SMANPAGES= tune2fs.8 mklost+found.8 mke2fs.8 dumpe2fs.8 badblocks.8 \
e2label.8 $(FINDFS_MAN) $(BLKID_MAN) $(E2IMAGE_MAN) \
@@ -50,6 +52,7 @@ UUIDD_OBJS= uuidd.o
DUMPE2FS_OBJS= dumpe2fs.o
BADBLOCKS_OBJS= badblocks.o
E2IMAGE_OBJS= e2image.o
+MKSPARSE_OBJS= mksparse.o
FSCK_OBJS= fsck.o base_device.o ismounted.o
BLKID_OBJS= blkid.o
FILEFRAG_OBJS= filefrag.o
@@ -68,6 +71,7 @@ PROFILED_UUIDD_OBJS= profiled/uuidd.o
PROFILED_DUMPE2FS_OBJS= profiled/dumpe2fs.o
PROFILED_BADBLOCKS_OBJS= profiled/badblocks.o
PROFILED_E2IMAGE_OBJS= profiled/e2image.o
+PROFILED_MKSPARSE_OBJS= profiled/mksparse.o
PROFILED_FSCK_OBJS= profiled/fsck.o profiled/base_device.o \
profiled/ismounted.o
PROFILED_BLKID_OBJS= profiled/blkid.o
@@ -109,7 +113,7 @@ all:: profiled $(SPROGS) $(UPROGS) $(USPROGS) $(SMANPAGES) $(UMANPAGES) \
@PROFILE_CMT@all:: tune2fs.profiled blkid.profiled e2image.profiled \
e2undo.profiled mke2fs.profiled dumpe2fs.profiled fsck.profiled \
logsave.profiled filefrag.profiled uuidgen.profiled uuidd.profiled \
- e2image.profiled e4defrag.profiled
+ e2image.profiled mksparse.profiled e4defrag.profiled

profiled:
@PROFILE_CMT@ $(E) " MKDIR $@"
@@ -187,6 +191,15 @@ e2image.profiled: $(PROFILED_E2IMAGE_OBJS) $(PROFILED_DEPLIBS)
$(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o e2image.profiled \
$(PROFILED_E2IMAGE_OBJS) $(PROFILED_LIBS) $(LIBINTL)

+mksparse: $(MKSPARSE_OBJS) $(DEPLIBS)
+ $(E) " LD $@"
+ $(Q) $(CC) $(ALL_LDFLAGS) -o mksparse $(MKSPARSE_OBJS) $(LIBS) $(LIBINTL)
+
+mksparse.profiled: $(PROFILED_MKSPARSE_OBJS) $(PROFILED_DEPLIBS)
+ $(E) " LD $@"
+ $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o mksparse.profiled \
+ $(PROFILED_MKSPARSE_OBJS) $(PROFILED_LIBS) $(LIBINTL)
+
e2undo: $(E2UNDO_OBJS) $(DEPLIBS)
$(E) " LD $@"
$(Q) $(CC) $(ALL_LDFLAGS) -o e2undo $(E2UNDO_OBJS) $(LIBS) $(LIBINTL)
@@ -550,8 +563,8 @@ clean:
$(FMANPAGES) \
base_device base_device.out mke2fs.static filefrag e2freefrag \
e2initrd_helper partinfo prof_err.[ch] default_profile.c \
- uuidd e2image tune2fs.static tst_ismounted fsck.profiled \
- blkid.profiled tune2fs.profiled e2image.profiled \
+ uuidd e2image mksparse tune2fs.static tst_ismounted fsck.profiled \
+ blkid.profiled tune2fs.profiled e2image.profiled mksparse.profiled\
e2undo.profiled mke2fs.profiled dumpe2fs.profiled \
logsave.profiled filefrag.profiled uuidgen.profiled \
uuidd.profiled e2image.profiled \
diff --git a/misc/mksparse.c b/misc/mksparse.c
new file mode 100644
index 0000000..9e62fcf
--- /dev/null
+++ b/misc/mksparse.c
@@ -0,0 +1,268 @@
+/*
+ * mksparse.c --- Program which transforms stdin (or a file) into
+ * a new sparse file.
+ *
+ * Copyright 2011 by Taobao, all rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Authors: Robin Dong <[email protected]>
+ */
+
+#define _LARGEFILE_SOURCE
+#define _LARGEFILE64_SOURCE
+
+#include <fcntl.h>
+#include <grp.h>
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#else
+extern char *optarg;
+extern int optind;
+#endif
+#include <stdio.h>
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <limits.h>
+
+#include "ext2fs/ext2fs.h"
+
+#include "../version.h"
+#include "nls-enable.h"
+
+#define KB_SIZE 1024
+#define MB_SIZE (1024*1024)
+#define MIN_BUFFER_SIZE 1024
+#define MAX_BUFFER_SIZE (64*1024*1024)
+#define DEFAULT_BUFFER_SIZE (4*1024)
+
+#define OPEN_SRC_FAIL -1
+#define OPEN_TARGET_FAIL -2
+#define MALLOC_FAIL -3
+#define SEEK_FAIL -4
+#define WRITE_FAIL -5
+
+const char *program_name = "mksparse";
+
+static void usage(void)
+{
+ fprintf(stderr,
+ _("Usage: %s [-s buffer_size] [-i input_file] sparse_file\n"),
+ program_name);
+ exit (1);
+}
+
+static int get_buffer_size(const char *optarg)
+{
+ char *pos = NULL;
+ long val = strtol(optarg, &pos, 0);
+ if (pos == optarg || val == LONG_MAX)
+ return DEFAULT_BUFFER_SIZE;
+
+ switch (*pos) {
+ case 'k':
+ case 'K':
+ val *= KB_SIZE;
+ break;
+ case 'm':
+ case 'M':
+ val *= MB_SIZE;
+ break;
+ case 'b':
+ case 'B':
+ case '\0':
+ break;
+ default:
+ fprintf(stderr, _("Wrong buffer_size %s\n"), optarg);
+ val = -1;
+ goto out;
+ /*
+ * never touch here
+ */
+ break;
+ }
+
+ /*
+ * the buffer_size must be in the range [1KB, 64MB]
+ */
+ if (val > MAX_BUFFER_SIZE) {
+ fprintf(stderr,
+ _("Buffer_size is too large, "
+ "change it to %d bytes\n"),
+ MAX_BUFFER_SIZE);
+ val = MAX_BUFFER_SIZE;
+ }
+
+ if (val < MIN_BUFFER_SIZE) {
+ fprintf(stderr,
+ _("Buffer_size is too small, "
+ "change it to %d bytes\n"),
+ MIN_BUFFER_SIZE);
+ val = MIN_BUFFER_SIZE;
+ }
+
+ /*
+ * round down to a multiple of MIN_BUFFER_SIZE
+ */
+ val &= ~(MIN_BUFFER_SIZE - 1);
+
+out:
+ return val;
+}
+
+static int check_zero(const char *buffer, int buffer_size)
+{
+ long *wp = (long *)buffer;
+
+ while (*(wp++) == 0) {
+ if ((const char *)wp >= buffer + buffer_size)
+ break;
+ }
+
+ return (const char *)wp >= buffer + buffer_size;
+}
+
+int main (int argc, char **argv)
+{
+ int c;
+ char *buffer = NULL;
+ char *if_name = NULL;
+ char *of_name = NULL;
+ int buffer_size = DEFAULT_BUFFER_SIZE;
+ int source_fd = 0;
+ int target_fd = 0;
+ ssize_t ret = 0;
+ int need = 0;
+ int loop;
+ int err_num = 0;
+
+#ifdef ENABLE_NLS
+ setlocale(LC_MESSAGES, "");
+ setlocale(LC_CTYPE, "");
+ bindtextdomain(NLS_CAT_NAME, LOCALEDIR);
+ textdomain(NLS_CAT_NAME);
+#endif
+ fprintf (stderr, "mksparse %s (%s)\n", E2FSPROGS_VERSION,
+ E2FSPROGS_DATE);
+ if (argc && *argv)
+ program_name = *argv;
+ while ((c = getopt (argc, argv, "s:i:")) != EOF)
+ switch (c) {
+ case 's':
+ buffer_size = get_buffer_size(optarg);
+ if (buffer_size < 0)
+ return -1;
+ break;
+ case 'i':
+ if_name = optarg;
+ break;
+ default:
+ usage();
+ }
+
+ if (optind != argc - 1)
+ usage();
+
+ add_error_table(&et_ext2_error_table);
+
+ of_name = argv[optind];
+
+ if (!if_name) {
+ source_fd = 0;
+ } else {
+ source_fd = open(if_name, O_RDONLY);
+ if (source_fd < 0) {
+ com_err (program_name, errno,
+ _("while trying to open %s"), if_name);
+ err_num = OPEN_SRC_FAIL;
+ goto out;
+ }
+ }
+
+ target_fd = open(of_name, O_CREAT|O_TRUNC|O_WRONLY, 0600);
+ if (target_fd < 0) {
+ com_err (program_name, errno,
+ _("while trying to open %s"), of_name);
+ err_num = OPEN_TARGET_FAIL;
+ goto out;
+ }
+
+ buffer = malloc(buffer_size);
+ if (!buffer) {
+ com_err (program_name, ENOMEM, _("while allocating buffer"));
+ err_num = MALLOC_FAIL;
+ goto out;
+ }
+
+ loop = 1;
+ do {
+ need = buffer_size;
+ while (need > 0) {
+ ret = read (source_fd,
+ buffer + (buffer_size - need),
+ need);
+ if (ret < 0) {
+ if (loop == 0)
+ break;
+ else {
+ loop = 0;
+ continue;
+ }
+ } else if (ret == 0) {
+ loop = 0;
+ break;
+ } else {
+ if (loop == 0)
+ loop = 1;
+ need -= ret;
+ }
+ }
+
+ if (need == 0 && check_zero(buffer, buffer_size)) {
+ ret = lseek(target_fd, buffer_size, SEEK_CUR);
+ if (ret == (off_t)(-1)) {
+ com_err (program_name, errno,
+ _("while lseeking %d"), ret);
+ err_num = SEEK_FAIL;
+ goto out;
+ }
+ } else if (need < buffer_size) {
+ ret = write(target_fd, buffer, buffer_size - need);
+ if (ret < 0) {
+ com_err (program_name,
+ errno, _("while writeing"));
+ err_num = WRITE_FAIL;
+ goto out;
+ }
+ }
+ } while (loop);
+
+out:
+ if (buffer)
+ free(buffer);
+
+ if (target_fd > 0) {
+ fsync(target_fd);
+ close(target_fd);
+ }
+
+ if (source_fd > 0)
+ close(source_fd);
+
+ remove_error_table(&et_ext2_error_table);
+ return (!err_num) ? 0 : -1;
+}
--
1.7.3.5



2011-02-28 02:35:27

by Robin Dong

[permalink] [raw]
Subject: [PATCH 2/2] add manual for tool 'mksparse'

From: Robin Dong <[email protected]>

add manual for tool 'mksparse'.

Reviewed-by: Coly Li <[email protected]>
Signed-off-by: Robin Dong <[email protected]>
---
misc/Makefile.in | 5 +++
misc/mksparse.8.in | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 91 insertions(+), 0 deletions(-)
create mode 100644 misc/mksparse.8.in

diff --git a/misc/Makefile.in b/misc/Makefile.in
index fcd316e..acea909 100644
--- a/misc/Makefile.in
+++ b/misc/Makefile.in
@@ -18,6 +18,7 @@ INSTALL = @INSTALL@
@IMAGER_CMT@E2IMAGE_MAN= e2image.8

@IMAGER_CMT@MKSPARSE_PROG= mksparse
+@IMAGER_CMT@MKSPARSE_MAN= mksparse.8

@UUIDD_CMT@UUIDD_PROG= uuidd
@UUIDD_CMT@UUIDD_MAN= uuidd.8
@@ -365,6 +366,10 @@ e2image.8: $(DEP_SUBSTITUTE) $(srcdir)/e2image.8.in
$(E) " SUBST $@"
$(Q) $(SUBSTITUTE_UPTIME) $(srcdir)/e2image.8.in e2image.8

+mksparse.8: $(DEP_SUBSTITUTE) $(srcdir)/mksparse.8.in
+ $(E) " SUBST $@"
+ $(Q) $(SUBSTITUTE_UPTIME) $(srcdir)/mksparse.8.in mksparse.8
+
e4defrag.8: $(DEP_SUBSTITUTE) $(srcdir)/e4defrag.8.in
$(E) " SUBST $@"
$(Q) $(SUBSTITUTE_UPTIME) $(srcdir)/e4defrag.8.in e4defrag.8
diff --git a/misc/mksparse.8.in b/misc/mksparse.8.in
new file mode 100644
index 0000000..8bc9955
--- /dev/null
+++ b/misc/mksparse.8.in
@@ -0,0 +1,86 @@
+.\" -*- nroff -*-
+.\" Copyright 2011 by Taobao. All Rights Reserved.
+.\" This file may be copied under the terms of the GNU Public License.
+.\"
+.TH MKSPARSE 8 "@E2FSPROGS_MONTH@ @E2FSPROGS_YEAR@" "E2fsprogs version @E2FSPROGS_VERSION@"
+.SH NAME
+mksparse \- Make sparse file from an input file or the standard input
+.SH SYNOPSIS
+.B mksparse
+[
+.B \-s
+buffer-size
+]
+[
+.B \-i
+input-file
+]
+.I sparse-file
+.SH DESCRIPTION
+The
+.B mksparse
+makes a sparse file from an input file or the standard input. Currently popular
+compression tools like bzip2 or gzip support compressing a sparse file,
+but when decompressing they write zero bytes to the output rather than making a
+sparse hole, which usually results in a much larger file. If a user downloads a
+compressed raw file system image built by
+.BR e2image (8)
+on a very large file system, it is almost impossible to decompress it on a
+desktop machine.
+.PP
+.B mksparse
+can read the decompressed stream from standard input and make a sparse file
+which takes fewer bytes on storage media. Here is an example:
+.PP
+.br
+\ \fBe2image \-r /dev/hda1 \- | bzip2 > hda1.bz2\fR
+.PP
+\ \fBbunzip2 \-c hda1.bz2 | mksparse hda1_sparse.img\fR
+.PP
+.SH OPTIONS
+.TP
+.I "sparse-file"
+The name of the output file.
+.TP
+.BI \-i " input-file"
+Besides reading from standard input,
+.B mksparse
+is also able to make a sparse file named by
+.I sparse-file
+from a regular file or device file which is specified by
+.I input-file
+.TP
+.BI \-s " buffer-size"
+By default
+.B mksparse
+reads 4KB from the input into its read buffer. If the bytes in the buffer are
+all zero,
+.B mksparse
+makes a sparse hole in
+.I sparse-file
+by seeking
+.I buffer-size
+bytes from the current position of
+.I sparse-file
+, otherwise it writes the buffer to
+.I sparse-file
+. After a hole is made or the buffer is written,
+.B mksparse
+continues to read next
+.I buffer-size
+bytes from input file into its read buffer until the end of input. A larger
+.I buffer-size
+means better I/O performance but less chance to make more sparse space in
+.I sparse-file
+. In most cases, the default 4KB
+.I buffer-size
+works quite well.
+.SH AUTHOR
+.B mksparse
+is written by Robin Dong <[email protected]>.
+.SH AVAILABILITY
+.B mksparse
+is part of the e2fsprogs package.
+.SH SEE ALSO
+.BR e2image (8)
+
--
1.7.3.5


2011-02-28 03:09:45

by Yongqiang Yang

[permalink] [raw]
Subject: Re: [PATCH 1/2] mksparse: build sparse file from compressed e2image file.

Hi Robin,

Are there usually many zero blocks in /dev/sda1? If so, what is the
typical ratio of zero blocks?


On Mon, Feb 28, 2011 at 10:35 AM, Robin Dong <[email protected]> wrote:
> From: Robin Dong <[email protected]>
>
> [Purpose]
> After we make a image-file by e2image like:
> #e2image -r /dev/hda1 - | bzip2 > hda1.bz2
> we copy the bz2 file to remote host and extract it:
> #bunzip2 hda1.bz2
> the unzipped hda1 file will not be a sparse file and the space occupied
> by it is as large as the real /dev/hda1 filesystem.
>
> Therefore a tool to transform a raw-file to a sparse-file is necessary.
> This Patch is a first attempt to provide such a tool which is called
> 'mksparse' so far.
>
> [Example]
> Extract hda1.bz2 by:
> #bunzip2 -c hda1.bz2 | mksparse hda1
> the hda1 file will be a sparse file.
>
> Reviewed-by: Coly Li <[email protected]>
> Signed-off-by: Robin Dong <[email protected]>
> ---
>  misc/Makefile.in |   21 ++++-
>  misc/mksparse.c  |  268 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 285 insertions(+), 4 deletions(-)
>  create mode 100644 misc/mksparse.c
>
> diff --git a/misc/Makefile.in b/misc/Makefile.in
> index 86ee53f..fcd316e 100644
> --- a/misc/Makefile.in
> +++ b/misc/Makefile.in
> @@ -17,6 +17,8 @@ INSTALL = @INSTALL@
> ?@IMAGER_CMT@E2IMAGE_PROG= e2image
> ?@IMAGER_CMT@E2IMAGE_MAN= e2image.8
>
> +@IMAGER_CMT@MKSPARSE_PROG= mksparse
> +
> ?@UUIDD_CMT@UUIDD_PROG= uuidd
> ?@UUIDD_CMT@UUIDD_MAN= uuidd.8
>
> @@ -27,7 +29,7 @@ INSTALL = @INSTALL@
> ?@BLKID_CMT@FINDFS_MAN= findfs.8
>
> ?SPROGS= ? ? ? ? ? ? ? ?mke2fs badblocks tune2fs dumpe2fs $(BLKID_PROG) logsave \
> - ? ? ? ? ? ? ? ? ? ? ? $(E2IMAGE_PROG) @FSCK_PROG@ e2undo
> + ? ? ? ? ? ? ? ? ? ? ? $(E2IMAGE_PROG) $(MKSPARSE_PROG) @FSCK_PROG@ e2undo
> ?USPROGS= ? ? ? mklost+found filefrag e2freefrag $(UUIDD_PROG) $(E4DEFRAG_PROG)
> ?SMANPAGES= ? ? tune2fs.8 mklost+found.8 mke2fs.8 dumpe2fs.8 badblocks.8 \
> ? ? ? ? ? ? ? ? ? ? ? ?e2label.8 $(FINDFS_MAN) $(BLKID_MAN) $(E2IMAGE_MAN) \
> @@ -50,6 +52,7 @@ UUIDD_OBJS= ? uuidd.o
> ?DUMPE2FS_OBJS= dumpe2fs.o
> ?BADBLOCKS_OBJS= ? ? ? ?badblocks.o
> ?E2IMAGE_OBJS= ?e2image.o
> +MKSPARSE_OBJS= mksparse.o
> ?FSCK_OBJS= ? ? fsck.o base_device.o ismounted.o
> ?BLKID_OBJS= ? ?blkid.o
> ?FILEFRAG_OBJS= filefrag.o
> @@ -68,6 +71,7 @@ PROFILED_UUIDD_OBJS= ?profiled/uuidd.o
> ?PROFILED_DUMPE2FS_OBJS= ? ? ? ?profiled/dumpe2fs.o
> ?PROFILED_BADBLOCKS_OBJS= ? ? ? profiled/badblocks.o
> ?PROFILED_E2IMAGE_OBJS= profiled/e2image.o
> +PROFILED_MKSPARSE_OBJS= ? ? ? ?profiled/mksparse.o
> ?PROFILED_FSCK_OBJS= ? ?profiled/fsck.o profiled/base_device.o \
> ? ? ? ? ? ? ? ? ? ? ? ?profiled/ismounted.o
> ?PROFILED_BLKID_OBJS= ? profiled/blkid.o
> @@ -109,7 +113,7 @@ all:: profiled $(SPROGS) $(UPROGS) $(USPROGS) $(SMANPAGES) $(UMANPAGES) \
> ?@PROFILE_CMT@all:: tune2fs.profiled blkid.profiled e2image.profiled \
> ? ? ? ?e2undo.profiled mke2fs.profiled dumpe2fs.profiled fsck.profiled \
> ? ? ? ?logsave.profiled filefrag.profiled uuidgen.profiled uuidd.profiled \
> - ? ? ? e2image.profiled e4defrag.profiled
> + ? ? ? e2image.profiled mksparse.profiled e4defrag.profiled
>
> ?profiled:
> ?@PROFILE_CMT@ ?$(E) " ?MKDIR $@"
> @@ -187,6 +191,15 @@ e2image.profiled: $(PROFILED_E2IMAGE_OBJS) $(PROFILED_DEPLIBS)
> ? ? ? ?$(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o e2image.profiled \
> ? ? ? ? ? ? ? ?$(PROFILED_E2IMAGE_OBJS) $(PROFILED_LIBS) $(LIBINTL)
>
> +mksparse: $(MKSPARSE_OBJS) $(DEPLIBS)
> + ? ? ? $(E) " ?LD $@"
> + ? ? ? $(Q) $(CC) $(ALL_LDFLAGS) -o mksparse $(MKSPARSE_OBJS) $(LIBS) $(LIBINTL)
> +
> +mksparse.profiled: $(PROFILED_MKSPARSE_OBJS) $(PROFILED_DEPLIBS)
> + ? ? ? $(E) " ?LD $@"
> + ? ? ? $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o mksparse.profiled \
> + ? ? ? ? ? ? ? $(PROFILED_MKSPARSE_OBJS) $(PROFILED_LIBS) $(LIBINTL)
> +
> ?e2undo: $(E2UNDO_OBJS) $(DEPLIBS)
> ? ? ? ?$(E) " ?LD $@"
> ? ? ? ?$(Q) $(CC) $(ALL_LDFLAGS) -o e2undo $(E2UNDO_OBJS) $(LIBS) $(LIBINTL)
> @@ -550,8 +563,8 @@ clean:
> ? ? ? ? ? ? ? ?$(FMANPAGES) \
> ? ? ? ? ? ? ? ?base_device base_device.out mke2fs.static filefrag e2freefrag \
> ? ? ? ? ? ? ? ?e2initrd_helper partinfo prof_err.[ch] default_profile.c \
> - ? ? ? ? ? ? ? uuidd e2image tune2fs.static tst_ismounted fsck.profiled \
> - ? ? ? ? ? ? ? blkid.profiled tune2fs.profiled e2image.profiled \
> + ? ? ? ? ? ? ? uuidd e2image mksparse tune2fs.static tst_ismounted fsck.profiled \
> + ? ? ? ? ? ? ? blkid.profiled tune2fs.profiled e2image.profiled mksparse.profiled\
> ? ? ? ? ? ? ? ?e2undo.profiled mke2fs.profiled dumpe2fs.profiled \
> ? ? ? ? ? ? ? ?logsave.profiled filefrag.profiled uuidgen.profiled \
> ? ? ? ? ? ? ? ?uuidd.profiled e2image.profiled \
> diff --git a/misc/mksparse.c b/misc/mksparse.c
> new file mode 100644
> index 0000000..9e62fcf
> --- /dev/null
> +++ b/misc/mksparse.c
> @@ -0,0 +1,268 @@
> +/*
> + * mksparse.c --- Program which transform stdin (or file) to
> + * be a new sparse file.
> + *
> + * Copyright 2011 by Taobao, all rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public
> + * License, version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * Authors: Robin Dong <[email protected]>
> + */
> +
> +#define _LARGEFILE_SOURCE
> +#define _LARGEFILE64_SOURCE
> +
> +#include <fcntl.h>
> +#include <grp.h>
> +#ifdef HAVE_GETOPT_H
> +#include <getopt.h>
> +#else
> +extern char *optarg;
> +extern int optind;
> +#endif
> +#include <stdio.h>
> +#ifdef HAVE_STDLIB_H
> +#include <stdlib.h>
> +#endif
> +#include <string.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +#include <limits.h>
> +
> +#include "ext2fs/ext2fs.h"
> +
> +#include "../version.h"
> +#include "nls-enable.h"
> +
> +#define KB_SIZE 1024
> +#define MB_SIZE (1024*1024)
> +#define MIN_BUFFER_SIZE 1024
> +#define MAX_BUFFER_SIZE (64*1024*1024)
> +#define DEFAULT_BUFFER_SIZE (4*1024)
> +
> +#define OPEN_SRC_FAIL ? ? ? ? ?-1
> +#define OPEN_TARGET_FAIL ? ? ? -2
> +#define MALLOC_FAIL ? ? ? ? ? ? ? ? ? ?-3
> +#define SEEK_FAIL ? ? ? ? ? ? ? ? ? ? ?-4
> +#define WRITE_FAIL ? ? ? ? ? ? ? ? ? ? -5
> +
> +const char *program_name = "mksparse";
> +
> +static void usage(void)
> +{
> + ? ? ? fprintf(stderr,
> + ? ? ? ? ? ? ? _("Usage: %s [-s buffer_size] [-i input_file] sparse_file\n"),
> + ? ? ? ? ? ? ? program_name);
> + ? ? ? exit (1);
> +}
> +
> +static int get_buffer_size(const char *optarg)
> +{
> + ? ? ? char *pos = NULL;
> + ? ? ? long val = strtol(optarg, &pos, 0);
> + ? ? ? if (pos == optarg || val == LONG_MAX)
> + ? ? ? ? ? ? ? return DEFAULT_BUFFER_SIZE;
> +
> + ? ? ? switch (*pos) {
> + ? ? ? case 'k':
> + ? ? ? case 'K':
> + ? ? ? ? ? ? ? val *= KB_SIZE;
> + ? ? ? ? ? ? ? break;
> + ? ? ? case 'm':
> + ? ? ? case 'M':
> + ? ? ? ? ? ? ? val *= MB_SIZE;
> + ? ? ? ? ? ? ? break;
> + ? ? ? case 'b':
> + ? ? ? case 'B':
> + ? ? ? case '\0':
> + ? ? ? ? ? ? ? break;
> + ? ? ? default:
> + ? ? ? ? ? ? ? fprintf(stderr, _("Wrong buffer_size %s\n"), optarg);
> + ? ? ? ? ? ? ? val = -1;
> + ? ? ? ? ? ? ? goto out;
> + ? ? ? ? ? ? ? /*
> + ? ? ? ? ? ? ? ?* never touch here
> + ? ? ? ? ? ? ? ?*/
> + ? ? ? ? ? ? ? break;
> + ? ? ? }
> +
> + ? ? ? /*
> + ? ? ? ?* the buffer_size must in thec range [1KB, 64MB]
> + ? ? ? ?*/
> + ? ? ? if (val > MAX_BUFFER_SIZE) {
> + ? ? ? ? ? ? ? fprintf(stderr,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? _("Buffer_size is too large, "
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "change it to %d bytes\n"),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? MAX_BUFFER_SIZE);
> + ? ? ? ? ? ? ? val = MAX_BUFFER_SIZE;
> + ? ? ? }
> +
> + ? ? ? if (val < MIN_BUFFER_SIZE) {
> + ? ? ? ? ? ? ? fprintf(stderr,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? _("Buffer_size is too small, "
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? "change it to %d bytes\n"),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? MIN_BUFFER_SIZE);
> + ? ? ? ? ? ? ? val = MIN_BUFFER_SIZE;
> + ? ? ? }
> +
> + ? ? ? /*
> + ? ? ? ?* up-align to MIN_BUFFER_SIZE
> + ? ? ? ?*/
> + ? ? ? val &= ~(MIN_BUFFER_SIZE - 1);
> +
> +out:
> + ? ? ? return val;
> +}
> +
> +static int check_zero(const char *buffer, int buffer_size)
> +{
> + ? ? ? long *wp = (long *)buffer;
> +
> + ? ? ? while (*(wp++) == 0) {
> + ? ? ? ? ? ? ? if ((const char *)wp >= buffer + buffer_size)
> + ? ? ? ? ? ? ? ? ? ? ? break;
> + ? ? ? }
> +
> + ? ? ? return (const char *)wp >= buffer + buffer_size;
> +}
> +
> +int main (int argc, char **argv)
> +{
> + ? ? ? int c;
> + ? ? ? char *buffer = NULL;
> + ? ? ? char *if_name = NULL;
> + ? ? ? char *of_name = NULL;
> + ? ? ? int buffer_size = DEFAULT_BUFFER_SIZE;
> + ? ? ? int source_fd = 0;
> + ? ? ? int target_fd = 0;
> + ? ? ? ssize_t ret = 0;
> + ? ? ? int need = 0;
> + ? ? ? int loop;
> + ? ? ? int err_num = 0;
> +
> +#ifdef ENABLE_NLS
> + ? ? ? setlocale(LC_MESSAGES, "");
> + ? ? ? setlocale(LC_CTYPE, "");
> + ? ? ? bindtextdomain(NLS_CAT_NAME, LOCALEDIR);
> + ? ? ? textdomain(NLS_CAT_NAME);
> +#endif
> + ? ? ? fprintf (stderr, "mksparse %s (%s)\n", E2FSPROGS_VERSION,
> + ? ? ? ? ? ? ? ?E2FSPROGS_DATE);
> + ? ? ? if (argc && *argv)
> + ? ? ? ? ? ? ? program_name = *argv;
> + ? ? ? while ((c = getopt (argc, argv, "s:i:")) != EOF)
> + ? ? ? ? ? ? ? switch (c) {
> + ? ? ? ? ? ? ? case 's':
> + ? ? ? ? ? ? ? ? ? ? ? buffer_size = get_buffer_size(optarg);
> + ? ? ? ? ? ? ? ? ? ? ? if (buffer_size < 0)
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? return -1;
> + ? ? ? ? ? ? ? ? ? ? ? break;
> + ? ? ? ? ? ? ? case 'i':
> + ? ? ? ? ? ? ? ? ? ? ? if_name = optarg;
> + ? ? ? ? ? ? ? ? ? ? ? break;
> + ? ? ? ? ? ? ? default:
> + ? ? ? ? ? ? ? ? ? ? ? usage();
> + ? ? ? ? ? ? ? }
> +
> + ? ? ? if (optind != argc - 1)
> + ? ? ? ? ? ? ? usage();
> +
> + ? ? ? add_error_table(&et_ext2_error_table);
> +
> + ? ? ? of_name = argv[optind];
> +
> + ? ? ? if (!if_name) {
> + ? ? ? ? ? ? ? source_fd = 0;
> + ? ? ? } else {
> + ? ? ? ? ? ? ? source_fd = open(if_name, O_RDONLY);
> + ? ? ? ? ? ? ? if (source_fd < 0) {
> + ? ? ? ? ? ? ? ? ? ? ? com_err (program_name, errno,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? _("while trying to open %s"), if_name);
> + ? ? ? ? ? ? ? ? ? ? ? err_num = OPEN_SRC_FAIL;
> + ? ? ? ? ? ? ? ? ? ? ? goto out;
> + ? ? ? ? ? ? ? }
> + ? ? ? }
> +
> + ? ? ? target_fd = open(of_name, O_CREAT|O_TRUNC|O_WRONLY, 0600);
> + ? ? ? if (target_fd < 0) {
> + ? ? ? ? ? ? ? com_err (program_name, errno,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? _("while trying to open %s"), of_name);
> + ? ? ? ? ? ? ? err_num = OPEN_TARGET_FAIL;
> + ? ? ? ? ? ? ? goto out;
> + ? ? ? }
> +
> + ? ? ? buffer = malloc(buffer_size);
> + ? ? ? if (!buffer) {
> + ? ? ? ? ? ? ? com_err (program_name, ENOMEM, _("while allocating buffer"));
> + ? ? ? ? ? ? ? err_num = MALLOC_FAIL;
> + ? ? ? ? ? ? ? goto out;
> + ? ? ? }
> +
> + ? ? ? loop = 1;
> + ? ? ? do {
> + ? ? ? ? ? ? ? need = buffer_size;
> + ? ? ? ? ? ? ? while (need > 0) {
> + ? ? ? ? ? ? ? ? ? ? ? ret = read (source_fd,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? buffer + (buffer_size - need),
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? need);
> + ? ? ? ? ? ? ? ? ? ? ? if (ret < 0) {
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (loop == 0)
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? break;
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? else {
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? loop = 0;
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? continue;
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? }
> + ? ? ? ? ? ? ? ? ? ? ? } else if (ret == 0) {
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? loop = 0;
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? break;
> + ? ? ? ? ? ? ? ? ? ? ? } else {
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (loop == 0)
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? loop = 1;
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? need -= ret;
> + ? ? ? ? ? ? ? ? ? ? ? }
> + ? ? ? ? ? ? ? }
> +
> + ? ? ? ? ? ? ? if (need == 0 && check_zero(buffer, buffer_size)) {
> + ? ? ? ? ? ? ? ? ? ? ? ret = lseek(target_fd, buffer_size, SEEK_CUR);
> + ? ? ? ? ? ? ? ? ? ? ? if (ret == (off_t)(-1)) {
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? com_err (program_name, errno,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? _("while lseeking %d"), ret);
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? err_num = SEEK_FAIL;
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? goto out;
> + ? ? ? ? ? ? ? ? ? ? ? }
> + ? ? ? ? ? ? ? } else if (need < buffer_size) {
> + ? ? ? ? ? ? ? ? ? ? ? ret = write(target_fd, buffer, buffer_size - need);
> + ? ? ? ? ? ? ? ? ? ? ? if (ret < 0) {
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? com_err (program_name,
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?errno, _("while writeing"));
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? err_num = WRITE_FAIL;
> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? goto out;
> + ? ? ? ? ? ? ? ? ? ? ? }
> + ? ? ? ? ? ? ? }
> + ? ? ? } while (loop);
> +
> +out:
> + ? ? ? if (buffer)
> + ? ? ? ? ? ? ? free(buffer);
> +
> + ? ? ? if (target_fd > 0) {
> + ? ? ? ? ? ? ? fsync(target_fd);
> + ? ? ? ? ? ? ? close(target_fd);
> + ? ? ? }
> +
> + ? ? ? if (source_fd > 0)
> + ? ? ? ? ? ? ? close(source_fd);
> +
> + ? ? ? remove_error_table(&et_ext2_error_table);
> + ? ? ? return (!err_num) ? 0 : -1;
> +}
> --
> 1.7.3.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>



--
Best Wishes
Yongqiang Yang

2011-02-28 13:24:19

by Lukas Czerner

[permalink] [raw]
Subject: Re: [PATCH 1/2] mksparse: build sparse file from compressed e2image file.

On Mon, 28 Feb 2011, Robin Dong wrote:

> From: Robin Dong <[email protected]>
>
> [Purpose]
> After we make a image-file by e2image like:
> #e2image -r /dev/hda1 - | bzip2 > hda1.bz2
> we copy the bz2 file to remote host and extract it:
> #bunzip2 hda1.bz2
> the unzipped hda1 file will not be a sparse file and the space occupied
> by it is as large as the real /dev/hda1 filesystem.
>
> Therefore a tool to transform a raw-file to a sparse-file is necessary.
> This Patch is a first attempt to provide such a tool which is called
> 'mksparse' so far.
>
> [Example]
> Extract hda1.bz2 by:
> #bunzip2 -c hda1.bz2 | mksparse hda1
> the hda1 file will be a sparse file.

Hi Robin,

I am working on QCOW2 support for e2image, just so we do not need to
handle sparse files while moving the image around. You can see patches
here:

http://www.spinics.net/lists/linux-ext4/msg23389.html

At this point it is usable and should work without any problems. You can
create a qcow2 image like this:

e2image -Q /dev/hda1 image.qcow2

and convert it back to the raw image like this:

e2image -r image.qcow2 image.raw

So far there is no real support for e2fsck to check a qcow2 image directly
without the need to convert it into a raw image, but I do not think it is
worth the work (but I might be wrong). Also, if you do not want to
convert the qcow2 image into raw, you can use qemu-nbd to use it directly:

modprobe nbd max_part=8
qemu-nbd --connect=/dev/nbd0 ./image.qcow2
fsck.ext4 -f /dev/nbd0
qemu-nbd --disconnect /dev/nbd0

Thanks!
-Lukas

>
> Reviewed-by: Coly Li <[email protected]>
> Signed-off-by: Robin Dong <[email protected]>
> ---
> misc/Makefile.in | 21 ++++-
> misc/mksparse.c | 268 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 285 insertions(+), 4 deletions(-)
> create mode 100644 misc/mksparse.c
>
> diff --git a/misc/Makefile.in b/misc/Makefile.in
> index 86ee53f..fcd316e 100644
> --- a/misc/Makefile.in
> +++ b/misc/Makefile.in
> @@ -17,6 +17,8 @@ INSTALL = @INSTALL@
> @IMAGER_CMT@E2IMAGE_PROG= e2image
> @IMAGER_CMT@E2IMAGE_MAN= e2image.8
>
> +@IMAGER_CMT@MKSPARSE_PROG= mksparse
> +
> @UUIDD_CMT@UUIDD_PROG= uuidd
> @UUIDD_CMT@UUIDD_MAN= uuidd.8
>
> @@ -27,7 +29,7 @@ INSTALL = @INSTALL@
> @BLKID_CMT@FINDFS_MAN= findfs.8
>
> SPROGS= mke2fs badblocks tune2fs dumpe2fs $(BLKID_PROG) logsave \
> - $(E2IMAGE_PROG) @FSCK_PROG@ e2undo
> + $(E2IMAGE_PROG) $(MKSPARSE_PROG) @FSCK_PROG@ e2undo
> USPROGS= mklost+found filefrag e2freefrag $(UUIDD_PROG) $(E4DEFRAG_PROG)
> SMANPAGES= tune2fs.8 mklost+found.8 mke2fs.8 dumpe2fs.8 badblocks.8 \
> e2label.8 $(FINDFS_MAN) $(BLKID_MAN) $(E2IMAGE_MAN) \
> @@ -50,6 +52,7 @@ UUIDD_OBJS= uuidd.o
> DUMPE2FS_OBJS= dumpe2fs.o
> BADBLOCKS_OBJS= badblocks.o
> E2IMAGE_OBJS= e2image.o
> +MKSPARSE_OBJS= mksparse.o
> FSCK_OBJS= fsck.o base_device.o ismounted.o
> BLKID_OBJS= blkid.o
> FILEFRAG_OBJS= filefrag.o
> @@ -68,6 +71,7 @@ PROFILED_UUIDD_OBJS= profiled/uuidd.o
> PROFILED_DUMPE2FS_OBJS= profiled/dumpe2fs.o
> PROFILED_BADBLOCKS_OBJS= profiled/badblocks.o
> PROFILED_E2IMAGE_OBJS= profiled/e2image.o
> +PROFILED_MKSPARSE_OBJS= profiled/mksparse.o
> PROFILED_FSCK_OBJS= profiled/fsck.o profiled/base_device.o \
> profiled/ismounted.o
> PROFILED_BLKID_OBJS= profiled/blkid.o
> @@ -109,7 +113,7 @@ all:: profiled $(SPROGS) $(UPROGS) $(USPROGS) $(SMANPAGES) $(UMANPAGES) \
> @PROFILE_CMT@all:: tune2fs.profiled blkid.profiled e2image.profiled \
> e2undo.profiled mke2fs.profiled dumpe2fs.profiled fsck.profiled \
> logsave.profiled filefrag.profiled uuidgen.profiled uuidd.profiled \
> - e2image.profiled e4defrag.profiled
> + e2image.profiled mksparse.profiled e4defrag.profiled
>
> profiled:
> @PROFILE_CMT@ $(E) " MKDIR $@"
> @@ -187,6 +191,15 @@ e2image.profiled: $(PROFILED_E2IMAGE_OBJS) $(PROFILED_DEPLIBS)
> $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o e2image.profiled \
> $(PROFILED_E2IMAGE_OBJS) $(PROFILED_LIBS) $(LIBINTL)
>
> +mksparse: $(MKSPARSE_OBJS) $(DEPLIBS)
> + $(E) " LD $@"
> + $(Q) $(CC) $(ALL_LDFLAGS) -o mksparse $(MKSPARSE_OBJS) $(LIBS) $(LIBINTL)
> +
> +mksparse.profiled: $(PROFILED_MKSPARSE_OBJS) $(PROFILED_DEPLIBS)
> + $(E) " LD $@"
> + $(Q) $(CC) $(ALL_LDFLAGS) -g -pg -o mksparse.profiled \
> + $(PROFILED_MKSPARSE_OBJS) $(PROFILED_LIBS) $(LIBINTL)
> +
> e2undo: $(E2UNDO_OBJS) $(DEPLIBS)
> $(E) " LD $@"
> $(Q) $(CC) $(ALL_LDFLAGS) -o e2undo $(E2UNDO_OBJS) $(LIBS) $(LIBINTL)
> @@ -550,8 +563,8 @@ clean:
> $(FMANPAGES) \
> base_device base_device.out mke2fs.static filefrag e2freefrag \
> e2initrd_helper partinfo prof_err.[ch] default_profile.c \
> - uuidd e2image tune2fs.static tst_ismounted fsck.profiled \
> - blkid.profiled tune2fs.profiled e2image.profiled \
> + uuidd e2image mksparse tune2fs.static tst_ismounted fsck.profiled \
> + blkid.profiled tune2fs.profiled e2image.profiled mksparse.profiled\
> e2undo.profiled mke2fs.profiled dumpe2fs.profiled \
> logsave.profiled filefrag.profiled uuidgen.profiled \
> uuidd.profiled e2image.profiled \
> diff --git a/misc/mksparse.c b/misc/mksparse.c
> new file mode 100644
> index 0000000..9e62fcf
> --- /dev/null
> +++ b/misc/mksparse.c
> @@ -0,0 +1,268 @@
> +/*
> + * mksparse.c --- Program which transforms stdin (or a file) into
> + * a new sparse file.
> + *
> + * Copyright 2011 by Taobao, all rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public
> + * License, version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * General Public License for more details.
> + *
> + * Authors: Robin Dong <[email protected]>
> + */
> +
> +#define _LARGEFILE_SOURCE
> +#define _LARGEFILE64_SOURCE
> +
> +#include <fcntl.h>
> +#include <grp.h>
> +#ifdef HAVE_GETOPT_H
> +#include <getopt.h>
> +#else
> +extern char *optarg;
> +extern int optind;
> +#endif
> +#include <stdio.h>
> +#ifdef HAVE_STDLIB_H
> +#include <stdlib.h>
> +#endif
> +#include <string.h>
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +#include <limits.h>
> +
> +#include "ext2fs/ext2fs.h"
> +
> +#include "../version.h"
> +#include "nls-enable.h"
> +
> +#define KB_SIZE 1024
> +#define MB_SIZE (1024*1024)
> +#define MIN_BUFFER_SIZE 1024
> +#define MAX_BUFFER_SIZE (64*1024*1024)
> +#define DEFAULT_BUFFER_SIZE (4*1024)
> +
> +#define OPEN_SRC_FAIL -1
> +#define OPEN_TARGET_FAIL -2
> +#define MALLOC_FAIL -3
> +#define SEEK_FAIL -4
> +#define WRITE_FAIL -5
> +
> +const char *program_name = "mksparse";
> +/*
> + * Print the command-line synopsis, prefixed with program_name, to
> + * stderr and terminate the process with exit status 1.  This
> + * function does not return.
> + */
> +static void usage(void)
> +{
> + fprintf(stderr,
> + _("Usage: %s [-s buffer_size] [-i input_file] sparse_file\n"),
> + program_name);
> + exit (1);
> +}
> +/*
> + * Parse the argument of the -s option into a buffer size in bytes.
> + *
> + * The number is read with strtol() using base 0, so decimal, octal
> + * and hexadecimal spellings are accepted.  The first character after
> + * the number selects the unit: 'k'/'K' multiplies by KB_SIZE,
> + * 'm'/'M' multiplies by MB_SIZE, and 'b'/'B' or the end of the
> + * string leave the value in bytes.  Only that one character is
> + * inspected; anything following it is ignored.
> + *
> + * Returns:
> + *   - the size, clamped to [MIN_BUFFER_SIZE, MAX_BUFFER_SIZE] and
> + *     rounded down to a multiple of MIN_BUFFER_SIZE;
> + *   - DEFAULT_BUFFER_SIZE when no digits could be parsed or when
> + *     strtol() returned LONG_MAX;
> + *   - -1, after printing a message to stderr, when the unit
> + *     character is not recognised.
> + *
> + * The parameter is named optarg and therefore hides the global
> + * getopt variable of the same name inside this function.
> + *
> + * NOTE(review): the k/m scaling multiplies a long without any
> + * overflow check, so a very large number can overflow before the
> + * range clamp runs -- confirm whether that needs guarding.
> + */
> +static int get_buffer_size(const char *optarg)
> +{
> + char *pos = NULL;
> + long val = strtol(optarg, &pos, 0);
> + if (pos == optarg || val == LONG_MAX)
> + return DEFAULT_BUFFER_SIZE;
> + /*
> + * pos now points at the first character strtol() did not
> + * consume; it is interpreted as the unit suffix.
> + */
> + switch (*pos) {
> + case 'k':
> + case 'K':
> + val *= KB_SIZE;
> + break;
> + case 'm':
> + case 'M':
> + val *= MB_SIZE;
> + break;
> + case 'b':
> + case 'B':
> + case '\0':
> + break;
> + default:
> + fprintf(stderr, _("Wrong buffer_size %s\n"), optarg);
> + val = -1;
> + goto out;
> + /*
> + * not reached: the goto above already left the switch
> + */
> + break;
> + }
> +
> + /*
> + * the buffer_size must be in the range [1KB, 64MB]; a value
> + * outside of it is replaced by the nearest limit and a
> + * message is printed to stderr
> + */
> + if (val > MAX_BUFFER_SIZE) {
> + fprintf(stderr,
> + _("Buffer_size is too large, "
> + "change it to %d bytes\n"),
> + MAX_BUFFER_SIZE);
> + val = MAX_BUFFER_SIZE;
> + }
> +
> + if (val < MIN_BUFFER_SIZE) {
> + fprintf(stderr,
> + _("Buffer_size is too small, "
> + "change it to %d bytes\n"),
> + MIN_BUFFER_SIZE);
> + val = MIN_BUFFER_SIZE;
> + }
> +
> + /*
> + * round down to a multiple of MIN_BUFFER_SIZE: the mask clears
> + * the low bits of val, so the value can only shrink here
> + */
> + val &= ~(MIN_BUFFER_SIZE - 1);
> +
> +out:
> + return val;
> +}
> +/*
> + * Intended to report whether a buffer contains only zero bytes.
> + *
> + * The buffer is scanned one long at a time, starting at its first
> + * byte; the first word is read before any bounds check is made.
> + * main() passes a malloc()ed buffer whose size is either
> + * DEFAULT_BUFFER_SIZE or a value produced by get_buffer_size().
> + *
> + * Returns non-zero when the scan pointer ended at or beyond
> + * buffer + buffer_size (meant to signal "all zero") and 0
> + * otherwise.
> + */
> +static int check_zero(const char *buffer, int buffer_size)
> +{
> + long *wp = (long *)buffer;
> + /*
> + * NOTE(review): wp is advanced even when the word just compared
> + * is non-zero.  If the only non-zero word is the last one in the
> + * buffer, wp ends up at buffer + buffer_size and the return
> + * expression below is true, i.e. the buffer is reported as
> + * all-zero -- confirm and fix before relying on this.
> + */
> + while (*(wp++) == 0) {
> + if ((const char *)wp >= buffer + buffer_size)
> + break;
> + }
> +
> + return (const char *)wp >= buffer + buffer_size;
> +}
> +/*
> + * Entry point: copy the input to sparse_file, skipping over
> + * buffers that check_zero() reports as all-zero so that they can
> + * become holes in the output.
> + *
> + * Options: -s buffer_size sets the copy unit (parsed by
> + * get_buffer_size()), -i input_file names the source; without -i
> + * the data is read from standard input.  Exactly one positional
> + * argument, the output file, is required; otherwise usage() is
> + * called.
> + *
> + * Returns 0 when err_num was never set and -1 otherwise; -1 is
> + * also returned directly when get_buffer_size() rejects the -s
> + * argument.
> + *
> + * NOTE(review): points worth confirming in the copy loop below:
> + *  - a failing read() is retried once and, if it fails again,
> + *    simply ends the copy; no message is printed and err_num
> + *    stays 0, so the exit status still reports success;
> + *  - when the last full buffers of the input are all zero, the
> + *    file offset is only moved with lseek() and nothing is
> + *    written afterwards, which presumably leaves the output
> + *    shorter than the input -- verify against lseek() semantics;
> + *  - write() is only checked for a negative return, a short
> + *    write is not retried;
> + *  - the lseek() error message formats ret (an ssize_t) with %d.
> + */
> +int main (int argc, char **argv)
> +{
> + int c;
> + char *buffer = NULL;
> + char *if_name = NULL;
> + char *of_name = NULL;
> + int buffer_size = DEFAULT_BUFFER_SIZE;
> + int source_fd = 0;
> + int target_fd = 0;
> + ssize_t ret = 0;
> + int need = 0;
> + int loop;
> + int err_num = 0;
> +
> +#ifdef ENABLE_NLS
> + setlocale(LC_MESSAGES, "");
> + setlocale(LC_CTYPE, "");
> + bindtextdomain(NLS_CAT_NAME, LOCALEDIR);
> + textdomain(NLS_CAT_NAME);
> +#endif
> + fprintf (stderr, "mksparse %s (%s)\n", E2FSPROGS_VERSION,
> + E2FSPROGS_DATE);
> + if (argc && *argv)
> + program_name = *argv;
> + while ((c = getopt (argc, argv, "s:i:")) != EOF)
> + switch (c) {
> + case 's':
> + buffer_size = get_buffer_size(optarg);
> + if (buffer_size < 0)
> + return -1;
> + break;
> + case 'i':
> + if_name = optarg;
> + break;
> + default:
> + usage();
> + }
> + /*
> + * Exactly one non-option argument must remain: the output file.
> + */
> + if (optind != argc - 1)
> + usage();
> +
> + add_error_table(&et_ext2_error_table);
> +
> + of_name = argv[optind];
> + /*
> + * Without -i the source is standard input (descriptor 0).
> + */
> + if (!if_name) {
> + source_fd = 0;
> + } else {
> + source_fd = open(if_name, O_RDONLY);
> + if (source_fd < 0) {
> + com_err (program_name, errno,
> + _("while trying to open %s"), if_name);
> + err_num = OPEN_SRC_FAIL;
> + goto out;
> + }
> + }
> + /*
> + * Create the output, or truncate it if it already exists; the
> + * creation mode is 0600.
> + */
> + target_fd = open(of_name, O_CREAT|O_TRUNC|O_WRONLY, 0600);
> + if (target_fd < 0) {
> + com_err (program_name, errno,
> + _("while trying to open %s"), of_name);
> + err_num = OPEN_TARGET_FAIL;
> + goto out;
> + }
> +
> + buffer = malloc(buffer_size);
> + if (!buffer) {
> + com_err (program_name, ENOMEM, _("while allocating buffer"));
> + err_num = MALLOC_FAIL;
> + goto out;
> + }
> + /*
> + * Copy loop.  "need" counts the bytes still missing from the
> + * current buffer; "loop" is cleared on end of input or on a
> + * read error, and set again by the next successful read.
> + */
> + loop = 1;
> + do {
> + need = buffer_size;
> + while (need > 0) {
> + ret = read (source_fd,
> + buffer + (buffer_size - need),
> + need);
> + if (ret < 0) {
> + if (loop == 0)
> + break;
> + else {
> + loop = 0;
> + continue;
> + }
> + } else if (ret == 0) {
> + loop = 0;
> + break;
> + } else {
> + if (loop == 0)
> + loop = 1;
> + need -= ret;
> + }
> + }
> + /*
> + * A completely filled buffer that check_zero() reports as
> + * zero is skipped with lseek() instead of being written;
> + * any other non-empty buffer is written out with the
> + * length that was actually read.
> + */
> + if (need == 0 && check_zero(buffer, buffer_size)) {
> + ret = lseek(target_fd, buffer_size, SEEK_CUR);
> + if (ret == (off_t)(-1)) {
> + com_err (program_name, errno,
> + _("while lseeking %d"), ret);
> + err_num = SEEK_FAIL;
> + goto out;
> + }
> + } else if (need < buffer_size) {
> + ret = write(target_fd, buffer, buffer_size - need);
> + if (ret < 0) {
> + com_err (program_name,
> + errno, _("while writeing"));
> + err_num = WRITE_FAIL;
> + goto out;
> + }
> + }
> + } while (loop);
> +
> +out:
> + if (buffer)
> + free(buffer);
> + /*
> + * Both descriptors start out as 0, so the "> 0" tests skip a
> + * descriptor that was never opened or whose open() failed, and
> + * leave standard input alone.
> + */
> + if (target_fd > 0) {
> + fsync(target_fd);
> + close(target_fd);
> + }
> +
> + if (source_fd > 0)
> + close(source_fd);
> +
> + remove_error_table(&et_ext2_error_table);
> + return (!err_num) ? 0 : -1;
> +}
>

--

2011-02-28 18:26:07

by Theodore Ts'o

[permalink] [raw]
Subject: Re: [PATCH 1/2] mksparse: build sparse file from compressed e2image file.

On Mon, Feb 28, 2011 at 10:35:13AM +0800, Robin Dong wrote:
> From: Robin Dong <[email protected]>
>
> [Purpose]
> After we make a image-file by e2image like:
> #e2image -r /dev/hda1 - | bzip2 > hda1.bz2
> we copy the bz2 file to remote host and extract it:
> #bunzip2 hda1.bz2
> the unzipped hda1 file will not be a sparse file and the space occupied
> by it is as large as the real /dev/hda1 filesystem.
>
> Therefore a tool to transform a raw-file to a sparse-file is necessary.
> This Patch is a first attempt to provide such a tool which is called
> 'mksparse' so far.

I guess you didn't find the /usr/src/e2fsprogs/contrib/make-sparse.c
file that I wrote a while back. I never did write a man page or
usage manual, or the rest of the niceties that go into a supported
program, but it's there. :-)

I've never been completely convinced that this functionality is
something that should be included in e2fsprogs as a supported program
and shipped with distributions. It is useful, though, and I've
certainly used make-sparse.c many times in the past, so I'm willing to
be convinced otherwise.

- Ted