2008-10-24 10:10:26

by Akira Fujita

[permalink] [raw]
Subject: [RFC][PATCH 9/9]ext4: online defrag command

ext4: online defrag -- Online defrag command

From: Akira Fujita <[email protected]>

- The defrag command. Usage is as follows:
- Put the multiple files closer together.
# e4defrag -r directory-name
- Defrag for free space fragmentation.
# e4defrag -f file-name
- Defrag for a single file.
# e4defrag file-name
- Defrag for all files on ext4.
# e4defrag device-name

Signed-off-by: Akira Fujita <[email protected]>
Signed-off-by: Takashi Sato <[email protected]>
---
/*
* e4defrag.c - ext4 filesystem defragmenter
*
* Copyright (C) 2008 NEC Software Tohoku, Ltd.
*
* Author: Akira Fujita <[email protected]>
* Takashi Sato <[email protected]>
*/

#ifndef _LARGEFILE_SOURCE
#define _LARGEFILE_SOURCE
#endif

#ifndef _LARGEFILE64_SOURCE
#define _LARGEFILE64_SOURCE
#endif

#define _XOPEN_SOURCE 500
#define _GNU_SOURCE
#include <ftw.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <dirent.h>
#include <limits.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/statfs.h>
#include <sys/vfs.h>
#include <sys/ioctl.h>
#include <mntent.h>
#include <linux/fs.h>
#include <ctype.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <ext2fs/ext2_types.h>
#include <endian.h>

/* Ioctl command */
#define EXT4_IOC_DEFRAG _IOW('f', 15, struct ext4_ext_defrag_data)
#define EXT4_IOC_SUPER_INFO _IOR('f', 16, struct ext4_super_block)
#define EXT4_IOC_FREE_BLOCKS_INFO _IOWR('f', 17, struct ext4_extents_info)
#define EXT4_IOC_FIEMAP_INO _IOWR('f', 18, struct fiemap_ino)
#define EXT4_IOC_MOVE_VICTIM _IOW('f', 19, struct ext4_extents_info)
#define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap)

/* Macro functions */
#define PRINT_ERR_MSG(msg) fprintf(stderr, "%s\n", (msg));
#define IN_FTW_PRINT_ERR_MSG(msg) \
fprintf(stderr, "\t%s\t\t[ NG ]\n", (msg));
#define PRINT_FILE_NAME(file) fprintf(stderr, " \"%s\"\n", (file));
#define PRINT_ERR_MSG_WITH_ERRNO(msg) \
fprintf(stderr, "\t%s:%s\t[ NG ]\n", (msg), strerror(errno));
#define min(x, y) (((x) > (y)) ? (y) : (x))
/* Wrap up the free function */
#define FREE(tmp) \
do { \
if (tmp) \
free(tmp); \
} while (0) \
/* Insert list2 after list1 */
#define insert(list1, list2) \
do { \
list2->next = list1->next; \
list1->next->prev = list2; \
list2->prev = list1; \
list1->next = list2; \
} while (0)

#ifndef __NR_fadvise64
#define __NR_fadvise64 250
#endif

#ifndef __NR_sync_file_range
#define __NR_sync_file_range 314
#endif

#ifndef POSIX_FADV_DONTNEED
#if defined(__s390x__)
#define POSIX_FADV_DONTNEED 6 /* Don't need these pages */
#else
#define POSIX_FADV_DONTNEED 4 /* Don't need these pages */
#endif
#endif

#ifndef SYNC_FILE_RANGE_WAIT_BEFORE
#define SYNC_FILE_RANGE_WAIT_BEFORE 1
#endif
#ifndef SYNC_FILE_RANGE_WRITE
#define SYNC_FILE_RANGE_WRITE 2
#endif
#ifndef SYNC_FILE_RANGE_WAIT_AFTER
#define SYNC_FILE_RANGE_WAIT_AFTER 4
#endif

#define DEVNAME 0
#define DIRNAME 1
#define FILENAME 2

#define RETURN_OK 0
#define RETURN_NG -1
#define FTW_CONT 0
#define FTW_OPEN_FD 2000

#define FS_EXT4 "ext4"
#define ROOT_UID 0
#define CHECK_FRAG_COUNT 1

/* Extent status which are used in extent_t */
#define EXT4_EXT_USE 0
#define EXT4_EXT_FREE 1

/* The maximum number of extents for exchanging between
* kernel-space and user-space per ioctl
*/
#define DEFRAG_MAX_ENT 32

/* The phase of force defrag mode */
#define DEFRAG_FORCE_TRY 1
#define DEFRAG_FORCE_GATHER 3

/* Magic number for ext4 */
#define EXT4_SUPER_MAGIC 0xEF53

/* Defrag size(64MB) per ioctl(not including force mode) */
#define DEFRAG_SIZE ((unsigned long)1 << 26)

/* Force defrag mode: Max file size in bytes (128MB) */
#define MAX_FILE_SIZE ((unsigned long)1 << 27)


/* The following four macros are used for ioctl FS_IOC_FIEMAP
* FIEMAP_FLAG_SYNC: sync file data before map.
* FIEMAP_EXTENT_LAST: last extent in file.
* FIEMAP_MAX_OFFSET: max file offset.
* EXTENT_MAX_COUNT: the maximum number of extents for exchanging between
kernel-space and user-space per ioctl
*/
#define FIEMAP_FLAG_SYNC 0x00000001
#define FIEMAP_EXTENT_LAST 0x00000001
#define FIEMAP_MAX_OFFSET (~0ULL)
#define EXTENT_MAX_COUNT 512

/* The following macros are error message */
#define MSG_USAGE \
"Usage : e4defrag [-v] file...| directory...| device...\n\
: e4defrag -f file [blocknr] \n\
: e4defrag -r directory... | device... \n"

#define MSG_R_OPTION " with regional block allocation mode.\n"
#define NGMSG_MTAB "Can not access /etc/mtab"
#define NGMSG_UNMOUNT "FS is not mounted"
#define NGMSG_EXT4 "FS is not ext4 File System"
#define NGMSG_FS_INFO "Get FSInfo fail"
#define NGMSG_FILE_INFO "Get FileInfo fail"
#define NGMSG_FILE_OPEN "Open fail"
#define NGMSG_FILE_SYNC "Sync(fsync) fail"
#define NGMSG_FILE_DEFRAG "Defrag fail"
#define NGMSG_FILE_BLOCKSIZE "Can't get blocksize"
#define NGMSG_FILE_SUPER "Can't get super block info"
#define NGMSG_FILE_UNREG "File is not regular file"
#define NGMSG_VICTIM_UID "Victim file is not current user's file"

#define NGMSG_FILE_LARGE \
"Defrag size is larger than FileSystem's free space"

#define NGMSG_FILE_PRIORITY \
"File is not current user's file or current user is not root"

#define NGMSG_FILE_LOCK "File is locked"
#define NGMSG_FILE_BLANK "File size is 0"
#define NGMSG_GET_LCKINFO "Get LockInfo fail"
#define NGMSG_TYPE \
"Can not process %s in regional mode.\n"
#define NGMSG_LOST_FOUND "Can not process \"lost+found\""
#define NGMSG_REALPATH "Can not get full path"
#define NGMSG_FILE_MAP "Get file map fail"
#define NGMSG_FILE_DROP_BUFFER "Free page fail"
#define NGMSG_FADVISE_SYSCALL "\tFadvise fail"
#define NGMSG_FORCE_DEFRAG_SIZE \
"Cannot specify a file larger than %luMB in force defrag mode"
#define NGMSG_ENDIAN "Endian type is not big/little endian"

/* Data type for filesystem-wide blocks number */
typedef unsigned long long ext4_fsblk_t;

/* Data type for file logical block number */
typedef unsigned int ext4_lblk_t;

/* Data type for block offset of block group */
typedef int ext4_grpblk_t;

struct ext4_extent_data {
ext4_lblk_t block; /* start logical block number */
ext4_fsblk_t start; /* start physical block number */
int len; /* blocks count */
};

/* Used for defrag */
struct ext4_ext_defrag_data {
ext4_lblk_t start_offset; /* start offset to defrag in blocks */
ext4_lblk_t defrag_size; /* size of defrag in blocks */
ext4_fsblk_t goal; /* block offset for allocation */
int flag; /* free space mode flag */
struct ext4_extent_data ext;
};

typedef __u16 __le16;
typedef __u32 __le32;
typedef __u64 __le64;

/*
* Structure of the super block
*/
struct ext4_super_block {
/*00*/ __le32 s_inodes_count; /* Inodes count */
__le32 s_blocks_count_lo; /* Blocks count */
__le32 s_r_blocks_count_lo; /* Reserved blocks count */
__le32 s_free_blocks_count_lo; /* Free blocks count */
/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
__le32 s_first_data_block; /* First Data Block */
__le32 s_log_block_size; /* Block size */
__le32 s_obso_log_frag_size; /* Obsoleted fragment size */
/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
__le32 s_obso_frags_per_group; /* Obsoleted fragments per group */
__le32 s_inodes_per_group; /* # Inodes per group */
__le32 s_mtime; /* Mount time */
/*30*/ __le32 s_wtime; /* Write time */
__le16 s_mnt_count; /* Mount count */
__le16 s_max_mnt_count; /* Maximal mount count */
__le16 s_magic; /* Magic signature */
__le16 s_state; /* File system state */
__le16 s_errors; /* Behaviour when detecting errors */
__le16 s_minor_rev_level; /* minor revision level */
/*40*/ __le32 s_lastcheck; /* time of last check */
__le32 s_checkinterval; /* max. time between checks */
__le32 s_creator_os; /* OS */
__le32 s_rev_level; /* Revision level */
/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
__le16 s_def_resgid; /* Default gid for reserved blocks */
/*
* These fields are for EXT4_DYNAMIC_REV superblocks only.
*
* Note: the difference between the compatible feature set and
* the incompatible feature set is that if there is a bit set
* in the incompatible feature set that the kernel doesn't
* know about, it should refuse to mount the filesystem.
*
* e2fsck's requirements are more strict; if it doesn't know
* about a feature in either the compatible or incompatible
* feature set, it must abort and not try to meddle with
* things it doesn't understand...
*/
__le32 s_first_ino; /* First non-reserved inode */
__le16 s_inode_size; /* size of inode structure */
__le16 s_block_group_nr; /* block group # of this superblock */
__le32 s_feature_compat; /* compatible feature set */
/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
__le32 s_feature_ro_compat; /* readonly-compatible feature set */
/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
/*78*/ char s_volume_name[16]; /* volume name */
/*88*/ char s_last_mounted[64]; /* directory where last mounted */
/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
/*
* Performance hints. Directory preallocation should only
* happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
*/
__u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
__u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
__le16 s_reserved_gdt_blocks; /* Per group desc for online growth */
/*
* Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
*/
/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */
/*E0*/ __le32 s_journal_inum; /* inode number of journal file */
__le32 s_journal_dev; /* device number of journal file */
__le32 s_last_orphan; /* start of list of inodes to delete */
__le32 s_hash_seed[4]; /* HTREE hash seed */
__u8 s_def_hash_version; /* Default hash version to use */
__u8 s_reserved_char_pad;
__le16 s_desc_size; /* size of group descriptor */
/*100*/ __le32 s_default_mount_opts;
__le32 s_first_meta_bg; /* First metablock block group */
__le32 s_mkfs_time; /* When the filesystem was created */
__le32 s_jnl_blocks[17]; /* Backup of the journal inode */
/* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
/*150*/ __le32 s_blocks_count_hi; /* Blocks count */
__le32 s_r_blocks_count_hi; /* Reserved blocks count */
__le32 s_free_blocks_count_hi; /* Free blocks count */
__le16 s_min_extra_isize; /* All inodes have at least # bytes */
__le16 s_want_extra_isize; /* New inodes should reserve # bytes */
__le32 s_flags; /* Miscellaneous flags */
__le16 s_raid_stride; /* RAID stride */
__le16 s_mmp_interval; /* # seconds to wait in MMP checking */
__le64 s_mmp_block; /* Block for multi-mount protection */
__le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
__u8 s_log_groups_per_flex; /* FLEX_BG group size */
__u8 s_reserved_char_pad2;
__le16 s_reserved_pad;
__u32 s_reserved[162]; /* Padding to the end of the block */
};

struct ext4_extents_info {
unsigned long long ino; /* inode number */
int max_entries; /* maximum extents count */
int entries; /* extent number/count */
ext4_lblk_t f_offset; /* file offset */
ext4_grpblk_t g_offset; /* group offset */
ext4_fsblk_t goal;
struct ext4_extent_data ext[DEFRAG_MAX_ENT];
};

typedef struct extent {
struct extent *prev;
unsigned long tag; /* extent status */
unsigned long ino; /* file's inode number */
struct ext4_extent_data data; /* extent belong to file */
struct extent *next;
} extent_t;

typedef struct exts_group {
struct exts_group *prev;
extent_t *start; /* start ext */
extent_t *end; /* end ext */
int len; /* length of this continuous region */
struct exts_group *next;
} exts_group_t;

struct fiemap_extent {
__u64 fe_logical; /* logical offset in bytes for the start of
* the extent from the beginning of the file */
__u64 fe_physical; /* physical offset in bytes for the start
* of the extent from the beginning
* of the disk */
__u64 fe_length; /* length in bytes for this extent */
__u64 fe_reserved64[2];
__u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
__u32 fe_reserved[3];
};

struct fiemap {
__u64 fm_start; /* logical offset (inclusive) at
* which to start mapping (in) */
__u64 fm_length; /* logical length of mapping which
* userspace wants (in) */
__u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
__u32 fm_mapped_extents;/* number of extents that were mapped (out) */
__u32 fm_extent_count; /* size of fm_extents array (in) */
__u32 fm_reserved;
struct fiemap_extent fm_extents[0];/* array of mapped extents (out) */
};

struct fiemap_ino {
__u64 ino;
struct fiemap fiemap;
};

int force_flag;
int detail_flag;
int regional_flag;
int extents_before_defrag;
int extents_after_defrag;
unsigned long succeed_cnt;
unsigned long regular_count;
unsigned long total_count;
unsigned long frag_files_before_defrag;
unsigned long frag_files_after_defrag;
unsigned long defraged_file_count;
char lost_found_dir[PATH_MAX + 1];
ext4_fsblk_t goal;
ext4_fsblk_t fgoal = -1;

/*
* ext2fs_swab32() - Change endian.
*
* @val: the entry used for change.
*/
__u32 ext2fs_swab32(__u32 val)
{
#if BYTE_ORDER == BIG_ENDIAN
return (val>>24) | ((val>>8)&0xFF00) |
((val<<8)&0xFF0000) | (val<<24);
#else
return val;
#endif
}

/*
* fadvise() - Give advice about file access.
*
* @fd: the file's descriptor.
* @offset: file offset.
* @len: area length.
* @advise: process flag.
*/
int fadvise(int fd, loff_t offset, size_t len, int advise)
{
return syscall(__NR_fadvise64, fd, offset, len, advise);
}

/*
* sync_file_range() - Sync file region.
*
* @fd: the file's descriptor.
* @offset: file offset.
* @length: area length.
* @flag: process flag.
*/
int sync_file_range(int fd, loff_t offset, loff_t length, unsigned int flag)
{
return syscall(__NR_sync_file_range, fd, offset, length, flag);
}

/*
* page_in_core() - Get information on whether pages are in core.
*
* @fd: the file's descriptor.
* @defrag_data: data used for defrag.
* @vec: page state array.
* @page_num: page number.
*/
int page_in_core(int fd, struct ext4_ext_defrag_data defrag_data,
unsigned char **vec, unsigned long *page_num)
{
int blocksize;
int pagesize = getpagesize();
void *page = NULL;
loff_t offset, end_offset, length;

if (vec == NULL || *vec != NULL)
return RETURN_NG;

if (ioctl(fd, FIGETBSZ, &blocksize) < 0)
return RETURN_NG;

/* In mmap, offset should be a multiple of the page size */
offset = defrag_data.start_offset * blocksize;
length = defrag_data.defrag_size * blocksize;
end_offset = offset + length;
/* Round the offset down to the nearest multiple of pagesize */
offset = (offset / pagesize) * pagesize;
length = end_offset - offset;

page = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, offset);
if (page == MAP_FAILED)
return RETURN_NG;

*page_num = 0;
*page_num = (length + pagesize - 1) / pagesize;
*vec = (unsigned char *)calloc(*page_num, 1);
if (*vec == NULL)
return RETURN_NG;

/* Get information on whether pages are in core */
if (mincore(page, (size_t)length, *vec) == -1 ||
munmap(page, length) == -1) {
FREE(*vec);
return RETURN_NG;
}

return RETURN_OK;
}

/*
* defrag_fadvise() - Predeclare an access pattern for file data.
*
* @fd: the file's descriptor.
* @defrag_data: data used for defrag.
* @vec: page state array.
* @page_num: page number.
*/
int defrag_fadvise(int fd, struct ext4_ext_defrag_data defrag_data,
unsigned char *vec, unsigned long page_num)
{
int flag = 1;
int blocksize;
int pagesize = getpagesize();
int fadvise_flag = POSIX_FADV_DONTNEED;
int sync_flag = SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE|
SYNC_FILE_RANGE_WAIT_AFTER;
unsigned long i;
loff_t offset;

if (ioctl(fd, FIGETBSZ, &blocksize) < 0)
return RETURN_NG;

offset = (loff_t)defrag_data.start_offset * blocksize;
offset = (offset / pagesize) * pagesize;

/* Sync file for fadvise process */
if (sync_file_range(fd, offset,
(loff_t)pagesize*page_num, sync_flag) != 0)
return RETURN_NG;

/* Try to release buffer cache which this process used,
* then other process can use the released buffer
*/
for (i = 0; i < page_num; i++) {
if ((vec[i] & 0x1) == 0) {
offset += pagesize;
continue;
}
if (fadvise(fd, offset, pagesize, fadvise_flag) != 0) {
if (detail_flag && flag) {
perror(NGMSG_FADVISE_SYSCALL);
flag = 0;
}
}
offset += pagesize;
}

return RETURN_OK;
}

/*
* check_free_size() - Check if there's enough disk space.
*
* @fd: the file's descriptor.
* @file_name file name.
* @buf: the pointer of the struct stat64.
*/
int check_free_size(int fd, const char *file_name, const struct stat64 *buf)
{
off64_t size = 0;
off64_t free_size = 0;
struct statfs fsbuf;

/* Target file size */
size = buf->st_size;

if (fstatfs(fd, &fsbuf) < 0) {
if (detail_flag) {
PRINT_FILE_NAME(file_name);
PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FS_INFO);
}
return RETURN_NG;
}

/* Compute free space for root and normal user separately */
if (getuid() == ROOT_UID)
free_size = (off64_t)fsbuf.f_bsize * fsbuf.f_bfree;
else
free_size = (off64_t)fsbuf.f_bsize * fsbuf.f_bavail;

if (free_size >= size)
return RETURN_OK;

if (detail_flag) {
PRINT_FILE_NAME(file_name);
IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_LARGE);
}
return RETURN_NG;
}

int file_check(int fd, const struct stat64 *buf, const char *file_name);
int force_defrag(int fd, const char *file_name,
const struct stat64 *buf, int blocksize);

/*
* file_frag_count() - Get file fragment count.
*
* @fd: the file's descriptor.
*/
long long file_frag_count(int fd)
{
int ret = RETURN_NG;
struct fiemap file_extent_map;

/* When fm_extent_count is 0,
* ioctl just get file fragment count.
*/
memset(&file_extent_map, 0, sizeof(struct fiemap));
file_extent_map.fm_start = 0;
file_extent_map.fm_length = FIEMAP_MAX_OFFSET;
file_extent_map.fm_flags |= FIEMAP_FLAG_SYNC;

ret = ioctl(fd, FS_IOC_FIEMAP, &file_extent_map);
if (ret < 0)
return RETURN_NG;

return file_extent_map.fm_mapped_extents;
}

/*
* ftw_fn() - Check file attributes and ioctl call to avoid.
* illegal operations.
*
* @file: the file's name.
* @buf: the pointer of the struct stat64.
* @flag: file type.
* @ftwbuf: the pointer of a struct FTW.
*/
int ftw_fn(const char *file, const struct stat64 *buf, int flag,
struct FTW *ftwbuf)
{
int fd;
int blocksize;
int percent = 0;
int defrag_fail = 0;
int defraged_size = 0;
int ret = RETURN_NG;
long long file_frags_start, file_frags_end;
unsigned long page_num;
unsigned char *vec = NULL;
loff_t start = 0;
struct ext4_ext_defrag_data df_data;

defraged_file_count++;

if (detail_flag) {
printf("[%lu/%lu]", defraged_file_count, total_count);
fflush(stdout);
}

if (lost_found_dir[0] != '\0' &&
!memcmp(file, lost_found_dir, strnlen(lost_found_dir, PATH_MAX))) {
if (detail_flag) {
PRINT_FILE_NAME(file);
IN_FTW_PRINT_ERR_MSG(NGMSG_LOST_FOUND);
}
return FTW_CONT;
}

if (flag != FTW_F) {
if (detail_flag) {
PRINT_FILE_NAME(file);
IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_UNREG);
}
return FTW_CONT;
}

fd = open64(file, O_RDONLY);
if (fd < 0) {
if (detail_flag) {
PRINT_FILE_NAME(file);
PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN);
}
return FTW_CONT;
}

if (file_check(fd, buf, file) == RETURN_NG)
goto out;

if (fsync(fd) < 0) {
if (detail_flag) {
PRINT_FILE_NAME(file);
PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_SYNC);
}
goto out;
}
/* Get blocksize */
if (ioctl(fd, FIGETBSZ, &blocksize) < 0) {
if (detail_flag) {
PRINT_FILE_NAME(file);
PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_BLOCKSIZE);
}
goto out;
}
/* Ioctl call does the actual defragment job */
df_data.start_offset = 0;
df_data.goal = goal;
df_data.ext.len = 0;
df_data.flag = force_flag;

/* Count file fragments before defrag */
if (detail_flag) {
file_frags_start = file_frag_count(fd);
if (file_frags_start == RETURN_NG) {
PRINT_FILE_NAME(file);
PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_INFO);
goto out;
}

if (file_frags_start != 1)
frag_files_before_defrag++;

extents_before_defrag += file_frags_start;
}

/* Print process progress */
percent = (start * 100) / buf->st_size;
printf("\033[79;0H\033[K[%lu/%lu]%s:\t%3d%%",
defraged_file_count, total_count, file, min(percent, 100));
fflush(stdout);

while (1) {
df_data.defrag_size = (min((buf->st_size - start),
DEFRAG_SIZE) + blocksize - 1) / blocksize;
ret = page_in_core(fd, df_data, &vec, &page_num);
if (ret == RETURN_NG) {
if (detail_flag) {
printf("\n");
PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_MAP);
} else {
printf("\t[ NG ]\n");
}
defrag_fail = 1;
break;
}

/* EXT4_IOC_DEFRAG */
defraged_size = ioctl(fd, EXT4_IOC_DEFRAG, &df_data);

/* Free pages */
ret = defrag_fadvise(fd, df_data, vec, page_num);
if (vec) {
free(vec);
vec = NULL;
}
if (ret == RETURN_NG) {
if (detail_flag) {
printf("\n");
PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_DROP_BUFFER
);
} else {
printf("\t[ NG ]\n");
}
defrag_fail = 1;
break;
}

/* Go into force defrag mode */
if (defraged_size < 0 && force_flag == DEFRAG_FORCE_TRY
&& errno == ENOSPC) {
/* File size is larger than max size of
* force defrag mode
*/
if (buf->st_size > MAX_FILE_SIZE) {
if (detail_flag) {
fprintf(stderr, "\n\t");
fprintf(stderr, NGMSG_FORCE_DEFRAG_SIZE,
(MAX_FILE_SIZE) / (1024 * 1024)
);
fprintf(stderr, "\t[ NG ]\n");
} else {
printf("\t[ NG ]\n");
}
defrag_fail = 1;
break;
}

printf("\033[79;0H\033[K");
defraged_size = force_defrag(fd, file, buf, blocksize);
if (defraged_size * blocksize >= buf->st_size)
/* Whole file is enforcedly defraged */
break;
else {
if (!detail_flag) {
printf("\033[79;0H\033[K[%lu/%lu]%s\t"
"0%%\t[ NG ]\n",
defraged_file_count,
total_count, file);
}
defrag_fail = 1;
break;
}
}
if (defraged_size < 0) {
if (detail_flag) {
printf("\n");
PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_DEFRAG);
} else {
printf("\t[ NG ]\n");
}
defrag_fail = 1;
break;
}
df_data.start_offset += defraged_size;
start = (long long)df_data.start_offset * blocksize;

/* Print process progress */
percent = (start * 100) / buf->st_size;

/* Disk space file used is bigger than logical size */
printf("\033[79;0H\033[K[%lu/%lu]%s:\t%3d%%",
defraged_file_count, total_count, file,
min(percent, 100));
fflush(stdout);

/* End of file */
if (start >= buf->st_size)
break;
}

/* Count file fragments after defrag and print extents info */
if (detail_flag) {
file_frags_end = file_frag_count(fd);
if (file_frags_end == RETURN_NG) {
printf("\n");
PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_INFO);
goto out;
}

if (file_frags_end != 1)
frag_files_after_defrag++;

extents_after_defrag += file_frags_end;

if (defrag_fail)
goto out;

printf(" extents: %lld -> %lld",
file_frags_start, file_frags_end);
fflush(stdout);
}

if (defrag_fail)
goto out;

printf("\t[ OK ]\n");
fflush(stdout);
succeed_cnt++;

out:
close(fd);
return FTW_CONT;
}

/*
* file_check() - Check file's attributes.
*
* @fd: the file's descriptor.
* @buf: a pointer of the struct stat64.
* @file_name: the file's name.
*/
int file_check(int fd, const struct stat64 *buf, const char *file_name)
{
struct flock lock;

/* Write-lock check is more reliable */
lock.l_type = F_WRLCK;
lock.l_start = 0;
lock.l_whence = SEEK_SET;
lock.l_len = 0;

/* Regular file */
if (S_ISREG(buf->st_mode) == 0) {
if (detail_flag) {
PRINT_FILE_NAME(file_name);
IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_UNREG);
}
return RETURN_NG;
}

/* Free space */
if (check_free_size(fd, file_name, buf) == RETURN_NG)
return RETURN_NG;

/* Priority */
if (getuid() != ROOT_UID &&
buf->st_uid != getuid()) {
if (detail_flag) {
PRINT_FILE_NAME(file_name);
IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_PRIORITY);
}
return RETURN_NG;
}

/* Lock status */
if (fcntl(fd, F_GETLK, &lock) < 0) {
if (detail_flag) {
PRINT_FILE_NAME(file_name);
PRINT_ERR_MSG_WITH_ERRNO(NGMSG_GET_LCKINFO);
}
return RETURN_NG;
} else if (lock.l_type != F_UNLCK) {
if (detail_flag) {
PRINT_FILE_NAME(file_name);
IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_LOCK);
}
return RETURN_NG;
}

/* Empty file */
if (buf->st_size == 0) {
if (detail_flag) {
PRINT_FILE_NAME(file_name);
IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_BLANK);
}
return RETURN_NG;
}

return RETURN_OK;
}

/*
* is_ext4() - Whether on an ext4 filesystem.
*
* @filename: the file's name.
*/
int is_ext4(const char *filename)
{
int maxlen, len, ret;
FILE *fp = NULL;
char *mnt_type = NULL;
/* Refer to /etc/mtab */
char *mtab = MOUNTED;
char file_path[PATH_MAX + 1];
struct mntent *mnt = NULL;
struct statfs buffs;

/* Get full path */
if (realpath(filename, file_path) == NULL) {
perror(NGMSG_REALPATH);
PRINT_FILE_NAME(filename);
return RETURN_NG;
}

if (statfs(file_path, &buffs) < 0) {
perror(NGMSG_FS_INFO);
PRINT_FILE_NAME(filename);
return RETURN_NG;
}

if (buffs.f_type != EXT4_SUPER_MAGIC) {
PRINT_ERR_MSG(NGMSG_EXT4);
return RETURN_NG;
}

fp = setmntent(mtab, "r");
if (fp == NULL) {
perror(NGMSG_MTAB);
return RETURN_NG;
}

maxlen = 0;
while ((mnt = getmntent(fp)) != NULL) {
len = strlen(mnt->mnt_dir);
ret = memcmp(file_path, mnt->mnt_dir, len);
if (ret != 0)
continue;

if (maxlen >= len)
continue;

maxlen = len;

mnt_type = realloc(mnt_type, strlen(mnt->mnt_type) + 1);
if (!mnt_type) {
endmntent(fp);
return RETURN_NG;
}
strcpy(mnt_type, mnt->mnt_type);
strncpy(lost_found_dir, mnt->mnt_dir, PATH_MAX);
}

endmntent(fp);
if (strcmp(mnt_type, FS_EXT4) == 0) {
FREE(mnt_type);
return RETURN_OK;
} else {
FREE(mnt_type);
PRINT_ERR_MSG(NGMSG_EXT4);
return RETURN_NG;
}
}

/*
* calc_entry_counts - calculate file counts
*
* @file: file name.
* @buf: file info.
* @flag: file type.
* @ftwbuf: the pointer of a struct FTW.
*/
int calc_entry_counts(const char *file, const struct stat64 *buf, int flag,
struct FTW *ftwbuf)
{

if (flag == FTW_F)
regular_count++;

total_count++;

return FTW_CONT;
}

/*
* get_mount_point() - Get device's mount point.
*
* @devname: the device's name.
* @mount_point: the mount point.
* @dir_path_len: the length of directory.
*/
int get_mount_point(const char *devname, char *mount_point,
int dir_path_len)
{
/* Refer to /etc/mtab */
char *mtab = MOUNTED;
FILE *fp = NULL;
struct mntent *mnt = NULL;

fp = setmntent(mtab, "r");
if (fp == NULL) {
perror(NGMSG_MTAB);
return RETURN_NG;
}

while ((mnt = getmntent(fp)) != NULL) {
if (strcmp(devname, mnt->mnt_fsname) != 0)
continue;

endmntent(fp);
if (strcmp(mnt->mnt_type, FS_EXT4) == 0) {
strncpy(mount_point, mnt->mnt_dir,
dir_path_len);
return RETURN_OK;
}
PRINT_ERR_MSG(NGMSG_EXT4);
return RETURN_NG;
}
endmntent(fp);
PRINT_ERR_MSG(NGMSG_UNMOUNT);
return RETURN_NG;
}

/*
* main() - ext4 online defrag.
*
* @argc: the number of parameter.
* @argv[]: the pointer array of parameter.
*/
int main(int argc, char *argv[])
{
int fd;
int opt;
int i, flags;
int arg_type;
int detail_tmp;
int success_flag;
char dir_name[PATH_MAX + 1];
struct stat64 buf;

i = 1;
flags = 0;
arg_type = -1;
detail_tmp = -1;
success_flag = 0;
/* Do not follow symlink */
flags |= FTW_PHYS;
/* Stay within the same filesystem */
flags |= FTW_MOUNT;

/* Parse arguments */
if (argc == 1 || (argc == 2 && argv[1][0] == '-'))
goto out;

while ((opt = getopt(argc, argv, "rvf")) != EOF) {
switch (opt) {
case 'r':
regional_flag = 1;
i = 2;
break;
case 'v':
detail_flag = 1;
i = 2;
break;
case 'f':
force_flag = DEFRAG_FORCE_TRY;
i = 2;

if (argc == 3)
break;

if (argc > 4) {
printf("Illegal argument\n");
goto out;
}

/* Now the agrc must be 4(e4defrag -f file blocknr) */
int res_strlen;
res_strlen = strlen(argv[3]);
for (res_strlen -= 1; res_strlen >= 0; res_strlen--) {
if (!isdigit(argv[3][res_strlen])) {
printf("Illegal argument\n");
goto out;
}
}

fgoal = strtoul(argv[3], NULL, 0);
if (errno) {
printf("block num shold be < 4294967295"
"(max num of unsigned long 32bit)\n");
exit(1);
}
if (!fgoal)
fgoal = -1;
break;
default:
goto out;
}
}

/* Main process */
for (; i < argc; i++) {
succeed_cnt = 0;
regular_count = 0;
total_count = 0;
frag_files_before_defrag = 0;
frag_files_after_defrag = 0;
extents_before_defrag = 0;
extents_after_defrag = 0;
defraged_file_count = 0;

memset(dir_name, 0, PATH_MAX + 1);
memset(lost_found_dir, 0, PATH_MAX + 1);

if (force_flag && i == 3)
break;

#if BYTE_ORDER != BIG_ENDIAN && BYTE_ORDER != LITTLE_ENDIAN
PRINT_ERR_MSG(NGMSG_ENDIAN);
PRINT_FILE_NAME(argv[i]);
continue;
#endif

if (lstat64(argv[i], &buf) < 0) {
perror(NGMSG_FILE_INFO);
PRINT_FILE_NAME(argv[i]);
continue;
}

/* Only regular file is acceptalbe with force defrag mode */
if (force_flag && !S_ISREG(buf.st_mode)) {
printf("Inappropriate file type\n");
goto out;
}

if (S_ISBLK(buf.st_mode)) {
/* Block device */
if (get_mount_point(argv[i], dir_name, PATH_MAX)
== RETURN_NG)
continue;
arg_type = DEVNAME;
printf("ext4 defragmentation for device(%s)\n",
argv[i]);
} else if (S_ISDIR(buf.st_mode)) {
/* Directory */
if (access(argv[i], R_OK) < 0) {
perror(argv[i]);
continue;
}
arg_type = DIRNAME;
strcpy(dir_name, argv[i]);
} else if (S_ISREG(buf.st_mode)) {
/* Regular file */
arg_type = FILENAME;
} else {
/* Irregular file */
PRINT_ERR_MSG(NGMSG_FILE_UNREG);
PRINT_FILE_NAME(argv[i]);
continue;
}

/* For device case,
* filesystem type checked in get_mount_point()
*/
if (arg_type == FILENAME || arg_type == DIRNAME) {
if (is_ext4(argv[i]) == RETURN_NG)
continue;
if (realpath(argv[i], dir_name) == NULL) {
perror(NGMSG_REALPATH);
PRINT_FILE_NAME(argv[i]);
continue;
}
}

switch (arg_type) {
case DIRNAME:
printf("ext4 defragmentation for directory(%s)\n",
argv[i]);

int mount_dir_len = 0;
mount_dir_len = strnlen(lost_found_dir, PATH_MAX);

strncat(lost_found_dir, "/lost+found",
PATH_MAX - strnlen(lost_found_dir, PATH_MAX));

/* Not the case("e4defrag mount_piont_dir") */
if (dir_name[mount_dir_len] != '\0') {
/* "e4defrag mount_piont_dir/lost+found"
* or "e4defrag mount_piont_dir/lost+found/"
*/
if (strncmp(lost_found_dir, dir_name,
strnlen(lost_found_dir,
PATH_MAX)) == 0 &&
(dir_name[strnlen(lost_found_dir,
PATH_MAX)] == '\0' ||
dir_name[strnlen(lost_found_dir,
PATH_MAX)] == '/')) {
PRINT_ERR_MSG(NGMSG_LOST_FOUND);
PRINT_FILE_NAME(argv[i]);
continue;
}

/* "e4defrag mount_piont_dir/else_dir" */
memset(lost_found_dir, 0, PATH_MAX + 1);
}
case DEVNAME:
if (arg_type == DEVNAME) {
strncpy(lost_found_dir, dir_name,
strnlen(dir_name, PATH_MAX));
strncat(lost_found_dir, "/lost+found/",
PATH_MAX - strnlen(lost_found_dir,
PATH_MAX));
}

/* Regional block allocation */
if (regional_flag) {
unsigned int bg_num = 0;
int ret = 0;
struct ext4_super_block sb;

printf(MSG_R_OPTION);

fd = open64(dir_name, O_RDONLY);
if (fd < 0) {
if (detail_flag) {
perror(NGMSG_FILE_OPEN);
PRINT_FILE_NAME(dir_name);
}
continue;
}

memset(&sb, 0, sizeof(struct ext4_super_block));

ret = ioctl(fd, EXT4_IOC_SUPER_INFO, &sb);
if (ret < 0) {
perror(NGMSG_FILE_SUPER);
PRINT_FILE_NAME(dir_name);
close(fd);
continue;
}
close(fd);

sb.s_blocks_per_group =
ext2fs_swab32(sb.s_blocks_per_group);
sb.s_inodes_per_group =
ext2fs_swab32(sb.s_inodes_per_group);
sb.s_first_data_block =
ext2fs_swab32(sb.s_first_data_block);

bg_num = (buf.st_ino - 1) /
sb.s_inodes_per_group;
goal = bg_num * sb.s_blocks_per_group +
sb.s_first_data_block;
}
nftw64(dir_name, calc_entry_counts, FTW_OPEN_FD, flags);

/* File tree walk */
nftw64(dir_name, ftw_fn, FTW_OPEN_FD, flags);
printf("\n\tSuccess:\t\t\t[ %lu/%lu ]\n", succeed_cnt,
total_count);
printf("\tFailure:\t\t\t[ %lu/%lu ]\n",
total_count - succeed_cnt, total_count);
if (detail_flag) {
printf("\tTotal extents:\t\t\t%4d->%d\n",
extents_before_defrag,
extents_after_defrag);
printf("\tFragmented percentage:\t\t"
"%3llu%%->%llu%%\n",
!regular_count ? 0 :
((unsigned long long)
frag_files_before_defrag * 100) /
regular_count,
!regular_count ? 0 :
((unsigned long long)
frag_files_after_defrag * 100) /
regular_count);
}
break;
case FILENAME:
total_count = 1;
strncat(lost_found_dir, "/lost+found/",
PATH_MAX - strnlen(lost_found_dir,
PATH_MAX));
if (strncmp(lost_found_dir, dir_name,
strnlen(lost_found_dir,
PATH_MAX)) == 0) {
PRINT_ERR_MSG(NGMSG_LOST_FOUND);
PRINT_FILE_NAME(argv[i]);
continue;
}

if (regional_flag) {
fprintf(stderr, NGMSG_TYPE, argv[i]);
continue;
}
printf("ext4 defragmentation for %s\n", argv[i]);
/* Defrag single file process */
ftw_fn(argv[i], &buf, FTW_F, NULL);
if (succeed_cnt != 0)
printf(" Success:\t\t\t[1/1]\n");
else
printf(" Success:\t\t\t[0/1]\n");

break;
}

if (succeed_cnt != 0)
success_flag = 1;
}

if (success_flag)
return RETURN_OK;

exit(1);

out:
printf(MSG_USAGE);
exit(1);
}

/*
* insert_extent() - Sequentially insert extent by physical block number.
*
* @extlist_head: the head of an extent list.
* @ext: the extent element which will be inserted.
*/
int insert_extent(extent_t **extlist_head, extent_t *ext)
{
extent_t *ext_tmp = *extlist_head;

if (ext == NULL)
return RETURN_NG;

/* First element */
if (*extlist_head == NULL) {
(*extlist_head) = ext;
(*extlist_head)->prev = *extlist_head;
(*extlist_head)->next = *extlist_head;
return RETURN_OK;
}

if (ext->data.start <= ext_tmp->data.start) {
/* Insert before head */
if (ext_tmp->data.start < ext->data.start + ext->data.len)
/* Overlap */
return RETURN_NG;
/* Adjust head */
*extlist_head = ext;
} else {
/* Insert into the middle or last of the list */
do {
if (ext->data.start < ext_tmp->data.start)
break;
ext_tmp = ext_tmp->next;
} while (ext_tmp != (*extlist_head));
if (ext->data.start <
ext_tmp->prev->data.start + ext_tmp->prev->data.len)
/* Overlap */
return RETURN_NG;

if (ext_tmp != *extlist_head &&
ext_tmp->data.start < ext->data.start + ext->data.len)
/* Overlap */
return RETURN_NG;
}
ext_tmp = ext_tmp->prev;
/* Insert "ext" after "ext_tmp" */
insert(ext_tmp, ext);
return RETURN_OK;
}

/*
* insert_exts_group() - Insert a exts_group in decreasing order
* of length.
*
* @exts_group_list_head: the head of a exts_group list.
* @exts_group: the exts_group element which will be inserted.
*/
int insert_exts_group(exts_group_t **exts_group_list_head,
exts_group_t *exts_group)
{
exts_group_t *exts_group_tmp = NULL;

if (exts_group == NULL)
return RETURN_NG;

/* Initialize list */
if (*exts_group_list_head == NULL) {
(*exts_group_list_head) = exts_group;
(*exts_group_list_head)->prev = *exts_group_list_head;
(*exts_group_list_head)->next = *exts_group_list_head;
return RETURN_OK;
}

if (exts_group->len >= (*exts_group_list_head)->len) {
/* Insert before exts_group_list_head */
exts_group_tmp = (*exts_group_list_head)->prev;
insert(exts_group_tmp, exts_group);
*exts_group_list_head = exts_group;
return RETURN_OK;
}

/* Find insertion positon */
for (exts_group_tmp = (*exts_group_list_head)->next;
exts_group_tmp != *exts_group_list_head;
exts_group_tmp = exts_group_tmp->next) {
if (exts_group_tmp->len < exts_group->len)
break;
}
exts_group_tmp = exts_group_tmp->prev;
insert(exts_group_tmp, exts_group);

return RETURN_OK;
}

/*
* get_exts_group() - Get element from the exts_group list.
*
* @exts_group_list_head: the head of a exts_group list.
* @exts_group: the exts_group element which will be got.
*/
exts_group_t *get_exts_group(exts_group_t **exts_group_list_head,
exts_group_t *exts_group)
{
if (exts_group == NULL || *exts_group_list_head == NULL)
return NULL;

/* Keep "exts_group_list_head" point to the largest extent group */
if (exts_group == *exts_group_list_head)
*exts_group_list_head = exts_group->next;

if (*exts_group_list_head == (*exts_group_list_head)->next &&
exts_group == *exts_group_list_head)
/* Delete the last element in the list */
*exts_group_list_head = NULL;

exts_group->prev->next = exts_group->next;
exts_group->next->prev = exts_group->prev;
return exts_group;
}

/*
* free_exts_group() - Free the exts_group.
*
* @*exts_group_list_head: the exts_group list head which will be free.
*/
void free_exts_group(exts_group_t *exts_group_list_head)
{
exts_group_t *exts_group_tmp = NULL;

if (exts_group_list_head == NULL)
return;

while (exts_group_list_head->next != exts_group_list_head) {
exts_group_tmp = exts_group_list_head;
exts_group_list_head->prev->next = exts_group_list_head->next;
exts_group_list_head->next->prev = exts_group_list_head->prev;
exts_group_list_head = exts_group_list_head->next;
free(exts_group_tmp);
}
free(exts_group_list_head);
}

/*
* free_ext() - Free the extent list.
*
* @extent_list_head: the extent list head of which will be free.
*/
void free_ext(extent_t *extent_list_head)
{
extent_t *extent_tmp = NULL;

if (extent_list_head == NULL)
return;

while (extent_list_head->next != extent_list_head) {
extent_tmp = extent_list_head;
extent_list_head->prev->next = extent_list_head->next;
extent_list_head->next->prev = extent_list_head->prev;
extent_list_head = extent_list_head->next;
free(extent_tmp);
}
free(extent_list_head);
}

/*
* do_defrag() - Execute the defrag program in force mode.
*
* @fd: the file's descriptor.
* @exts_group: the exts_group which will be defraged.
* @defrag_data: the data which will be defraged.
*/
int do_defrag(int fd, exts_group_t *exts_group,
struct ext4_ext_defrag_data defrag_data)
{
int defraged_size = 0;
int fadvise_ret = 0;
unsigned long page_num;
unsigned char *vec = NULL;

/* Init defrag_data for defrag */
defrag_data.ext.start = exts_group->start->data.start;
defrag_data.ext.len = exts_group->len;
defrag_data.ext.block = 0;
defrag_data.defrag_size = exts_group->len;
/* Move victim extents to make sufficient space */
defrag_data.flag = DEFRAG_FORCE_GATHER;
defrag_data.goal = exts_group->start->data.start;

if (page_in_core(fd, defrag_data, &vec, &page_num) == RETURN_NG)
return RETURN_NG;

defraged_size = ioctl(fd, EXT4_IOC_DEFRAG, &defrag_data);

/* Free pages */
fadvise_ret = defrag_fadvise(fd, defrag_data, vec, page_num);
FREE(vec);

if (fadvise_ret == RETURN_NG || defraged_size < 0)
return RETURN_NG;

return defraged_size;
}

/*
* get_used_extent() - Get used extent in the block group.
*
* @fd: the file's descriptor.
* @ext_list_head: the head of the extent list.
* @istart: start inode in the block group.
* @iend: end inode in the block group.
* @bstart: start block in the block group.
* @bend: end block in the block group.
*/
int get_used_extent(int fd, extent_t **ext_list_head,
unsigned long long istart, unsigned long long iend,
ext4_fsblk_t bstart, ext4_fsblk_t bend)
{
int ret = 0;
int blocksize;
int extents_buffer_size, fiemap_ino_size;
__u64 pos = 0;
__u64 group_start, group_end;
unsigned long long ino;
struct fiemap_extent *extents_buf = NULL;
struct fiemap_ino *fiemap_ino_buf = NULL;

ret = ioctl(fd, FIGETBSZ, &blocksize);
if (ret < 0) {
if (detail_flag)
perror(NGMSG_FILE_BLOCKSIZE);

return RETURN_NG;
}

/* Convert units, in bytes.
* Be careful : now, physical block number in extent is 48bit,
* and the maximum blocksize for ext4 is 4K(12bit),
* so there is no overflow, but in future it may be changed.
*/
group_start = bstart * blocksize;
group_end = bend * blocksize;

/* Alloc space for fiemap_ino */
fiemap_ino_size = sizeof(struct fiemap_ino);
extents_buffer_size = EXTENT_MAX_COUNT * sizeof(struct fiemap_extent);

fiemap_ino_buf = malloc(fiemap_ino_size + extents_buffer_size);
if (!fiemap_ino_buf)
return RETURN_NG;

extents_buf = fiemap_ino_buf->fiemap.fm_extents;
memset(fiemap_ino_buf, 0, fiemap_ino_size);
fiemap_ino_buf->fiemap.fm_length = FIEMAP_MAX_OFFSET;
fiemap_ino_buf->fiemap.fm_flags |= FIEMAP_FLAG_SYNC;
fiemap_ino_buf->fiemap.fm_extent_count = EXTENT_MAX_COUNT;

for (ino = istart; ino <= iend; ino++) {
fiemap_ino_buf->ino = ino;
pos = 0;
do {
int i;
fiemap_ino_buf->fiemap.fm_start = pos;
memset(extents_buf, 0, extents_buffer_size);

ret = ioctl(fd, EXT4_IOC_FIEMAP_INO, fiemap_ino_buf);
if (ret < 0) {
/* Skip non-regular file and unused inode */
if (errno == EOPNOTSUPP || errno == ESTALE
|| errno == ENOENT || errno == EACCES)
continue;
goto out;
}

for (i = 0; i < fiemap_ino_buf->
fiemap.fm_mapped_extents; i++) {
extent_t *extent = NULL;

/* Is this extent in current block group ? */
if (extents_buf[i].fe_physical < group_start ||
extents_buf[i].fe_physical > group_end)
continue;

extent = malloc(sizeof(extent_t));
if (extent == NULL)
goto out;

extent->data.start = extents_buf[i].fe_physical
/ blocksize;
extent->data.block = extents_buf[i].fe_logical
/ blocksize;
extent->data.len = extents_buf[i].fe_length
/ blocksize;
extent->ino = ino;
extent->tag = EXT4_EXT_USE;

if (insert_extent(ext_list_head, extent) < 0) {
FREE(extent);
goto out;
}
}

/* Record file's logical offset this time */
pos = extents_buf[EXTENT_MAX_COUNT-1].fe_logical +
extents_buf[EXTENT_MAX_COUNT-1].fe_length;
/* If fm_extents array has been filled and
* there are extents left, continue to cycle.
*/
} while (fiemap_ino_buf->fiemap.fm_mapped_extents
== EXTENT_MAX_COUNT &&
!(extents_buf[EXTENT_MAX_COUNT-1].fe_flags
& FIEMAP_EXTENT_LAST));
}

FREE(fiemap_ino_buf);
return RETURN_OK;
out:
FREE(fiemap_ino_buf);
return RETURN_NG;
}

/*
* get_free_extent() - Get used extent in the block group.
*
* @fd: the file's descriptor.
* @ino: inode number from struct stat64.
* @blocks_per_group: the block number of each block group.
* @ext_list_head: the head of the extent list.
*/
int get_free_extent(int fd, unsigned long long ino,
int blocks_per_group, extent_t **ext_list_head)
{
ext4_grpblk_t pos = 0;
struct ext4_extents_info extents_info;

memset(&extents_info, 0, sizeof(struct ext4_extents_info));
extents_info.ino = ino;
extents_info.max_entries = DEFRAG_MAX_ENT;
while (pos < blocks_per_group) {
int i;
if (ioctl(fd, EXT4_IOC_FREE_BLOCKS_INFO, &extents_info) < 0)
return RETURN_NG;

for (i = 0;
extents_info.ext[i].len != 0 && i < DEFRAG_MAX_ENT; i++) {
/* Alloc space for extent */
extent_t *extent = NULL;

extent = malloc(sizeof(extent_t));
if (extent == NULL)
return RETURN_NG;
memset(extent, 0, sizeof(extent_t));
memcpy(&(extent->data), &(extents_info.ext[i]),
sizeof(struct ext4_extent_data));
/* Free extent */
extent->tag = EXT4_EXT_FREE;
if (insert_extent(ext_list_head, extent) < 0) {
FREE(extent);
return RETURN_NG;
}
}

/* No free extent after the logical block number "pos".
* In other word, offset this time equals to prev recursion.
*/
if (pos == extents_info.g_offset)
break;
if (i < DEFRAG_MAX_ENT)
break;
/* Record the offset of logical block number this time */
pos = extents_info.g_offset;
memset(extents_info.ext, 0,
sizeof(struct ext4_extent_data) * DEFRAG_MAX_ENT);
}

return RETURN_OK;
}

/*
* join_extents() - Find continuous region(exts_group).
*
* @ext_list_head: the head of the extent list.
* @target_exts_group_list_head:the head of the target exts_group list.
* @exts_group_list_head: the head of the original exts_group list.
* @filesize: the file's descriptor.
* @max: the max size of free space.
*/
int join_extents(extent_t *ext_list_head,
exts_group_t **target_exts_group_list_head,
exts_group_t **exts_group_list_head,
unsigned long filesize, int *max)
{
int len;
extent_t *ext_start, *extent_tmp;

ext_start = ext_list_head;
extent_tmp = ext_list_head;
*max = 0;
len = ext_list_head->data.len;
extent_tmp = extent_tmp->next;
do {
exts_group_t *exts_group_tmp = NULL;

if (len >= filesize) {
/* Hit on the way,
* one extent group is enough for defrag, so return.
*/
exts_group_tmp = malloc(sizeof(exts_group_t));
if (!exts_group_tmp)
return RETURN_NG;

exts_group_tmp->prev = NULL;
exts_group_tmp->next = NULL;
exts_group_tmp->start = ext_start;
exts_group_tmp->end = extent_tmp->prev;
exts_group_tmp->len = len;
if (insert_exts_group(target_exts_group_list_head,
exts_group_tmp) < 0) {
FREE(exts_group_tmp);
return RETURN_NG;
}
return CHECK_FRAG_COUNT;
}

/* This extent and previous extent are not continuous,
* so, all previous extents are treated as an extent group.
*/
if ((extent_tmp->prev->data.start + extent_tmp->prev->data.len)
!= extent_tmp->data.start) {
exts_group_tmp = malloc(sizeof(exts_group_t));
if (exts_group_tmp == NULL)
return RETURN_NG;

memset(exts_group_tmp, 0, sizeof(exts_group_t));
exts_group_tmp->len = len;
exts_group_tmp->start = ext_start;
exts_group_tmp->end = extent_tmp->prev;

if (insert_exts_group(exts_group_list_head,
exts_group_tmp) < 0) {
FREE(exts_group_tmp);
return RETURN_NG;
}
*max += len;
ext_start = extent_tmp;
len = extent_tmp->data.len;
extent_tmp = extent_tmp->next;
continue;
}

/* This extent and previous extent are continuous,
* so, they belong to the same extent group, and we check
* if the next extent belongs to the same extent group.
*/
len += extent_tmp->data.len;
extent_tmp = extent_tmp->next;
} while (extent_tmp != ext_list_head->next);

return RETURN_OK;
}

/*
* find_exts_group() - Find target exts_group.
*
* @ext_count: the number of extents.
* @filesize: the file's size.
* @exts_group_list_head: the head of the original exts_group list
* @target_exts_group_list_head: the head of the target exts_group list.
*/
int find_exts_group(int *ext_count, unsigned long filesize,
exts_group_t **exts_group_list_head,
exts_group_t **target_exts_group_list_head)
{
/* Blocks we found for target file */
int len = 0;

if (!(*exts_group_list_head))
return RETURN_NG;

while (*exts_group_list_head) {
exts_group_t *exts_group_tmp = NULL;

/* Even add the current largest extent group,
* the sapce we found is still not enough for the file,
* so just add the current largest extent group,
* and do the next loop.
*/
if ((*exts_group_list_head)->len + len < filesize) {
len += (*exts_group_list_head)->len;
exts_group_tmp = get_exts_group(exts_group_list_head,
*exts_group_list_head);
if (insert_exts_group(target_exts_group_list_head,
exts_group_tmp) < 0) {
FREE(exts_group_tmp);
return RETURN_NG;
}
(*ext_count)++;
continue;
}

/* Now, if add the current largest extent group,
* the space we found is enough,
* So, search from the smallest extent group
* to avoid waste of space
*/
exts_group_tmp = (*exts_group_list_head)->prev;
do {
if (exts_group_tmp->len + len >= filesize) {
len += exts_group_tmp->len;
exts_group_tmp =
get_exts_group(exts_group_list_head,
exts_group_tmp);
if (insert_exts_group
(target_exts_group_list_head,
exts_group_tmp) < 0) {
FREE(exts_group_tmp);
return RETURN_NG;
}
(*ext_count)++;
/* The only entry go out normally */
return RETURN_OK;
}
exts_group_tmp = exts_group_tmp->prev;
} while (exts_group_tmp !=
(*exts_group_list_head)->prev);

/* If we come here, there must be something wrong */
return RETURN_NG;
}

/* If we come here, there must be something wrong(space not enough) */
return RETURN_NG;
}

/*
* check_frag_count() - Check file frag.
*
* @fd: the file's discriptor.
* @extent_count: the number of extents.
*/
int check_frag_count(int fd, int extent_count)
{
long long file_extent_count = RETURN_NG;

file_extent_count = file_frag_count(fd);
if (file_extent_count == RETURN_NG)
return RETURN_NG;

if (extent_count >= file_extent_count) {
/* No improvment */
errno = ENOSPC;
return RETURN_NG;
}

return RETURN_OK;
}

/*
* defrag_proc() - Reserve extent group and execute
* the defrag program.
*
* @fd: the file's discriptor.
* @file_name file name.
* @target_exts_group_head: the head of the original exts_group list.
* @ino: inode number from struct stat64.
*/
int defrag_proc(int fd, const char *file_name,
exts_group_t *target_exts_group_head, unsigned long long ino)
{
int ret = 0;
int flag = 0;
int count = 0;
int percent = 0;
int blocksize = 0;
struct stat64 buf;
exts_group_t *exts_group = NULL;
extent_t *extent = NULL;
struct ext4_extents_info extents_info;
struct ext4_ext_defrag_data defrag_data;

if (!target_exts_group_head)
return RETURN_NG;

/* Get file size */
memset(&buf, 0, sizeof(struct stat64));
ret = fstat64(fd, &buf);
if (ret < 0) {
if (detail_flag) {
printf("\033[79;0H\033[K[%lu/%lu]%s\n",
defraged_file_count,
total_count, file_name);
PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_INFO);
}
return RETURN_NG;
}

/* Get block size */
ret = ioctl(fd, FIGETBSZ, &blocksize);
if (ret < 0) {
if (detail_flag) {
printf("\033[79;0H\033[K[%lu/%lu]%s\n",
defraged_file_count,
total_count, file_name);
PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_BLOCKSIZE);
}
return RETURN_NG;
}

memset(&extents_info, 0, sizeof(extents_info));
memset(&defrag_data, 0, sizeof(struct ext4_ext_defrag_data));
extents_info.ino = 0;
exts_group = target_exts_group_head;
extents_info.max_entries = DEFRAG_MAX_ENT;
extents_info.ino = ino;
defrag_data.start_offset = 0;

do {
/* Move victim extents to make sufficient space */
extent = exts_group->start;
int flush_count = 0;
do {
if (extent->tag != EXT4_EXT_USE) {
extent = extent->next;
continue;
}
extents_info.ino = extent->ino;
extents_info.goal = fgoal;
memcpy(extents_info.ext, &extent->data,
sizeof(struct ext4_extent_data));

extent = extent->next;
extents_info.entries = 1;
ret = ioctl(fd, EXT4_IOC_MOVE_VICTIM, &extents_info);
if (ret < 0) {
if (errno == EACCES && detail_flag) {
printf("\033[79;0H\033[K[%lu/%lu]%s\n",
defraged_file_count,
total_count, file_name);
PRINT_ERR_MSG_WITH_ERRNO
(NGMSG_VICTIM_UID);
}
return ret;
}
count++;
if (detail_flag) {
if (!flag) {
printf(" Move victim extents to");
printf(" the other block group\n");
flag = 1;
}
/* moved extent info */
if (!flush_count) {
/* flush the defrag process info */
printf("\033[79;0H\033[K ext%-3d",
count);
printf(" phys:\t%8llu logical:\t%8u "
"length:\t%8d\n",
(extents_info.ext[0]).start,
(extents_info.ext[0]).block,
(extents_info.ext[0]).len);
flush_count = 1;
} else {
printf(" ext%-3d phys:\t%8llu logical:"
"\t%8u length:\t%8d\n", count,
(extents_info.ext[0]).start,
(extents_info.ext[0]).block,
(extents_info.ext[0]).len);
}
}
} while (extent != exts_group->end->next);

if (fsync(fd) < 0) {
if (detail_flag) {
printf("\033[79;0H\033[K[%lu/%lu]%s\n",
defraged_file_count,
total_count, file_name);
PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_SYNC);
}
return ret;
}

/* Init extents_info for ioctl (RESERVE) */
extents_info.entries = 1;
extents_info.ext[0].block = exts_group->start->data.block;
extents_info.ext[0].start = exts_group->start->data.start;
extents_info.ext[0].len = exts_group->len;

/* Defrag */
ret = do_defrag(fd, exts_group, defrag_data);
if (ret < 0) {
if (detail_flag) {
printf("\033[79;0H\033[K[%lu/%lu]%s\n",
defraged_file_count,
total_count, file_name);
printf("\tDEFRAG_ERROR ret = %d\t[ NG ]\n",
ret);
}
return ret;
}
/* Defrag success, so update offset */
defrag_data.start_offset += ret;
ret = defrag_data.start_offset;

/* Print process progress */
{
percent = ((long long)ret * blocksize * 100) /
buf.st_size;
printf("\033[79;0H\033[K[%lu/%lu]%s:\t%d%%",
defraged_file_count, total_count,
file_name, min(percent, 100));
fflush(stdout);
}

exts_group = exts_group->next;
} while (exts_group != target_exts_group_head);
return ret;
}

/*
* force_defrag() - Execute the defrag program in force defrag mode.
*
* @fd: the file's descriptor.
* @file_name file name.
* @buf: a pointer of the struct stat64.
* @blocksize: block size in byte.
*/
int force_defrag(int fd, const char *file_name,
const struct stat64 *buf, int blocksize)
{
int ret = 0;
int exts = 0;
int maxlen = 0;
unsigned int gnumber;
unsigned long filesize;
unsigned long long istart, iend;
ext4_fsblk_t bstart, bend;
extent_t *extlist_head = NULL;
exts_group_t *exts_group_list_head, *exts_group_list_target_head;
struct ext4_super_block sb;

exts_group_list_head = NULL;
exts_group_list_target_head = NULL;

/* Get super block info */
memset(&sb, 0, sizeof(struct ext4_super_block));

if (ioctl(fd, EXT4_IOC_SUPER_INFO, &sb) < 0)
return RETURN_NG;

sb.s_blocks_per_group = ext2fs_swab32(sb.s_blocks_per_group);
sb.s_inodes_per_group = ext2fs_swab32(sb.s_inodes_per_group);

gnumber = (buf->st_ino - 1) / sb.s_inodes_per_group;
istart = gnumber * sb.s_inodes_per_group;
iend = istart + sb.s_inodes_per_group - 1;
bstart = gnumber * sb.s_blocks_per_group;
bend = bstart + sb.s_blocks_per_group - 1;

/* Compute filesize in block */
filesize = (buf->st_size + blocksize - 1) / blocksize;

/* Get used extents in the block group */
ret = get_used_extent(fd, &extlist_head, istart, iend, bstart, bend);
if (ret == RETURN_NG)
goto freelist;

/* Get free extents in the group */
ret = get_free_extent(fd, buf->st_ino,
sb.s_blocks_per_group, &extlist_head);
if (ret == RETURN_NG)
goto freelist;

/* All space in this group is used by other groups' inodes */
if (extlist_head == NULL) {
ret = RETURN_NG;
goto freelist;
}

/* Get continuous region(extents group) */
ret = join_extents(extlist_head, &exts_group_list_target_head,
&exts_group_list_head, filesize, &maxlen);
if (ret == RETURN_NG)
goto freelist;
if (ret == CHECK_FRAG_COUNT) {
exts = 1;
goto frag_check;
}

if (maxlen < filesize) {
/* No enough space */
errno = ENOSPC;
ret = RETURN_NG;
goto freelist;
}

if (!exts_group_list_head) {
ret = RETURN_NG;
goto freelist;
}

/* Find target extents group */
ret = find_exts_group(&exts, filesize, &exts_group_list_head,
&exts_group_list_target_head);
if (ret == RETURN_NG)
goto freelist;

frag_check:
/* Check file extent count */
ret = check_frag_count(fd, exts);
if (ret == RETURN_NG)
goto freelist;

/* Reserve extent group and execute the defrag program */
ret = defrag_proc(fd, file_name,
exts_group_list_target_head, buf->st_ino);

freelist:
free_exts_group(exts_group_list_target_head);
free_exts_group(exts_group_list_head);
free_ext(extlist_head);
return ret;
}