From: Akira Fujita Subject: [RFC][PATCH 9/9]ext4: online defrag command Date: Fri, 24 Oct 2008 19:10:12 +0900 Message-ID: <49019F04.9050303@rs.jp.nec.com> Mime-Version: 1.0 Content-Type: text/plain; charset=ISO-2022-JP Content-Transfer-Encoding: 7bit Cc: linux-fsdevel@vger.kernel.org To: linux-ext4@vger.kernel.org, Theodore Tso , Mingming Cao Return-path: Received: from TYO202.gate.nec.co.jp ([202.32.8.206]:63905 "EHLO tyo202.gate.nec.co.jp" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753629AbYJXKK0 (ORCPT ); Fri, 24 Oct 2008 06:10:26 -0400 Sender: linux-ext4-owner@vger.kernel.org List-ID: ext4: online defrag -- Online defrag command From: Akira Fujita - The defrag command. Usage is as follows: - Put the multiple files closer together. # e4defrag -r directory-name - Defrag for free space fragmentation. # e4defrag -f file-name - Defrag for a single file. # e4defrag file-name - Defrag for all files on ext4. # e4defrag device-name Signed-off-by: Akira Fujita Signed-off-by: Takashi Sato --- /* * e4defrag.c - ext4 filesystem defragmenter * * Copyright (C) 2008 NEC Software Tohoku, Ltd. * * Author: Akira Fujita * Takashi Sato */ #ifndef _LARGEFILE_SOURCE #define _LARGEFILE_SOURCE #endif #ifndef _LARGEFILE64_SOURCE #define _LARGEFILE64_SOURCE #endif #define _XOPEN_SOURCE 500 #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Ioctl command */ #define EXT4_IOC_DEFRAG _IOW('f', 15, struct ext4_ext_defrag_data) #define EXT4_IOC_SUPER_INFO _IOR('f', 16, struct ext4_super_block) #define EXT4_IOC_FREE_BLOCKS_INFO _IOWR('f', 17, struct ext4_extents_info) #define EXT4_IOC_FIEMAP_INO _IOWR('f', 18, struct fiemap_ino) #define EXT4_IOC_MOVE_VICTIM _IOW('f', 19, struct ext4_extents_info) #define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) /* Macro functions */ #define PRINT_ERR_MSG(msg) fprintf(stderr, "%s\n", (msg)); #define IN_FTW_PRINT_ERR_MSG(msg) \ fprintf(stderr, "\t%s\t\t[ NG ]\n", (msg)); #define PRINT_FILE_NAME(file) fprintf(stderr, " \"%s\"\n", (file)); #define PRINT_ERR_MSG_WITH_ERRNO(msg) \ fprintf(stderr, "\t%s:%s\t[ NG ]\n", (msg), strerror(errno)); #define min(x, y) (((x) > (y)) ? (y) : (x)) /* Wrap up the free function */ #define FREE(tmp) \ do { \ if (tmp) \ free(tmp); \ } while (0) \ /* Insert list2 after list1 */ #define insert(list1, list2) \ do { \ list2->next = list1->next; \ list1->next->prev = list2; \ list2->prev = list1; \ list1->next = list2; \ } while (0) #ifndef __NR_fadvise64 #define __NR_fadvise64 250 #endif #ifndef __NR_sync_file_range #define __NR_sync_file_range 314 #endif #ifndef POSIX_FADV_DONTNEED #if defined(__s390x__) #define POSIX_FADV_DONTNEED 6 /* Don't need these pages */ #else #define POSIX_FADV_DONTNEED 4 /* Don't need these pages */ #endif #endif #ifndef SYNC_FILE_RANGE_WAIT_BEFORE #define SYNC_FILE_RANGE_WAIT_BEFORE 1 #endif #ifndef SYNC_FILE_RANGE_WRITE #define SYNC_FILE_RANGE_WRITE 2 #endif #ifndef SYNC_FILE_RANGE_WAIT_AFTER #define SYNC_FILE_RANGE_WAIT_AFTER 4 #endif #define DEVNAME 0 #define DIRNAME 1 #define FILENAME 2 #define RETURN_OK 0 #define RETURN_NG -1 #define FTW_CONT 0 #define FTW_OPEN_FD 2000 #define FS_EXT4 "ext4" #define ROOT_UID 0 #define CHECK_FRAG_COUNT 1 /* Extent status which are used in extent_t */ #define EXT4_EXT_USE 0 #define EXT4_EXT_FREE 1 /* The maximum number of extents for exchanging between * kernel-space and user-space per ioctl */ #define DEFRAG_MAX_ENT 32 /* The phase of force defrag mode */ #define DEFRAG_FORCE_TRY 1 #define DEFRAG_FORCE_GATHER 3 /* Magic number for ext4 */ #define EXT4_SUPER_MAGIC 0xEF53 /* Defrag size(64MB) per ioctl(not including force mode) */ #define DEFRAG_SIZE ((unsigned long)1 << 26) /* Force defrag mode: Max file size in bytes (128MB) */ #define MAX_FILE_SIZE ((unsigned long)1 << 27) /* The following four macros are used for ioctl FS_IOC_FIEMAP * FIEMAP_FLAG_SYNC: sync file data before map. * FIEMAP_EXTENT_LAST: last extent in file. * FIEMAP_MAX_OFFSET: max file offset. * EXTENT_MAX_COUNT: the maximum number of extents for exchanging between kernel-space and user-space per ioctl */ #define FIEMAP_FLAG_SYNC 0x00000001 #define FIEMAP_EXTENT_LAST 0x00000001 #define FIEMAP_MAX_OFFSET (~0ULL) #define EXTENT_MAX_COUNT 512 /* The following macros are error message */ #define MSG_USAGE \ "Usage : e4defrag [-v] file...| directory...| device...\n\ : e4defrag -f file [blocknr] \n\ : e4defrag -r directory... | device... \n" #define MSG_R_OPTION " with regional block allocation mode.\n" #define NGMSG_MTAB "Can not access /etc/mtab" #define NGMSG_UNMOUNT "FS is not mounted" #define NGMSG_EXT4 "FS is not ext4 File System" #define NGMSG_FS_INFO "Get FSInfo fail" #define NGMSG_FILE_INFO "Get FileInfo fail" #define NGMSG_FILE_OPEN "Open fail" #define NGMSG_FILE_SYNC "Sync(fsync) fail" #define NGMSG_FILE_DEFRAG "Defrag fail" #define NGMSG_FILE_BLOCKSIZE "Can't get blocksize" #define NGMSG_FILE_SUPER "Can't get super block info" #define NGMSG_FILE_UNREG "File is not regular file" #define NGMSG_VICTIM_UID "Victim file is not current user's file" #define NGMSG_FILE_LARGE \ "Defrag size is larger than FileSystem's free space" #define NGMSG_FILE_PRIORITY \ "File is not current user's file or current user is not root" #define NGMSG_FILE_LOCK "File is locked" #define NGMSG_FILE_BLANK "File size is 0" #define NGMSG_GET_LCKINFO "Get LockInfo fail" #define NGMSG_TYPE \ "Can not process %s in regional mode.\n" #define NGMSG_LOST_FOUND "Can not process \"lost+found\"" #define NGMSG_REALPATH "Can not get full path" #define NGMSG_FILE_MAP "Get file map fail" #define NGMSG_FILE_DROP_BUFFER "Free page fail" #define NGMSG_FADVISE_SYSCALL "\tFadvise fail" #define NGMSG_FORCE_DEFRAG_SIZE \ "Cannot specify a file larger than %luMB in force defrag mode" #define NGMSG_ENDIAN "Endian type is not big/little endian" /* Data type for filesystem-wide blocks number */ typedef unsigned long long ext4_fsblk_t; /* Data type for file logical block number */ typedef unsigned int ext4_lblk_t; /* Data type for block offset of block group */ typedef int ext4_grpblk_t; struct ext4_extent_data { ext4_lblk_t block; /* start logical block number */ ext4_fsblk_t start; /* start physical block number */ int len; /* blocks count */ }; /* Used for defrag */ struct ext4_ext_defrag_data { ext4_lblk_t start_offset; /* start offset to defrag in blocks */ ext4_lblk_t defrag_size; /* size of defrag in blocks */ ext4_fsblk_t goal; /* block offset for allocation */ int flag; /* free space mode flag */ struct ext4_extent_data ext; }; typedef __u16 __le16; typedef __u32 __le32; typedef __u64 __le64; /* * Structure of the super block */ struct ext4_super_block { /*00*/ __le32 s_inodes_count; /* Inodes count */ __le32 s_blocks_count_lo; /* Blocks count */ __le32 s_r_blocks_count_lo; /* Reserved blocks count */ __le32 s_free_blocks_count_lo; /* Free blocks count */ /*10*/ __le32 s_free_inodes_count; /* Free inodes count */ __le32 s_first_data_block; /* First Data Block */ __le32 s_log_block_size; /* Block size */ __le32 s_obso_log_frag_size; /* Obsoleted fragment size */ /*20*/ __le32 s_blocks_per_group; /* # Blocks per group */ __le32 s_obso_frags_per_group; /* Obsoleted fragments per group */ __le32 s_inodes_per_group; /* # Inodes per group */ __le32 s_mtime; /* Mount time */ /*30*/ __le32 s_wtime; /* Write time */ __le16 s_mnt_count; /* Mount count */ __le16 s_max_mnt_count; /* Maximal mount count */ __le16 s_magic; /* Magic signature */ __le16 s_state; /* File system state */ __le16 s_errors; /* Behaviour when detecting errors */ __le16 s_minor_rev_level; /* minor revision level */ /*40*/ __le32 s_lastcheck; /* time of last check */ __le32 s_checkinterval; /* max. time between checks */ __le32 s_creator_os; /* OS */ __le32 s_rev_level; /* Revision level */ /*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */ __le16 s_def_resgid; /* Default gid for reserved blocks */ /* * These fields are for EXT4_DYNAMIC_REV superblocks only. * * Note: the difference between the compatible feature set and * the incompatible feature set is that if there is a bit set * in the incompatible feature set that the kernel doesn't * know about, it should refuse to mount the filesystem. * * e2fsck's requirements are more strict; if it doesn't know * about a feature in either the compatible or incompatible * feature set, it must abort and not try to meddle with * things it doesn't understand... */ __le32 s_first_ino; /* First non-reserved inode */ __le16 s_inode_size; /* size of inode structure */ __le16 s_block_group_nr; /* block group # of this superblock */ __le32 s_feature_compat; /* compatible feature set */ /*60*/ __le32 s_feature_incompat; /* incompatible feature set */ __le32 s_feature_ro_compat; /* readonly-compatible feature set */ /*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ /*78*/ char s_volume_name[16]; /* volume name */ /*88*/ char s_last_mounted[64]; /* directory where last mounted */ /*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ /* * Performance hints. Directory preallocation should only * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on. */ __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ /* * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set. */ /*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ /*E0*/ __le32 s_journal_inum; /* inode number of journal file */ __le32 s_journal_dev; /* device number of journal file */ __le32 s_last_orphan; /* start of list of inodes to delete */ __le32 s_hash_seed[4]; /* HTREE hash seed */ __u8 s_def_hash_version; /* Default hash version to use */ __u8 s_reserved_char_pad; __le16 s_desc_size; /* size of group descriptor */ /*100*/ __le32 s_default_mount_opts; __le32 s_first_meta_bg; /* First metablock block group */ __le32 s_mkfs_time; /* When the filesystem was created */ __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ /*150*/ __le32 s_blocks_count_hi; /* Blocks count */ __le32 s_r_blocks_count_hi; /* Reserved blocks count */ __le32 s_free_blocks_count_hi; /* Free blocks count */ __le16 s_min_extra_isize; /* All inodes have at least # bytes */ __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ __le32 s_flags; /* Miscellaneous flags */ __le16 s_raid_stride; /* RAID stride */ __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ __le64 s_mmp_block; /* Block for multi-mount protection */ __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ __u8 s_log_groups_per_flex; /* FLEX_BG group size */ __u8 s_reserved_char_pad2; __le16 s_reserved_pad; __u32 s_reserved[162]; /* Padding to the end of the block */ }; struct ext4_extents_info { unsigned long long ino; /* inode number */ int max_entries; /* maximum extents count */ int entries; /* extent number/count */ ext4_lblk_t f_offset; /* file offset */ ext4_grpblk_t g_offset; /* group offset */ ext4_fsblk_t goal; struct ext4_extent_data ext[DEFRAG_MAX_ENT]; }; typedef struct extent { struct extent *prev; unsigned long tag; /* extent status */ unsigned long ino; /* file's inode number */ struct ext4_extent_data data; /* extent belong to file */ struct extent *next; } extent_t; typedef struct exts_group { struct exts_group *prev; extent_t *start; /* start ext */ extent_t *end; /* end ext */ int len; /* length of this continuous region */ struct exts_group *next; } exts_group_t; struct fiemap_extent { __u64 fe_logical; /* logical offset in bytes for the start of * the extent from the beginning of the file */ __u64 fe_physical; /* physical offset in bytes for the start * of the extent from the beginning * of the disk */ __u64 fe_length; /* length in bytes for this extent */ __u64 fe_reserved64[2]; __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ __u32 fe_reserved[3]; }; struct fiemap { __u64 fm_start; /* logical offset (inclusive) at * which to start mapping (in) */ __u64 fm_length; /* logical length of mapping which * userspace wants (in) */ __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ __u32 fm_mapped_extents;/* number of extents that were mapped (out) */ __u32 fm_extent_count; /* size of fm_extents array (in) */ __u32 fm_reserved; struct fiemap_extent fm_extents[0];/* array of mapped extents (out) */ }; struct fiemap_ino { __u64 ino; struct fiemap fiemap; }; int force_flag; int detail_flag; int regional_flag; int extents_before_defrag; int extents_after_defrag; unsigned long succeed_cnt; unsigned long regular_count; unsigned long total_count; unsigned long frag_files_before_defrag; unsigned long frag_files_after_defrag; unsigned long defraged_file_count; char lost_found_dir[PATH_MAX + 1]; ext4_fsblk_t goal; ext4_fsblk_t fgoal = -1; /* * ext2fs_swab32() - Change endian. * * @val: the entry used for change. */ __u32 ext2fs_swab32(__u32 val) { #if BYTE_ORDER == BIG_ENDIAN return (val>>24) | ((val>>8)&0xFF00) | ((val<<8)&0xFF0000) | (val<<24); #else return val; #endif } /* * fadvise() - Give advice about file access. * * @fd: the file's descriptor. * @offset: file offset. * @len: area length. * @advise: process flag. */ int fadvise(int fd, loff_t offset, size_t len, int advise) { return syscall(__NR_fadvise64, fd, offset, len, advise); } /* * sync_file_range() - Sync file region. * * @fd: the file's descriptor. * @offset: file offset. * @length: area length. * @flag: process flag. */ int sync_file_range(int fd, loff_t offset, loff_t length, unsigned int flag) { return syscall(__NR_sync_file_range, fd, offset, length, flag); } /* * page_in_core() - Get information on whether pages are in core. * * @fd: the file's descriptor. * @defrag_data: data used for defrag. * @vec: page state array. * @page_num: page number. */ int page_in_core(int fd, struct ext4_ext_defrag_data defrag_data, unsigned char **vec, unsigned long *page_num) { int blocksize; int pagesize = getpagesize(); void *page = NULL; loff_t offset, end_offset, length; if (vec == NULL || *vec != NULL) return RETURN_NG; if (ioctl(fd, FIGETBSZ, &blocksize) < 0) return RETURN_NG; /* In mmap, offset should be a multiple of the page size */ offset = defrag_data.start_offset * blocksize; length = defrag_data.defrag_size * blocksize; end_offset = offset + length; /* Round the offset down to the nearest multiple of pagesize */ offset = (offset / pagesize) * pagesize; length = end_offset - offset; page = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, offset); if (page == MAP_FAILED) return RETURN_NG; *page_num = 0; *page_num = (length + pagesize - 1) / pagesize; *vec = (unsigned char *)calloc(*page_num, 1); if (*vec == NULL) return RETURN_NG; /* Get information on whether pages are in core */ if (mincore(page, (size_t)length, *vec) == -1 || munmap(page, length) == -1) { FREE(*vec); return RETURN_NG; } return RETURN_OK; } /* * defrag_fadvise() - Predeclare an access pattern for file data. * * @fd: the file's descriptor. * @defrag_data: data used for defrag. * @vec: page state array. * @page_num: page number. */ int defrag_fadvise(int fd, struct ext4_ext_defrag_data defrag_data, unsigned char *vec, unsigned long page_num) { int flag = 1; int blocksize; int pagesize = getpagesize(); int fadvise_flag = POSIX_FADV_DONTNEED; int sync_flag = SYNC_FILE_RANGE_WAIT_BEFORE | SYNC_FILE_RANGE_WRITE| SYNC_FILE_RANGE_WAIT_AFTER; unsigned long i; loff_t offset; if (ioctl(fd, FIGETBSZ, &blocksize) < 0) return RETURN_NG; offset = (loff_t)defrag_data.start_offset * blocksize; offset = (offset / pagesize) * pagesize; /* Sync file for fadvise process */ if (sync_file_range(fd, offset, (loff_t)pagesize*page_num, sync_flag) != 0) return RETURN_NG; /* Try to release buffer cache which this process used, * then other process can use the released buffer */ for (i = 0; i < page_num; i++) { if ((vec[i] & 0x1) == 0) { offset += pagesize; continue; } if (fadvise(fd, offset, pagesize, fadvise_flag) != 0) { if (detail_flag && flag) { perror(NGMSG_FADVISE_SYSCALL); flag = 0; } } offset += pagesize; } return RETURN_OK; } /* * check_free_size() - Check if there's enough disk space. * * @fd: the file's descriptor. * @file_name file name. * @buf: the pointer of the struct stat64. */ int check_free_size(int fd, const char *file_name, const struct stat64 *buf) { off64_t size = 0; off64_t free_size = 0; struct statfs fsbuf; /* Target file size */ size = buf->st_size; if (fstatfs(fd, &fsbuf) < 0) { if (detail_flag) { PRINT_FILE_NAME(file_name); PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FS_INFO); } return RETURN_NG; } /* Compute free space for root and normal user separately */ if (getuid() == ROOT_UID) free_size = (off64_t)fsbuf.f_bsize * fsbuf.f_bfree; else free_size = (off64_t)fsbuf.f_bsize * fsbuf.f_bavail; if (free_size >= size) return RETURN_OK; if (detail_flag) { PRINT_FILE_NAME(file_name); IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_LARGE); } return RETURN_NG; } int file_check(int fd, const struct stat64 *buf, const char *file_name); int force_defrag(int fd, const char *file_name, const struct stat64 *buf, int blocksize); /* * file_frag_count() - Get file fragment count. * * @fd: the file's descriptor. */ long long file_frag_count(int fd) { int ret = RETURN_NG; struct fiemap file_extent_map; /* When fm_extent_count is 0, * ioctl just get file fragment count. */ memset(&file_extent_map, 0, sizeof(struct fiemap)); file_extent_map.fm_start = 0; file_extent_map.fm_length = FIEMAP_MAX_OFFSET; file_extent_map.fm_flags |= FIEMAP_FLAG_SYNC; ret = ioctl(fd, FS_IOC_FIEMAP, &file_extent_map); if (ret < 0) return RETURN_NG; return file_extent_map.fm_mapped_extents; } /* * ftw_fn() - Check file attributes and ioctl call to avoid. * illegal operations. * * @file: the file's name. * @buf: the pointer of the struct stat64. * @flag: file type. * @ftwbuf: the pointer of a struct FTW. */ int ftw_fn(const char *file, const struct stat64 *buf, int flag, struct FTW *ftwbuf) { int fd; int blocksize; int percent = 0; int defrag_fail = 0; int defraged_size = 0; int ret = RETURN_NG; long long file_frags_start, file_frags_end; unsigned long page_num; unsigned char *vec = NULL; loff_t start = 0; struct ext4_ext_defrag_data df_data; defraged_file_count++; if (detail_flag) { printf("[%lu/%lu]", defraged_file_count, total_count); fflush(stdout); } if (lost_found_dir[0] != '\0' && !memcmp(file, lost_found_dir, strnlen(lost_found_dir, PATH_MAX))) { if (detail_flag) { PRINT_FILE_NAME(file); IN_FTW_PRINT_ERR_MSG(NGMSG_LOST_FOUND); } return FTW_CONT; } if (flag != FTW_F) { if (detail_flag) { PRINT_FILE_NAME(file); IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_UNREG); } return FTW_CONT; } fd = open64(file, O_RDONLY); if (fd < 0) { if (detail_flag) { PRINT_FILE_NAME(file); PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_OPEN); } return FTW_CONT; } if (file_check(fd, buf, file) == RETURN_NG) goto out; if (fsync(fd) < 0) { if (detail_flag) { PRINT_FILE_NAME(file); PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_SYNC); } goto out; } /* Get blocksize */ if (ioctl(fd, FIGETBSZ, &blocksize) < 0) { if (detail_flag) { PRINT_FILE_NAME(file); PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_BLOCKSIZE); } goto out; } /* Ioctl call does the actual defragment job */ df_data.start_offset = 0; df_data.goal = goal; df_data.ext.len = 0; df_data.flag = force_flag; /* Count file fragments before defrag */ if (detail_flag) { file_frags_start = file_frag_count(fd); if (file_frags_start == RETURN_NG) { PRINT_FILE_NAME(file); PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_INFO); goto out; } if (file_frags_start != 1) frag_files_before_defrag++; extents_before_defrag += file_frags_start; } /* Print process progress */ percent = (start * 100) / buf->st_size; printf("\033[79;0H\033[K[%lu/%lu]%s:\t%3d%%", defraged_file_count, total_count, file, min(percent, 100)); fflush(stdout); while (1) { df_data.defrag_size = (min((buf->st_size - start), DEFRAG_SIZE) + blocksize - 1) / blocksize; ret = page_in_core(fd, df_data, &vec, &page_num); if (ret == RETURN_NG) { if (detail_flag) { printf("\n"); PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_MAP); } else { printf("\t[ NG ]\n"); } defrag_fail = 1; break; } /* EXT4_IOC_DEFRAG */ defraged_size = ioctl(fd, EXT4_IOC_DEFRAG, &df_data); /* Free pages */ ret = defrag_fadvise(fd, df_data, vec, page_num); if (vec) { free(vec); vec = NULL; } if (ret == RETURN_NG) { if (detail_flag) { printf("\n"); PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_DROP_BUFFER ); } else { printf("\t[ NG ]\n"); } defrag_fail = 1; break; } /* Go into force defrag mode */ if (defraged_size < 0 && force_flag == DEFRAG_FORCE_TRY && errno == ENOSPC) { /* File size is larger than max size of * force defrag mode */ if (buf->st_size > MAX_FILE_SIZE) { if (detail_flag) { fprintf(stderr, "\n\t"); fprintf(stderr, NGMSG_FORCE_DEFRAG_SIZE, (MAX_FILE_SIZE) / (1024 * 1024) ); fprintf(stderr, "\t[ NG ]\n"); } else { printf("\t[ NG ]\n"); } defrag_fail = 1; break; } printf("\033[79;0H\033[K"); defraged_size = force_defrag(fd, file, buf, blocksize); if (defraged_size * blocksize >= buf->st_size) /* Whole file is enforcedly defraged */ break; else { if (!detail_flag) { printf("\033[79;0H\033[K[%lu/%lu]%s\t" "0%%\t[ NG ]\n", defraged_file_count, total_count, file); } defrag_fail = 1; break; } } if (defraged_size < 0) { if (detail_flag) { printf("\n"); PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_DEFRAG); } else { printf("\t[ NG ]\n"); } defrag_fail = 1; break; } df_data.start_offset += defraged_size; start = (long long)df_data.start_offset * blocksize; /* Print process progress */ percent = (start * 100) / buf->st_size; /* Disk space file used is bigger than logical size */ printf("\033[79;0H\033[K[%lu/%lu]%s:\t%3d%%", defraged_file_count, total_count, file, min(percent, 100)); fflush(stdout); /* End of file */ if (start >= buf->st_size) break; } /* Count file fragments after defrag and print extents info */ if (detail_flag) { file_frags_end = file_frag_count(fd); if (file_frags_end == RETURN_NG) { printf("\n"); PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_INFO); goto out; } if (file_frags_end != 1) frag_files_after_defrag++; extents_after_defrag += file_frags_end; if (defrag_fail) goto out; printf(" extents: %lld -> %lld", file_frags_start, file_frags_end); fflush(stdout); } if (defrag_fail) goto out; printf("\t[ OK ]\n"); fflush(stdout); succeed_cnt++; out: close(fd); return FTW_CONT; } /* * file_check() - Check file's attributes. * * @fd: the file's descriptor. * @buf: a pointer of the struct stat64. * @file_name: the file's name. */ int file_check(int fd, const struct stat64 *buf, const char *file_name) { struct flock lock; /* Write-lock check is more reliable */ lock.l_type = F_WRLCK; lock.l_start = 0; lock.l_whence = SEEK_SET; lock.l_len = 0; /* Regular file */ if (S_ISREG(buf->st_mode) == 0) { if (detail_flag) { PRINT_FILE_NAME(file_name); IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_UNREG); } return RETURN_NG; } /* Free space */ if (check_free_size(fd, file_name, buf) == RETURN_NG) return RETURN_NG; /* Priority */ if (getuid() != ROOT_UID && buf->st_uid != getuid()) { if (detail_flag) { PRINT_FILE_NAME(file_name); IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_PRIORITY); } return RETURN_NG; } /* Lock status */ if (fcntl(fd, F_GETLK, &lock) < 0) { if (detail_flag) { PRINT_FILE_NAME(file_name); PRINT_ERR_MSG_WITH_ERRNO(NGMSG_GET_LCKINFO); } return RETURN_NG; } else if (lock.l_type != F_UNLCK) { if (detail_flag) { PRINT_FILE_NAME(file_name); IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_LOCK); } return RETURN_NG; } /* Empty file */ if (buf->st_size == 0) { if (detail_flag) { PRINT_FILE_NAME(file_name); IN_FTW_PRINT_ERR_MSG(NGMSG_FILE_BLANK); } return RETURN_NG; } return RETURN_OK; } /* * is_ext4() - Whether on an ext4 filesystem. * * @filename: the file's name. */ int is_ext4(const char *filename) { int maxlen, len, ret; FILE *fp = NULL; char *mnt_type = NULL; /* Refer to /etc/mtab */ char *mtab = MOUNTED; char file_path[PATH_MAX + 1]; struct mntent *mnt = NULL; struct statfs buffs; /* Get full path */ if (realpath(filename, file_path) == NULL) { perror(NGMSG_REALPATH); PRINT_FILE_NAME(filename); return RETURN_NG; } if (statfs(file_path, &buffs) < 0) { perror(NGMSG_FS_INFO); PRINT_FILE_NAME(filename); return RETURN_NG; } if (buffs.f_type != EXT4_SUPER_MAGIC) { PRINT_ERR_MSG(NGMSG_EXT4); return RETURN_NG; } fp = setmntent(mtab, "r"); if (fp == NULL) { perror(NGMSG_MTAB); return RETURN_NG; } maxlen = 0; while ((mnt = getmntent(fp)) != NULL) { len = strlen(mnt->mnt_dir); ret = memcmp(file_path, mnt->mnt_dir, len); if (ret != 0) continue; if (maxlen >= len) continue; maxlen = len; mnt_type = realloc(mnt_type, strlen(mnt->mnt_type) + 1); if (!mnt_type) { endmntent(fp); return RETURN_NG; } strcpy(mnt_type, mnt->mnt_type); strncpy(lost_found_dir, mnt->mnt_dir, PATH_MAX); } endmntent(fp); if (strcmp(mnt_type, FS_EXT4) == 0) { FREE(mnt_type); return RETURN_OK; } else { FREE(mnt_type); PRINT_ERR_MSG(NGMSG_EXT4); return RETURN_NG; } } /* * calc_entry_counts - calculate file counts * * @file: file name. * @buf: file info. * @flag: file type. * @ftwbuf: the pointer of a struct FTW. */ int calc_entry_counts(const char *file, const struct stat64 *buf, int flag, struct FTW *ftwbuf) { if (flag == FTW_F) regular_count++; total_count++; return FTW_CONT; } /* * get_mount_point() - Get device's mount point. * * @devname: the device's name. * @mount_point: the mount point. * @dir_path_len: the length of directory. */ int get_mount_point(const char *devname, char *mount_point, int dir_path_len) { /* Refer to /etc/mtab */ char *mtab = MOUNTED; FILE *fp = NULL; struct mntent *mnt = NULL; fp = setmntent(mtab, "r"); if (fp == NULL) { perror(NGMSG_MTAB); return RETURN_NG; } while ((mnt = getmntent(fp)) != NULL) { if (strcmp(devname, mnt->mnt_fsname) != 0) continue; endmntent(fp); if (strcmp(mnt->mnt_type, FS_EXT4) == 0) { strncpy(mount_point, mnt->mnt_dir, dir_path_len); return RETURN_OK; } PRINT_ERR_MSG(NGMSG_EXT4); return RETURN_NG; } endmntent(fp); PRINT_ERR_MSG(NGMSG_UNMOUNT); return RETURN_NG; } /* * main() - ext4 online defrag. * * @argc: the number of parameter. * @argv[]: the pointer array of parameter. */ int main(int argc, char *argv[]) { int fd; int opt; int i, flags; int arg_type; int detail_tmp; int success_flag; char dir_name[PATH_MAX + 1]; struct stat64 buf; i = 1; flags = 0; arg_type = -1; detail_tmp = -1; success_flag = 0; /* Do not follow symlink */ flags |= FTW_PHYS; /* Stay within the same filesystem */ flags |= FTW_MOUNT; /* Parse arguments */ if (argc == 1 || (argc == 2 && argv[1][0] == '-')) goto out; while ((opt = getopt(argc, argv, "rvf")) != EOF) { switch (opt) { case 'r': regional_flag = 1; i = 2; break; case 'v': detail_flag = 1; i = 2; break; case 'f': force_flag = DEFRAG_FORCE_TRY; i = 2; if (argc == 3) break; if (argc > 4) { printf("Illegal argument\n"); goto out; } /* Now the agrc must be 4(e4defrag -f file blocknr) */ int res_strlen; res_strlen = strlen(argv[3]); for (res_strlen -= 1; res_strlen >= 0; res_strlen--) { if (!isdigit(argv[3][res_strlen])) { printf("Illegal argument\n"); goto out; } } fgoal = strtoul(argv[3], NULL, 0); if (errno) { printf("block num shold be < 4294967295" "(max num of unsigned long 32bit)\n"); exit(1); } if (!fgoal) fgoal = -1; break; default: goto out; } } /* Main process */ for (; i < argc; i++) { succeed_cnt = 0; regular_count = 0; total_count = 0; frag_files_before_defrag = 0; frag_files_after_defrag = 0; extents_before_defrag = 0; extents_after_defrag = 0; defraged_file_count = 0; memset(dir_name, 0, PATH_MAX + 1); memset(lost_found_dir, 0, PATH_MAX + 1); if (force_flag && i == 3) break; #if BYTE_ORDER != BIG_ENDIAN && BYTE_ORDER != LITTLE_ENDIAN PRINT_ERR_MSG(NGMSG_ENDIAN); PRINT_FILE_NAME(argv[i]); continue; #endif if (lstat64(argv[i], &buf) < 0) { perror(NGMSG_FILE_INFO); PRINT_FILE_NAME(argv[i]); continue; } /* Only regular file is acceptalbe with force defrag mode */ if (force_flag && !S_ISREG(buf.st_mode)) { printf("Inappropriate file type\n"); goto out; } if (S_ISBLK(buf.st_mode)) { /* Block device */ if (get_mount_point(argv[i], dir_name, PATH_MAX) == RETURN_NG) continue; arg_type = DEVNAME; printf("ext4 defragmentation for device(%s)\n", argv[i]); } else if (S_ISDIR(buf.st_mode)) { /* Directory */ if (access(argv[i], R_OK) < 0) { perror(argv[i]); continue; } arg_type = DIRNAME; strcpy(dir_name, argv[i]); } else if (S_ISREG(buf.st_mode)) { /* Regular file */ arg_type = FILENAME; } else { /* Irregular file */ PRINT_ERR_MSG(NGMSG_FILE_UNREG); PRINT_FILE_NAME(argv[i]); continue; } /* For device case, * filesystem type checked in get_mount_point() */ if (arg_type == FILENAME || arg_type == DIRNAME) { if (is_ext4(argv[i]) == RETURN_NG) continue; if (realpath(argv[i], dir_name) == NULL) { perror(NGMSG_REALPATH); PRINT_FILE_NAME(argv[i]); continue; } } switch (arg_type) { case DIRNAME: printf("ext4 defragmentation for directory(%s)\n", argv[i]); int mount_dir_len = 0; mount_dir_len = strnlen(lost_found_dir, PATH_MAX); strncat(lost_found_dir, "/lost+found", PATH_MAX - strnlen(lost_found_dir, PATH_MAX)); /* Not the case("e4defrag mount_piont_dir") */ if (dir_name[mount_dir_len] != '\0') { /* "e4defrag mount_piont_dir/lost+found" * or "e4defrag mount_piont_dir/lost+found/" */ if (strncmp(lost_found_dir, dir_name, strnlen(lost_found_dir, PATH_MAX)) == 0 && (dir_name[strnlen(lost_found_dir, PATH_MAX)] == '\0' || dir_name[strnlen(lost_found_dir, PATH_MAX)] == '/')) { PRINT_ERR_MSG(NGMSG_LOST_FOUND); PRINT_FILE_NAME(argv[i]); continue; } /* "e4defrag mount_piont_dir/else_dir" */ memset(lost_found_dir, 0, PATH_MAX + 1); } case DEVNAME: if (arg_type == DEVNAME) { strncpy(lost_found_dir, dir_name, strnlen(dir_name, PATH_MAX)); strncat(lost_found_dir, "/lost+found/", PATH_MAX - strnlen(lost_found_dir, PATH_MAX)); } /* Regional block allocation */ if (regional_flag) { unsigned int bg_num = 0; int ret = 0; struct ext4_super_block sb; printf(MSG_R_OPTION); fd = open64(dir_name, O_RDONLY); if (fd < 0) { if (detail_flag) { perror(NGMSG_FILE_OPEN); PRINT_FILE_NAME(dir_name); } continue; } memset(&sb, 0, sizeof(struct ext4_super_block)); ret = ioctl(fd, EXT4_IOC_SUPER_INFO, &sb); if (ret < 0) { perror(NGMSG_FILE_SUPER); PRINT_FILE_NAME(dir_name); close(fd); continue; } close(fd); sb.s_blocks_per_group = ext2fs_swab32(sb.s_blocks_per_group); sb.s_inodes_per_group = ext2fs_swab32(sb.s_inodes_per_group); sb.s_first_data_block = ext2fs_swab32(sb.s_first_data_block); bg_num = (buf.st_ino - 1) / sb.s_inodes_per_group; goal = bg_num * sb.s_blocks_per_group + sb.s_first_data_block; } nftw64(dir_name, calc_entry_counts, FTW_OPEN_FD, flags); /* File tree walk */ nftw64(dir_name, ftw_fn, FTW_OPEN_FD, flags); printf("\n\tSuccess:\t\t\t[ %lu/%lu ]\n", succeed_cnt, total_count); printf("\tFailure:\t\t\t[ %lu/%lu ]\n", total_count - succeed_cnt, total_count); if (detail_flag) { printf("\tTotal extents:\t\t\t%4d->%d\n", extents_before_defrag, extents_after_defrag); printf("\tFragmented percentage:\t\t" "%3llu%%->%llu%%\n", !regular_count ? 0 : ((unsigned long long) frag_files_before_defrag * 100) / regular_count, !regular_count ? 0 : ((unsigned long long) frag_files_after_defrag * 100) / regular_count); } break; case FILENAME: total_count = 1; strncat(lost_found_dir, "/lost+found/", PATH_MAX - strnlen(lost_found_dir, PATH_MAX)); if (strncmp(lost_found_dir, dir_name, strnlen(lost_found_dir, PATH_MAX)) == 0) { PRINT_ERR_MSG(NGMSG_LOST_FOUND); PRINT_FILE_NAME(argv[i]); continue; } if (regional_flag) { fprintf(stderr, NGMSG_TYPE, argv[i]); continue; } printf("ext4 defragmentation for %s\n", argv[i]); /* Defrag single file process */ ftw_fn(argv[i], &buf, FTW_F, NULL); if (succeed_cnt != 0) printf(" Success:\t\t\t[1/1]\n"); else printf(" Success:\t\t\t[0/1]\n"); break; } if (succeed_cnt != 0) success_flag = 1; } if (success_flag) return RETURN_OK; exit(1); out: printf(MSG_USAGE); exit(1); } /* * insert_extent() - Sequentially insert extent by physical block number. * * @extlist_head: the head of an extent list. * @ext: the extent element which will be inserted. */ int insert_extent(extent_t **extlist_head, extent_t *ext) { extent_t *ext_tmp = *extlist_head; if (ext == NULL) return RETURN_NG; /* First element */ if (*extlist_head == NULL) { (*extlist_head) = ext; (*extlist_head)->prev = *extlist_head; (*extlist_head)->next = *extlist_head; return RETURN_OK; } if (ext->data.start <= ext_tmp->data.start) { /* Insert before head */ if (ext_tmp->data.start < ext->data.start + ext->data.len) /* Overlap */ return RETURN_NG; /* Adjust head */ *extlist_head = ext; } else { /* Insert into the middle or last of the list */ do { if (ext->data.start < ext_tmp->data.start) break; ext_tmp = ext_tmp->next; } while (ext_tmp != (*extlist_head)); if (ext->data.start < ext_tmp->prev->data.start + ext_tmp->prev->data.len) /* Overlap */ return RETURN_NG; if (ext_tmp != *extlist_head && ext_tmp->data.start < ext->data.start + ext->data.len) /* Overlap */ return RETURN_NG; } ext_tmp = ext_tmp->prev; /* Insert "ext" after "ext_tmp" */ insert(ext_tmp, ext); return RETURN_OK; } /* * insert_exts_group() - Insert a exts_group in decreasing order * of length. * * @exts_group_list_head: the head of a exts_group list. * @exts_group: the exts_group element which will be inserted. */ int insert_exts_group(exts_group_t **exts_group_list_head, exts_group_t *exts_group) { exts_group_t *exts_group_tmp = NULL; if (exts_group == NULL) return RETURN_NG; /* Initialize list */ if (*exts_group_list_head == NULL) { (*exts_group_list_head) = exts_group; (*exts_group_list_head)->prev = *exts_group_list_head; (*exts_group_list_head)->next = *exts_group_list_head; return RETURN_OK; } if (exts_group->len >= (*exts_group_list_head)->len) { /* Insert before exts_group_list_head */ exts_group_tmp = (*exts_group_list_head)->prev; insert(exts_group_tmp, exts_group); *exts_group_list_head = exts_group; return RETURN_OK; } /* Find insertion positon */ for (exts_group_tmp = (*exts_group_list_head)->next; exts_group_tmp != *exts_group_list_head; exts_group_tmp = exts_group_tmp->next) { if (exts_group_tmp->len < exts_group->len) break; } exts_group_tmp = exts_group_tmp->prev; insert(exts_group_tmp, exts_group); return RETURN_OK; } /* * get_exts_group() - Get element from the exts_group list. * * @exts_group_list_head: the head of a exts_group list. * @exts_group: the exts_group element which will be got. */ exts_group_t *get_exts_group(exts_group_t **exts_group_list_head, exts_group_t *exts_group) { if (exts_group == NULL || *exts_group_list_head == NULL) return NULL; /* Keep "exts_group_list_head" point to the largest extent group */ if (exts_group == *exts_group_list_head) *exts_group_list_head = exts_group->next; if (*exts_group_list_head == (*exts_group_list_head)->next && exts_group == *exts_group_list_head) /* Delete the last element in the list */ *exts_group_list_head = NULL; exts_group->prev->next = exts_group->next; exts_group->next->prev = exts_group->prev; return exts_group; } /* * free_exts_group() - Free the exts_group. * * @*exts_group_list_head: the exts_group list head which will be free. */ void free_exts_group(exts_group_t *exts_group_list_head) { exts_group_t *exts_group_tmp = NULL; if (exts_group_list_head == NULL) return; while (exts_group_list_head->next != exts_group_list_head) { exts_group_tmp = exts_group_list_head; exts_group_list_head->prev->next = exts_group_list_head->next; exts_group_list_head->next->prev = exts_group_list_head->prev; exts_group_list_head = exts_group_list_head->next; free(exts_group_tmp); } free(exts_group_list_head); } /* * free_ext() - Free the extent list. * * @extent_list_head: the extent list head of which will be free. */ void free_ext(extent_t *extent_list_head) { extent_t *extent_tmp = NULL; if (extent_list_head == NULL) return; while (extent_list_head->next != extent_list_head) { extent_tmp = extent_list_head; extent_list_head->prev->next = extent_list_head->next; extent_list_head->next->prev = extent_list_head->prev; extent_list_head = extent_list_head->next; free(extent_tmp); } free(extent_list_head); } /* * do_defrag() - Execute the defrag program in force mode. * * @fd: the file's descriptor. * @exts_group: the exts_group which will be defraged. * @defrag_data: the data which will be defraged. */ int do_defrag(int fd, exts_group_t *exts_group, struct ext4_ext_defrag_data defrag_data) { int defraged_size = 0; int fadvise_ret = 0; unsigned long page_num; unsigned char *vec = NULL; /* Init defrag_data for defrag */ defrag_data.ext.start = exts_group->start->data.start; defrag_data.ext.len = exts_group->len; defrag_data.ext.block = 0; defrag_data.defrag_size = exts_group->len; /* Move victim extents to make sufficient space */ defrag_data.flag = DEFRAG_FORCE_GATHER; defrag_data.goal = exts_group->start->data.start; if (page_in_core(fd, defrag_data, &vec, &page_num) == RETURN_NG) return RETURN_NG; defraged_size = ioctl(fd, EXT4_IOC_DEFRAG, &defrag_data); /* Free pages */ fadvise_ret = defrag_fadvise(fd, defrag_data, vec, page_num); FREE(vec); if (fadvise_ret == RETURN_NG || defraged_size < 0) return RETURN_NG; return defraged_size; } /* * get_used_extent() - Get used extent in the block group. * * @fd: the file's descriptor. * @ext_list_head: the head of the extent list. * @istart: start inode in the block group. * @iend: end inode in the block group. * @bstart: start block in the block group. * @bend: end block in the block group. */ int get_used_extent(int fd, extent_t **ext_list_head, unsigned long long istart, unsigned long long iend, ext4_fsblk_t bstart, ext4_fsblk_t bend) { int ret = 0; int blocksize; int extents_buffer_size, fiemap_ino_size; __u64 pos = 0; __u64 group_start, group_end; unsigned long long ino; struct fiemap_extent *extents_buf = NULL; struct fiemap_ino *fiemap_ino_buf = NULL; ret = ioctl(fd, FIGETBSZ, &blocksize); if (ret < 0) { if (detail_flag) perror(NGMSG_FILE_BLOCKSIZE); return RETURN_NG; } /* Convert units, in bytes. * Be careful : now, physical block number in extent is 48bit, * and the maximum blocksize for ext4 is 4K(12bit), * so there is no overflow, but in future it may be changed. */ group_start = bstart * blocksize; group_end = bend * blocksize; /* Alloc space for fiemap_ino */ fiemap_ino_size = sizeof(struct fiemap_ino); extents_buffer_size = EXTENT_MAX_COUNT * sizeof(struct fiemap_extent); fiemap_ino_buf = malloc(fiemap_ino_size + extents_buffer_size); if (!fiemap_ino_buf) return RETURN_NG; extents_buf = fiemap_ino_buf->fiemap.fm_extents; memset(fiemap_ino_buf, 0, fiemap_ino_size); fiemap_ino_buf->fiemap.fm_length = FIEMAP_MAX_OFFSET; fiemap_ino_buf->fiemap.fm_flags |= FIEMAP_FLAG_SYNC; fiemap_ino_buf->fiemap.fm_extent_count = EXTENT_MAX_COUNT; for (ino = istart; ino <= iend; ino++) { fiemap_ino_buf->ino = ino; pos = 0; do { int i; fiemap_ino_buf->fiemap.fm_start = pos; memset(extents_buf, 0, extents_buffer_size); ret = ioctl(fd, EXT4_IOC_FIEMAP_INO, fiemap_ino_buf); if (ret < 0) { /* Skip non-regular file and unused inode */ if (errno == EOPNOTSUPP || errno == ESTALE || errno == ENOENT || errno == EACCES) continue; goto out; } for (i = 0; i < fiemap_ino_buf-> fiemap.fm_mapped_extents; i++) { extent_t *extent = NULL; /* Is this extent in current block group ? */ if (extents_buf[i].fe_physical < group_start || extents_buf[i].fe_physical > group_end) continue; extent = malloc(sizeof(extent_t)); if (extent == NULL) goto out; extent->data.start = extents_buf[i].fe_physical / blocksize; extent->data.block = extents_buf[i].fe_logical / blocksize; extent->data.len = extents_buf[i].fe_length / blocksize; extent->ino = ino; extent->tag = EXT4_EXT_USE; if (insert_extent(ext_list_head, extent) < 0) { FREE(extent); goto out; } } /* Record file's logical offset this time */ pos = extents_buf[EXTENT_MAX_COUNT-1].fe_logical + extents_buf[EXTENT_MAX_COUNT-1].fe_length; /* If fm_extents array has been filled and * there are extents left, continue to cycle. */ } while (fiemap_ino_buf->fiemap.fm_mapped_extents == EXTENT_MAX_COUNT && !(extents_buf[EXTENT_MAX_COUNT-1].fe_flags & FIEMAP_EXTENT_LAST)); } FREE(fiemap_ino_buf); return RETURN_OK; out: FREE(fiemap_ino_buf); return RETURN_NG; } /* * get_free_extent() - Get used extent in the block group. * * @fd: the file's descriptor. * @ino: inode number from struct stat64. * @blocks_per_group: the block number of each block group. * @ext_list_head: the head of the extent list. */ int get_free_extent(int fd, unsigned long long ino, int blocks_per_group, extent_t **ext_list_head) { ext4_grpblk_t pos = 0; struct ext4_extents_info extents_info; memset(&extents_info, 0, sizeof(struct ext4_extents_info)); extents_info.ino = ino; extents_info.max_entries = DEFRAG_MAX_ENT; while (pos < blocks_per_group) { int i; if (ioctl(fd, EXT4_IOC_FREE_BLOCKS_INFO, &extents_info) < 0) return RETURN_NG; for (i = 0; extents_info.ext[i].len != 0 && i < DEFRAG_MAX_ENT; i++) { /* Alloc space for extent */ extent_t *extent = NULL; extent = malloc(sizeof(extent_t)); if (extent == NULL) return RETURN_NG; memset(extent, 0, sizeof(extent_t)); memcpy(&(extent->data), &(extents_info.ext[i]), sizeof(struct ext4_extent_data)); /* Free extent */ extent->tag = EXT4_EXT_FREE; if (insert_extent(ext_list_head, extent) < 0) { FREE(extent); return RETURN_NG; } } /* No free extent after the logical block number "pos". * In other word, offset this time equals to prev recursion. */ if (pos == extents_info.g_offset) break; if (i < DEFRAG_MAX_ENT) break; /* Record the offset of logical block number this time */ pos = extents_info.g_offset; memset(extents_info.ext, 0, sizeof(struct ext4_extent_data) * DEFRAG_MAX_ENT); } return RETURN_OK; } /* * join_extents() - Find continuous region(exts_group). * * @ext_list_head: the head of the extent list. * @target_exts_group_list_head:the head of the target exts_group list. * @exts_group_list_head: the head of the original exts_group list. * @filesize: the file's descriptor. * @max: the max size of free space. */ int join_extents(extent_t *ext_list_head, exts_group_t **target_exts_group_list_head, exts_group_t **exts_group_list_head, unsigned long filesize, int *max) { int len; extent_t *ext_start, *extent_tmp; ext_start = ext_list_head; extent_tmp = ext_list_head; *max = 0; len = ext_list_head->data.len; extent_tmp = extent_tmp->next; do { exts_group_t *exts_group_tmp = NULL; if (len >= filesize) { /* Hit on the way, * one extent group is enough for defrag, so return. */ exts_group_tmp = malloc(sizeof(exts_group_t)); if (!exts_group_tmp) return RETURN_NG; exts_group_tmp->prev = NULL; exts_group_tmp->next = NULL; exts_group_tmp->start = ext_start; exts_group_tmp->end = extent_tmp->prev; exts_group_tmp->len = len; if (insert_exts_group(target_exts_group_list_head, exts_group_tmp) < 0) { FREE(exts_group_tmp); return RETURN_NG; } return CHECK_FRAG_COUNT; } /* This extent and previous extent are not continuous, * so, all previous extents are treated as an extent group. */ if ((extent_tmp->prev->data.start + extent_tmp->prev->data.len) != extent_tmp->data.start) { exts_group_tmp = malloc(sizeof(exts_group_t)); if (exts_group_tmp == NULL) return RETURN_NG; memset(exts_group_tmp, 0, sizeof(exts_group_t)); exts_group_tmp->len = len; exts_group_tmp->start = ext_start; exts_group_tmp->end = extent_tmp->prev; if (insert_exts_group(exts_group_list_head, exts_group_tmp) < 0) { FREE(exts_group_tmp); return RETURN_NG; } *max += len; ext_start = extent_tmp; len = extent_tmp->data.len; extent_tmp = extent_tmp->next; continue; } /* This extent and previous extent are continuous, * so, they belong to the same extent group, and we check * if the next extent belongs to the same extent group. */ len += extent_tmp->data.len; extent_tmp = extent_tmp->next; } while (extent_tmp != ext_list_head->next); return RETURN_OK; } /* * find_exts_group() - Find target exts_group. * * @ext_count: the number of extents. * @filesize: the file's size. * @exts_group_list_head: the head of the original exts_group list * @target_exts_group_list_head: the head of the target exts_group list. */ int find_exts_group(int *ext_count, unsigned long filesize, exts_group_t **exts_group_list_head, exts_group_t **target_exts_group_list_head) { /* Blocks we found for target file */ int len = 0; if (!(*exts_group_list_head)) return RETURN_NG; while (*exts_group_list_head) { exts_group_t *exts_group_tmp = NULL; /* Even add the current largest extent group, * the sapce we found is still not enough for the file, * so just add the current largest extent group, * and do the next loop. */ if ((*exts_group_list_head)->len + len < filesize) { len += (*exts_group_list_head)->len; exts_group_tmp = get_exts_group(exts_group_list_head, *exts_group_list_head); if (insert_exts_group(target_exts_group_list_head, exts_group_tmp) < 0) { FREE(exts_group_tmp); return RETURN_NG; } (*ext_count)++; continue; } /* Now, if add the current largest extent group, * the space we found is enough, * So, search from the smallest extent group * to avoid waste of space */ exts_group_tmp = (*exts_group_list_head)->prev; do { if (exts_group_tmp->len + len >= filesize) { len += exts_group_tmp->len; exts_group_tmp = get_exts_group(exts_group_list_head, exts_group_tmp); if (insert_exts_group (target_exts_group_list_head, exts_group_tmp) < 0) { FREE(exts_group_tmp); return RETURN_NG; } (*ext_count)++; /* The only entry go out normally */ return RETURN_OK; } exts_group_tmp = exts_group_tmp->prev; } while (exts_group_tmp != (*exts_group_list_head)->prev); /* If we come here, there must be something wrong */ return RETURN_NG; } /* If we come here, there must be something wrong(space not enough) */ return RETURN_NG; } /* * check_frag_count() - Check file frag. * * @fd: the file's discriptor. * @extent_count: the number of extents. */ int check_frag_count(int fd, int extent_count) { long long file_extent_count = RETURN_NG; file_extent_count = file_frag_count(fd); if (file_extent_count == RETURN_NG) return RETURN_NG; if (extent_count >= file_extent_count) { /* No improvment */ errno = ENOSPC; return RETURN_NG; } return RETURN_OK; } /* * defrag_proc() - Reserve extent group and execute * the defrag program. * * @fd: the file's discriptor. * @file_name file name. * @target_exts_group_head: the head of the original exts_group list. * @ino: inode number from struct stat64. */ int defrag_proc(int fd, const char *file_name, exts_group_t *target_exts_group_head, unsigned long long ino) { int ret = 0; int flag = 0; int count = 0; int percent = 0; int blocksize = 0; struct stat64 buf; exts_group_t *exts_group = NULL; extent_t *extent = NULL; struct ext4_extents_info extents_info; struct ext4_ext_defrag_data defrag_data; if (!target_exts_group_head) return RETURN_NG; /* Get file size */ memset(&buf, 0, sizeof(struct stat64)); ret = fstat64(fd, &buf); if (ret < 0) { if (detail_flag) { printf("\033[79;0H\033[K[%lu/%lu]%s\n", defraged_file_count, total_count, file_name); PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_INFO); } return RETURN_NG; } /* Get block size */ ret = ioctl(fd, FIGETBSZ, &blocksize); if (ret < 0) { if (detail_flag) { printf("\033[79;0H\033[K[%lu/%lu]%s\n", defraged_file_count, total_count, file_name); PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_BLOCKSIZE); } return RETURN_NG; } memset(&extents_info, 0, sizeof(extents_info)); memset(&defrag_data, 0, sizeof(struct ext4_ext_defrag_data)); extents_info.ino = 0; exts_group = target_exts_group_head; extents_info.max_entries = DEFRAG_MAX_ENT; extents_info.ino = ino; defrag_data.start_offset = 0; do { /* Move victim extents to make sufficient space */ extent = exts_group->start; int flush_count = 0; do { if (extent->tag != EXT4_EXT_USE) { extent = extent->next; continue; } extents_info.ino = extent->ino; extents_info.goal = fgoal; memcpy(extents_info.ext, &extent->data, sizeof(struct ext4_extent_data)); extent = extent->next; extents_info.entries = 1; ret = ioctl(fd, EXT4_IOC_MOVE_VICTIM, &extents_info); if (ret < 0) { if (errno == EACCES && detail_flag) { printf("\033[79;0H\033[K[%lu/%lu]%s\n", defraged_file_count, total_count, file_name); PRINT_ERR_MSG_WITH_ERRNO (NGMSG_VICTIM_UID); } return ret; } count++; if (detail_flag) { if (!flag) { printf(" Move victim extents to"); printf(" the other block group\n"); flag = 1; } /* moved extent info */ if (!flush_count) { /* flush the defrag process info */ printf("\033[79;0H\033[K ext%-3d", count); printf(" phys:\t%8llu logical:\t%8u " "length:\t%8d\n", (extents_info.ext[0]).start, (extents_info.ext[0]).block, (extents_info.ext[0]).len); flush_count = 1; } else { printf(" ext%-3d phys:\t%8llu logical:" "\t%8u length:\t%8d\n", count, (extents_info.ext[0]).start, (extents_info.ext[0]).block, (extents_info.ext[0]).len); } } } while (extent != exts_group->end->next); if (fsync(fd) < 0) { if (detail_flag) { printf("\033[79;0H\033[K[%lu/%lu]%s\n", defraged_file_count, total_count, file_name); PRINT_ERR_MSG_WITH_ERRNO(NGMSG_FILE_SYNC); } return ret; } /* Init extents_info for ioctl (RESERVE) */ extents_info.entries = 1; extents_info.ext[0].block = exts_group->start->data.block; extents_info.ext[0].start = exts_group->start->data.start; extents_info.ext[0].len = exts_group->len; /* Defrag */ ret = do_defrag(fd, exts_group, defrag_data); if (ret < 0) { if (detail_flag) { printf("\033[79;0H\033[K[%lu/%lu]%s\n", defraged_file_count, total_count, file_name); printf("\tDEFRAG_ERROR ret = %d\t[ NG ]\n", ret); } return ret; } /* Defrag success, so update offset */ defrag_data.start_offset += ret; ret = defrag_data.start_offset; /* Print process progress */ { percent = ((long long)ret * blocksize * 100) / buf.st_size; printf("\033[79;0H\033[K[%lu/%lu]%s:\t%d%%", defraged_file_count, total_count, file_name, min(percent, 100)); fflush(stdout); } exts_group = exts_group->next; } while (exts_group != target_exts_group_head); return ret; } /* * force_defrag() - Execute the defrag program in force defrag mode. * * @fd: the file's descriptor. * @file_name file name. * @buf: a pointer of the struct stat64. * @blocksize: block size in byte. */ int force_defrag(int fd, const char *file_name, const struct stat64 *buf, int blocksize) { int ret = 0; int exts = 0; int maxlen = 0; unsigned int gnumber; unsigned long filesize; unsigned long long istart, iend; ext4_fsblk_t bstart, bend; extent_t *extlist_head = NULL; exts_group_t *exts_group_list_head, *exts_group_list_target_head; struct ext4_super_block sb; exts_group_list_head = NULL; exts_group_list_target_head = NULL; /* Get super block info */ memset(&sb, 0, sizeof(struct ext4_super_block)); if (ioctl(fd, EXT4_IOC_SUPER_INFO, &sb) < 0) return RETURN_NG; sb.s_blocks_per_group = ext2fs_swab32(sb.s_blocks_per_group); sb.s_inodes_per_group = ext2fs_swab32(sb.s_inodes_per_group); gnumber = (buf->st_ino - 1) / sb.s_inodes_per_group; istart = gnumber * sb.s_inodes_per_group; iend = istart + sb.s_inodes_per_group - 1; bstart = gnumber * sb.s_blocks_per_group; bend = bstart + sb.s_blocks_per_group - 1; /* Compute filesize in block */ filesize = (buf->st_size + blocksize - 1) / blocksize; /* Get used extents in the block group */ ret = get_used_extent(fd, &extlist_head, istart, iend, bstart, bend); if (ret == RETURN_NG) goto freelist; /* Get free extents in the group */ ret = get_free_extent(fd, buf->st_ino, sb.s_blocks_per_group, &extlist_head); if (ret == RETURN_NG) goto freelist; /* All space in this group is used by other groups' inodes */ if (extlist_head == NULL) { ret = RETURN_NG; goto freelist; } /* Get continuous region(extents group) */ ret = join_extents(extlist_head, &exts_group_list_target_head, &exts_group_list_head, filesize, &maxlen); if (ret == RETURN_NG) goto freelist; if (ret == CHECK_FRAG_COUNT) { exts = 1; goto frag_check; } if (maxlen < filesize) { /* No enough space */ errno = ENOSPC; ret = RETURN_NG; goto freelist; } if (!exts_group_list_head) { ret = RETURN_NG; goto freelist; } /* Find target extents group */ ret = find_exts_group(&exts, filesize, &exts_group_list_head, &exts_group_list_target_head); if (ret == RETURN_NG) goto freelist; frag_check: /* Check file extent count */ ret = check_frag_count(fd, exts); if (ret == RETURN_NG) goto freelist; /* Reserve extent group and execute the defrag program */ ret = defrag_proc(fd, file_name, exts_group_list_target_head, buf->st_ino); freelist: free_exts_group(exts_group_list_target_head); free_exts_group(exts_group_list_head); free_ext(extlist_head); return ret; }