2011-02-25 01:03:57

by Kyungmin Park

[permalink] [raw]
Subject: [PATCH v3] fat: Batched discard support for fat

From: Kyungmin Park <[email protected]>

FAT supports batched discard as ext4.

Cited from Lukas words.
"The current solution is not ideal because of its bad performance impact.
So basic idea to improve things is to avoid discarding every time some
blocks are freed. and instead batching is together into bigger trims,
which tends to be more effective."

You can find an information in detail at following URLs.
http://lwn.net/Articles/397538/
http://lwn.net/Articles/383933/

Clearify the meaning of "len" (Cited form Lukas mail)

Let the "O" be free (bytes, blocks, whatever), and "=" be used.
Now, we have a filesystem like this.

OOOO==O===OO===OOOOO==O===O===OOOOOOO===
^ ^
0 40

This is how it supposed to wotk if you have called FITIRM with parameters:

start = 0
minlen = 2
len = 20

So you will go through (blocks, bytes...) 0 -> 20

OOOO==O===OO===OOOOO==O===O===OOOOOOO===
^ ^
0 20

So, you will call discard on extents:

0-3
You'll skip 6 because is smaller than minlen
10-11
15-19

instead of

0-3
10-11
15-19
30-36

Signed-off-by: Kyungmin Park <[email protected]>
---
Changelog v3:
Adjust the minlen from queue discard_granularity
Use the corrent len usage
Changelog v2:
Use the given start and len as Lukas comments
Check the queue supports discard feature
---
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index f504089..08b53e1 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -299,6 +299,7 @@ extern int fat_alloc_clusters(struct inode *inode, int *cluster,
int nr_cluster);
extern int fat_free_clusters(struct inode *inode, int cluster);
extern int fat_count_free_clusters(struct super_block *sb);
+extern int fat_trim_fs(struct super_block *sb, struct fstrim_range *range);

/* fat/file.c */
extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index b47d2c9..a8e3837 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -1,6 +1,8 @@
/*
* Copyright (C) 2004, OGAWA Hirofumi
* Released under GPL v2.
+ *
+ * Batched discard support by Kyungmin Park <[email protected]>
*/

#include <linux/module.h>
@@ -541,6 +543,16 @@ out:
return err;
}

+static int fat_issue_discard(struct super_block *sb, int cluster, int nr_clus)
+{
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ sector_t block, nr_blocks;
+
+ block = fat_clus_to_blknr(sbi, cluster);
+ nr_blocks = nr_clus * sbi->sec_per_clus;
+ return sb_issue_discard(sb, block, nr_blocks, GFP_NOFS, 0);
+}
+
int fat_free_clusters(struct inode *inode, int cluster)
{
struct super_block *sb = inode->i_sb;
@@ -575,11 +587,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
if (cluster != fatent.entry + 1) {
int nr_clus = fatent.entry - first_cl + 1;

- sb_issue_discard(sb,
- fat_clus_to_blknr(sbi, first_cl),
- nr_clus * sbi->sec_per_clus,
- GFP_NOFS, 0);
-
+ fat_issue_discard(sb, first_cl, nr_clus);
first_cl = cluster;
}
}
@@ -683,3 +691,88 @@ out:
unlock_fat(sbi);
return err;
}
+
+int fat_trim_fs(struct super_block *sb, struct fstrim_range *range)
+{
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ struct fatent_operations *ops = sbi->fatent_ops;
+ struct fat_entry fatent;
+ unsigned long reada_blocks, reada_mask, cur_block;
+ int err = 0, free, count, entry;
+ int start, len, minlen, trimmed;
+
+ start = range->start >> sb->s_blocksize_bits;
+ start = start / sbi->sec_per_clus;
+ len = range->len >> sb->s_blocksize_bits;
+ len = len / sbi->sec_per_clus;
+ minlen = range->minlen >> sb->s_blocksize_bits;
+ minlen = minlen / sbi->sec_per_clus;
+ trimmed = 0;
+ count = 0;
+
+ lock_fat(sbi);
+ if (sbi->free_clusters != -1 && sbi->free_clus_valid)
+ goto out;
+
+ reada_blocks = FAT_READA_SIZE >> sb->s_blocksize_bits;
+ reada_mask = reada_blocks - 1;
+ cur_block = 0;
+
+ entry = 0;
+ free = 0;
+ fatent_init(&fatent);
+
+ if (start < FAT_START_ENT)
+ start = FAT_START_ENT;
+
+ fatent_set_entry(&fatent, start);
+
+ while (count < sbi->max_cluster) {
+ if (fatent.entry >= sbi->max_cluster)
+ fatent.entry = FAT_START_ENT;
+ /* readahead of fat blocks */
+ if ((cur_block & reada_mask) == 0) {
+ unsigned long rest = sbi->fat_length - cur_block;
+ fat_ent_reada(sb, &fatent, min(reada_blocks, rest));
+ }
+ cur_block++;
+
+ err = fat_ent_read_block(sb, &fatent);
+ if (err)
+ goto out;
+
+ do {
+ if (ops->ent_get(&fatent) == FAT_ENT_FREE) {
+ free++;
+ if (!entry)
+ entry = fatent.entry;
+ if (count >= len && free >= minlen) {
+ fat_issue_discard(sb, entry, free);
+ trimmed += free;
+ }
+ if (count >= len)
+ goto done;
+ } else if (entry) {
+ if (free >= minlen) {
+ fat_issue_discard(sb, entry, free);
+ trimmed += free;
+ }
+ if (count >= len)
+ goto done;
+ free = 0;
+ entry = 0;
+ }
+ count++;
+ } while (fat_ent_next(sbi, &fatent));
+ }
+ if (free >= minlen) {
+ fat_issue_discard(sb, entry, free);
+ trimmed += free;
+ }
+done:
+ range->len = (trimmed * sbi->sec_per_clus) << sb->s_blocksize_bits;
+ fatent_brelse(&fatent);
+out:
+ unlock_fat(sbi);
+ return err;
+}
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 7257752..9910aba 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -125,6 +125,36 @@ long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return fat_ioctl_get_attributes(inode, user_attr);
case FAT_IOCTL_SET_ATTRIBUTES:
return fat_ioctl_set_attributes(filp, user_attr);
+ case FITRIM:
+ {
+ struct super_block *sb = inode->i_sb;
+ struct request_queue *q = bdev_get_queue(sb->s_bdev);
+ struct fstrim_range range;
+ int ret = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!blk_queue_discard(q))
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&range, (struct fstrim_range *)arg,
+ sizeof(range)))
+ return -EFAULT;
+
+ range.minlen = max((unsigned int)range.minlen,
+ q->limits.discard_granularity);
+ ret = fat_trim_fs(sb, &range);
+ if (ret < 0)
+ return ret;
+
+ if (copy_to_user((struct fstrim_range *)arg, &range,
+ sizeof(range)))
+ return -EFAULT;
+
+ return 0;
+ }
+
default:
return -ENOTTY; /* Inappropriate ioctl for device */
}


2011-02-25 11:17:50

by Lukas Czerner

[permalink] [raw]
Subject: Re: [PATCH v3] fat: Batched discard support for fat

On Fri, 25 Feb 2011, Kyungmin Park wrote:

> From: Kyungmin Park <[email protected]>
>
> FAT supports batched discard as ext4.
>
> Cited from Lukas words.
> "The current solution is not ideal because of its bad performance impact.
> So basic idea to improve things is to avoid discarding every time some
> blocks are freed. and instead batching is together into bigger trims,
> which tends to be more effective."
>
> You can find an information in detail at following URLs.
> http://lwn.net/Articles/397538/
> http://lwn.net/Articles/383933/
>
> Clearify the meaning of "len" (Cited form Lukas mail)
>
> Let the "O" be free (bytes, blocks, whatever), and "=" be used.
> Now, we have a filesystem like this.
>
> OOOO==O===OO===OOOOO==O===O===OOOOOOO===
> ^ ^
> 0 40
>
> This is how it supposed to wotk if you have called FITIRM with parameters:
>
> start = 0
> minlen = 2
> len = 20
>
> So you will go through (blocks, bytes...) 0 -> 20
>
> OOOO==O===OO===OOOOO==O===O===OOOOOOO===
> ^ ^
> 0 20
>
> So, you will call discard on extents:
>
> 0-3
> You'll skip 6 because is smaller than minlen
> 10-11
> 15-19
>
> instead of
>
> 0-3
> 10-11
> 15-19
> 30-36

Hi thanks for the next version. And again I have to ask: Did you test it
? and how ? Did you tried xfstest No. 251 ? Couple of comments bellow.

Thanks!
-Lukas

>
> Signed-off-by: Kyungmin Park <[email protected]>
> ---
> Changelog v3:
> Adjust the minlen from queue discard_granularity
> Use the corrent len usage
> Changelog v2:
> Use the given start and len as Lukas comments
> Check the queue supports discard feature
> ---
> diff --git a/fs/fat/fat.h b/fs/fat/fat.h
> index f504089..08b53e1 100644
> --- a/fs/fat/fat.h
> +++ b/fs/fat/fat.h
> @@ -299,6 +299,7 @@ extern int fat_alloc_clusters(struct inode *inode, int *cluster,
> int nr_cluster);
> extern int fat_free_clusters(struct inode *inode, int cluster);
> extern int fat_count_free_clusters(struct super_block *sb);
> +extern int fat_trim_fs(struct super_block *sb, struct fstrim_range *range);
>
> /* fat/file.c */
> extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
> diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
> index b47d2c9..a8e3837 100644
> --- a/fs/fat/fatent.c
> +++ b/fs/fat/fatent.c
> @@ -1,6 +1,8 @@
> /*
> * Copyright (C) 2004, OGAWA Hirofumi
> * Released under GPL v2.
> + *
> + * Batched discard support by Kyungmin Park <[email protected]>
> */
>
> #include <linux/module.h>
> @@ -541,6 +543,16 @@ out:
> return err;
> }
>
> +static int fat_issue_discard(struct super_block *sb, int cluster, int nr_clus)
> +{
> + struct msdos_sb_info *sbi = MSDOS_SB(sb);
> + sector_t block, nr_blocks;
> +
> + block = fat_clus_to_blknr(sbi, cluster);
> + nr_blocks = nr_clus * sbi->sec_per_clus;
> + return sb_issue_discard(sb, block, nr_blocks, GFP_NOFS, 0);
> +}
> +
> int fat_free_clusters(struct inode *inode, int cluster)
> {
> struct super_block *sb = inode->i_sb;
> @@ -575,11 +587,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
> if (cluster != fatent.entry + 1) {
> int nr_clus = fatent.entry - first_cl + 1;
>
> - sb_issue_discard(sb,
> - fat_clus_to_blknr(sbi, first_cl),
> - nr_clus * sbi->sec_per_clus,
> - GFP_NOFS, 0);
> -
> + fat_issue_discard(sb, first_cl, nr_clus);
> first_cl = cluster;
> }
> }
> @@ -683,3 +691,88 @@ out:
> unlock_fat(sbi);
> return err;
> }
> +
> +int fat_trim_fs(struct super_block *sb, struct fstrim_range *range)
> +{
> + struct msdos_sb_info *sbi = MSDOS_SB(sb);
> + struct fatent_operations *ops = sbi->fatent_ops;
> + struct fat_entry fatent;
> + unsigned long reada_blocks, reada_mask, cur_block;
> + int err = 0, free, count, entry;
> + int start, len, minlen, trimmed;
> +
> + start = range->start >> sb->s_blocksize_bits;
> + start = start / sbi->sec_per_clus;
> + len = range->len >> sb->s_blocksize_bits;
> + len = len / sbi->sec_per_clus;
> + minlen = range->minlen >> sb->s_blocksize_bits;
> + minlen = minlen / sbi->sec_per_clus;
> + trimmed = 0;
> + count = 0;
> +
> + lock_fat(sbi);
> + if (sbi->free_clusters != -1 && sbi->free_clus_valid)
> + goto out;
> +
> + reada_blocks = FAT_READA_SIZE >> sb->s_blocksize_bits;
> + reada_mask = reada_blocks - 1;
> + cur_block = 0;
> +
> + entry = 0;
> + free = 0;
> + fatent_init(&fatent);
> +
> + if (start < FAT_START_ENT)
> + start = FAT_START_ENT;
> +
> + fatent_set_entry(&fatent, start);
> +
> + while (count < sbi->max_cluster) {
> + if (fatent.entry >= sbi->max_cluster)
> + fatent.entry = FAT_START_ENT;
> + /* readahead of fat blocks */
> + if ((cur_block & reada_mask) == 0) {
> + unsigned long rest = sbi->fat_length - cur_block;
> + fat_ent_reada(sb, &fatent, min(reada_blocks, rest));

You really do not need new variable "rest" just for passing it into one
function. Get rid of it.

> + }
> + cur_block++;
> +
> + err = fat_ent_read_block(sb, &fatent);
> + if (err)
> + goto out;
> +
> + do {
> + if (ops->ent_get(&fatent) == FAT_ENT_FREE) {
> + free++;
> + if (!entry)
> + entry = fatent.entry;
> + if (count >= len && free >= minlen) {
> + fat_issue_discard(sb, entry, free);
> + trimmed += free;
I really do not understand FAT code very much, but is this right ?
Should not you be setting free = 0 ? What will happen if you'll end up
in the same branch in next iteration ? -- free will be still set to
previous value+1, bu you'll be discarding next entry. I am sorry but
this whole thing is not very readable.

> + }
> + if (count >= len)
> + goto done;
> + } else if (entry) {
> + if (free >= minlen) {
> + fat_issue_discard(sb, entry, free);
> + trimmed += free;
> + }
> + if (count >= len)
> + goto done;
> + free = 0;
> + entry = 0;
> + }

I don't not see why you are testing count all the time since it has not been
changed since the "if" condition started. how about doing one test
before the "if" condition ?

> + count++;
> + } while (fat_ent_next(sbi, &fatent));
> + }
> + if (free >= minlen) {
> + fat_issue_discard(sb, entry, free);
> + trimmed += free;
> + }
> +done:
> + range->len = (trimmed * sbi->sec_per_clus) << sb->s_blocksize_bits;
> + fatent_brelse(&fatent);
> +out:
> + unlock_fat(sbi);
> + return err;
> +}
> diff --git a/fs/fat/file.c b/fs/fat/file.c
> index 7257752..9910aba 100644
> --- a/fs/fat/file.c
> +++ b/fs/fat/file.c
> @@ -125,6 +125,36 @@ long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> return fat_ioctl_get_attributes(inode, user_attr);
> case FAT_IOCTL_SET_ATTRIBUTES:
> return fat_ioctl_set_attributes(filp, user_attr);
> + case FITRIM:
> + {
> + struct super_block *sb = inode->i_sb;
> + struct request_queue *q = bdev_get_queue(sb->s_bdev);
> + struct fstrim_range range;
> + int ret = 0;
> +
> + if (!capable(CAP_SYS_ADMIN))
> + return -EPERM;
> +
> + if (!blk_queue_discard(q))
> + return -EOPNOTSUPP;
> +
> + if (copy_from_user(&range, (struct fstrim_range *)arg,
> + sizeof(range)))
> + return -EFAULT;
> +
> + range.minlen = max((unsigned int)range.minlen,
> + q->limits.discard_granularity);
> + ret = fat_trim_fs(sb, &range);
> + if (ret < 0)
> + return ret;
> +
> + if (copy_to_user((struct fstrim_range *)arg, &range,
> + sizeof(range)))
> + return -EFAULT;
> +
> + return 0;
> + }
> +
> default:
> return -ENOTTY; /* Inappropriate ioctl for device */
> }
>

--

2011-02-28 06:45:51

by Kyungmin Park

[permalink] [raw]
Subject: Re: [PATCH v3] fat: Batched discard support for fat

On Fri, Feb 25, 2011 at 8:17 PM, Lukas Czerner <[email protected]> wrote:
> On Fri, 25 Feb 2011, Kyungmin Park wrote:
>
>> From: Kyungmin Park <[email protected]>
>>
>> FAT supports batched discard as ext4.
>>
>> Cited from Lukas words.
>> "The current solution is not ideal because of its bad performance impact.
>> So basic idea to improve things is to avoid discarding every time some
>> blocks are freed. and instead batching is together into bigger trims,
>> which tends to be more effective."
>>
>> You can find an information in detail at following URLs.
>> http://lwn.net/Articles/397538/
>> http://lwn.net/Articles/383933/
>>
>> Clearify the meaning of "len" (Cited form Lukas mail)
>>
>> Let the "O" be free (bytes, blocks, whatever), and "=" be used.
>> Now, we have a filesystem like this.
>>
>> ? OOOO==O===OO===OOOOO==O===O===OOOOOOO===
>> ? ^ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?^
>> ? 0 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?40
>>
>> This is how it supposed to wotk if you have called FITIRM with parameters:
>>
>> start = 0
>> minlen = 2
>> len = 20
>>
>> So you will go through (blocks, bytes...) 0 -> 20
>>
>> ? OOOO==O===OO===OOOOO==O===O===OOOOOOO===
>> ? ^ ? ? ? ? ? ? ? ? ? ^
>> ? 0 ? ? ? ? ? ? ? ? ? 20
>>
>> So, you will call discard on extents:
>>
>> 0-3
>> You'll skip 6 because is smaller than minlen
>> 10-11
>> 15-19
>>
>> instead of
>>
>> 0-3
>> 10-11
>> 15-19
>> 30-36
>
> Hi thanks for the next version. And again I have to ask: Did you test it
> ? and how ? Did you tried xfstest No. 251 ? Couple of comments bellow.

I tested it with your test program. Of course I modified for our
environment (eMMC).

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <sys/ioctl.h>

struct fstrim_range {
uint64_t start;
uint64_t len;
uint64_t minlen;
};

#define FITRIM _IOWR('X', 121, struct fstrim_range)

int main(int argc, char **argv)
{
struct fstrim_range range;
uint64_t len;
int fd;

if (argc < 2) {
fprintf(stderr, "usage: %s mountpoint [size]\n", argv[0]);
return 1;
}

if (argc == 3)
len = atoll(argv[1]);
else
len = ((1UL<<31) - 1);

range.start = 0;
range.len = len;
range.minlen = 256 * 1024; /* Minimum is 256KiB */

fd = open(argv[1], O_RDONLY);
if (fd < 0) {
perror("open");
return 1;
}

if (ioctl(fd, FITRIM, &range)) {
if (errno == EOPNOTSUPP)
fprintf(stderr, "TRIM not supported\n");
else
perror("FITRIM");
return 1;
}

fprintf(stderr, "Trimmed size %llu\n", range.len);

return 0;
}

>
> Thanks!
> -Lukas
>
>>
>> Signed-off-by: Kyungmin Park <[email protected]>
>> ---
>> Changelog v3:
>> ? ? ? Adjust the minlen from queue discard_granularity
>> ? ? ? Use the corrent len usage
>> Changelog v2:
>> ? ? ? Use the given start and len as Lukas comments
>> ? ? ? Check the queue supports discard feature
>> ---
>> diff --git a/fs/fat/fat.h b/fs/fat/fat.h
>> index f504089..08b53e1 100644
>> --- a/fs/fat/fat.h
>> +++ b/fs/fat/fat.h
>> @@ -299,6 +299,7 @@ extern int fat_alloc_clusters(struct inode *inode, int *cluster,
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? int nr_cluster);
>> ?extern int fat_free_clusters(struct inode *inode, int cluster);
>> ?extern int fat_count_free_clusters(struct super_block *sb);
>> +extern int fat_trim_fs(struct super_block *sb, struct fstrim_range *range);
>>
>> ?/* fat/file.c */
>> ?extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
>> diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
>> index b47d2c9..a8e3837 100644
>> --- a/fs/fat/fatent.c
>> +++ b/fs/fat/fatent.c
>> @@ -1,6 +1,8 @@
>> ?/*
>> ? * Copyright (C) 2004, OGAWA Hirofumi
>> ? * Released under GPL v2.
>> + *
>> + * Batched discard support by Kyungmin Park <[email protected]>
>> ? */
>>
>> ?#include <linux/module.h>
>> @@ -541,6 +543,16 @@ out:
>> ? ? ? return err;
>> ?}
>>
>> +static int fat_issue_discard(struct super_block *sb, int cluster, int nr_clus)
>> +{
>> + ? ? struct msdos_sb_info *sbi = MSDOS_SB(sb);
>> + ? ? sector_t block, nr_blocks;
>> +
>> + ? ? block = fat_clus_to_blknr(sbi, cluster);
>> + ? ? nr_blocks = nr_clus * sbi->sec_per_clus;
>> + ? ? return sb_issue_discard(sb, block, nr_blocks, GFP_NOFS, 0);
>> +}
>> +
>> ?int fat_free_clusters(struct inode *inode, int cluster)
>> ?{
>> ? ? ? struct super_block *sb = inode->i_sb;
>> @@ -575,11 +587,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
>> ? ? ? ? ? ? ? ? ? ? ? if (cluster != fatent.entry + 1) {
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int nr_clus = fatent.entry - first_cl + 1;
>>
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? sb_issue_discard(sb,
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_clus_to_blknr(sbi, first_cl),
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? nr_clus * sbi->sec_per_clus,
>> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? GFP_NOFS, 0);
>> -
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_issue_discard(sb, first_cl, nr_clus);
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? first_cl = cluster;
>> ? ? ? ? ? ? ? ? ? ? ? }
>> ? ? ? ? ? ? ? }
>> @@ -683,3 +691,88 @@ out:
>> ? ? ? unlock_fat(sbi);
>> ? ? ? return err;
>> ?}
>> +
>> +int fat_trim_fs(struct super_block *sb, struct fstrim_range *range)
>> +{
>> + ? ? struct msdos_sb_info *sbi = MSDOS_SB(sb);
>> + ? ? struct fatent_operations *ops = sbi->fatent_ops;
>> + ? ? struct fat_entry fatent;
>> + ? ? unsigned long reada_blocks, reada_mask, cur_block;
>> + ? ? int err = 0, free, count, entry;
>> + ? ? int start, len, minlen, trimmed;
>> +
>> + ? ? start = range->start >> sb->s_blocksize_bits;
>> + ? ? start = start / sbi->sec_per_clus;
>> + ? ? len = range->len >> sb->s_blocksize_bits;
>> + ? ? len = len / sbi->sec_per_clus;
>> + ? ? minlen = range->minlen >> sb->s_blocksize_bits;
>> + ? ? minlen = minlen / sbi->sec_per_clus;
>> + ? ? trimmed = 0;
>> + ? ? count = 0;
>> +
>> + ? ? lock_fat(sbi);
>> + ? ? if (sbi->free_clusters != -1 && sbi->free_clus_valid)
>> + ? ? ? ? ? ? goto out;
>> +
>> + ? ? reada_blocks = FAT_READA_SIZE >> sb->s_blocksize_bits;
>> + ? ? reada_mask = reada_blocks - 1;
>> + ? ? cur_block = 0;
>> +
>> + ? ? entry = 0;
>> + ? ? free = 0;
>> + ? ? fatent_init(&fatent);
>> +
>> + ? ? if (start < FAT_START_ENT)
>> + ? ? ? ? ? ? start = FAT_START_ENT;
>> +
>> + ? ? fatent_set_entry(&fatent, start);
>> +
>> + ? ? while (count < sbi->max_cluster) {
>> + ? ? ? ? ? ? if (fatent.entry >= sbi->max_cluster)
>> + ? ? ? ? ? ? ? ? ? ? fatent.entry = FAT_START_ENT;
>> + ? ? ? ? ? ? /* readahead of fat blocks */
>> + ? ? ? ? ? ? if ((cur_block & reada_mask) == 0) {
>> + ? ? ? ? ? ? ? ? ? ? unsigned long rest = sbi->fat_length - cur_block;
>> + ? ? ? ? ? ? ? ? ? ? fat_ent_reada(sb, &fatent, min(reada_blocks, rest));
>
> You really do not need new variable "rest" just for passing it into one
> function. Get rid of it.

Umm. I don't want to modify it since it's routine is same as
free_count codes. I just borrowed it from FAT codes.
>
>> + ? ? ? ? ? ? }
>> + ? ? ? ? ? ? cur_block++;
>> +
>> + ? ? ? ? ? ? err = fat_ent_read_block(sb, &fatent);
>> + ? ? ? ? ? ? if (err)
>> + ? ? ? ? ? ? ? ? ? ? goto out;
>> +
>> + ? ? ? ? ? ? do {
>> + ? ? ? ? ? ? ? ? ? ? if (ops->ent_get(&fatent) == FAT_ENT_FREE) {
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? free++;
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (!entry)
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? entry = fatent.entry;
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (count >= len && free >= minlen) {
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_issue_discard(sb, entry, free);
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? trimmed += free;
> I really do not understand FAT code very much, but is this right ?
> Should not you be setting free = 0 ?
Right but count is larger then "len". it will exit at next if (count
>= len) goto done statement.

> What will happen if you'll end up
> in the same branch in next iteration ? -- free will be still set to
> previous value+1, bu you'll be discarding next entry. I am sorry but
> this whole thing is not very readable.

>
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? }
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (count >= len)
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? goto done;
>> + ? ? ? ? ? ? ? ? ? ? } else if (entry) {
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (free >= minlen) {
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_issue_discard(sb, entry, free);
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? trimmed += free;
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? }
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (count >= len)
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? goto done;
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? free = 0;
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? entry = 0;
>> + ? ? ? ? ? ? ? ? ? ? }
>
> I don't not see why you are testing count all the time since it has not been
> changed since the "if" condition started. how about doing one test
> before the "if" condition ?

For looping the whole fat max_cluster size. now start can be any
address and if start is a middle point and len is max, then it will
search from middle -> end -> start -> middle. The "count" is used for
this purpose.

Thank you,
Kyungmin Park
>
>> + ? ? ? ? ? ? ? ? ? ? count++;
>> + ? ? ? ? ? ? } while (fat_ent_next(sbi, &fatent));
>> + ? ? }
>> + ? ? if (free >= minlen) {
>> + ? ? ? ? ? ? fat_issue_discard(sb, entry, free);
>> + ? ? ? ? ? ? trimmed += free;
>> + ? ? }
>> +done:
>> + ? ? range->len = (trimmed * sbi->sec_per_clus) << sb->s_blocksize_bits;
>> + ? ? fatent_brelse(&fatent);
>> +out:
>> + ? ? unlock_fat(sbi);
>> + ? ? return err;
>> +}
>> diff --git a/fs/fat/file.c b/fs/fat/file.c
>> index 7257752..9910aba 100644
>> --- a/fs/fat/file.c
>> +++ b/fs/fat/file.c
>> @@ -125,6 +125,36 @@ long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>> ? ? ? ? ? ? ? return fat_ioctl_get_attributes(inode, user_attr);
>> ? ? ? case FAT_IOCTL_SET_ATTRIBUTES:
>> ? ? ? ? ? ? ? return fat_ioctl_set_attributes(filp, user_attr);
>> + ? ? case FITRIM:
>> + ? ? {
>> + ? ? ? ? ? ? struct super_block *sb = inode->i_sb;
>> + ? ? ? ? ? ? struct request_queue *q = bdev_get_queue(sb->s_bdev);
>> + ? ? ? ? ? ? struct fstrim_range range;
>> + ? ? ? ? ? ? int ret = 0;
>> +
>> + ? ? ? ? ? ? if (!capable(CAP_SYS_ADMIN))
>> + ? ? ? ? ? ? ? ? ? ? return -EPERM;
>> +
>> + ? ? ? ? ? ? if (!blk_queue_discard(q))
>> + ? ? ? ? ? ? ? ? ? ? return -EOPNOTSUPP;
>> +
>> + ? ? ? ? ? ? if (copy_from_user(&range, (struct fstrim_range *)arg,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? sizeof(range)))
>> + ? ? ? ? ? ? ? ? ? ? return -EFAULT;
>> +
>> + ? ? ? ? ? ? range.minlen = max((unsigned int)range.minlen,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? q->limits.discard_granularity);
>> + ? ? ? ? ? ? ret = fat_trim_fs(sb, &range);
>> + ? ? ? ? ? ? if (ret < 0)
>> + ? ? ? ? ? ? ? ? ? ? return ret;
>> +
>> + ? ? ? ? ? ? if (copy_to_user((struct fstrim_range *)arg, &range,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? sizeof(range)))
>> + ? ? ? ? ? ? ? ? ? ? return -EFAULT;
>> +
>> + ? ? ? ? ? ? return 0;
>> + ? ? }
>> +
>> ? ? ? default:
>> ? ? ? ? ? ? ? return -ENOTTY; /* Inappropriate ioctl for device */
>> ? ? ? }
>>
>
> --
>

2011-02-28 12:51:28

by Lukas Czerner

[permalink] [raw]
Subject: Re: [PATCH v3] fat: Batched discard support for fat

On Mon, 28 Feb 2011, Kyungmin Park wrote:

> On Fri, Feb 25, 2011 at 8:17 PM, Lukas Czerner <[email protected]> wrote:
> > On Fri, 25 Feb 2011, Kyungmin Park wrote:
> >
> >> From: Kyungmin Park <[email protected]>
> >>
> >> FAT supports batched discard as ext4.
> >>
> >> Cited from Lukas words.
> >> "The current solution is not ideal because of its bad performance impact.
> >> So basic idea to improve things is to avoid discarding every time some
> >> blocks are freed. and instead batching is together into bigger trims,
> >> which tends to be more effective."
> >>
> >> You can find an information in detail at following URLs.
> >> http://lwn.net/Articles/397538/
> >> http://lwn.net/Articles/383933/
> >>
> >> Clearify the meaning of "len" (Cited form Lukas mail)
> >>
> >> Let the "O" be free (bytes, blocks, whatever), and "=" be used.
> >> Now, we have a filesystem like this.
> >>
> >> ? OOOO==O===OO===OOOOO==O===O===OOOOOOO===
> >> ? ^ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?^
> >> ? 0 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?40
> >>
> >> This is how it supposed to wotk if you have called FITIRM with parameters:
> >>
> >> start = 0
> >> minlen = 2
> >> len = 20
> >>
> >> So you will go through (blocks, bytes...) 0 -> 20
> >>
> >> ? OOOO==O===OO===OOOOO==O===O===OOOOOOO===
> >> ? ^ ? ? ? ? ? ? ? ? ? ^
> >> ? 0 ? ? ? ? ? ? ? ? ? 20
> >>
> >> So, you will call discard on extents:
> >>
> >> 0-3
> >> You'll skip 6 because is smaller than minlen
> >> 10-11
> >> 15-19
> >>
> >> instead of
> >>
> >> 0-3
> >> 10-11
> >> 15-19
> >> 30-36
> >
> > Hi thanks for the next version. And again I have to ask: Did you test it
> > ? and how ? Did you tried xfstest No. 251 ? Couple of comments bellow.
>
> I tested it with your test program. Of course I modified for our
> environment (eMMC).

Ok, good.

>
> #include <errno.h>
> #include <fcntl.h>
> #include <stdio.h>
> #include <stdint.h>
> #include <sys/ioctl.h>
>
> struct fstrim_range {
> uint64_t start;
> uint64_t len;
> uint64_t minlen;
> };
>
> #define FITRIM _IOWR('X', 121, struct fstrim_range)
>
> int main(int argc, char **argv)
> {
> struct fstrim_range range;
> uint64_t len;
> int fd;
>
> if (argc < 2) {
> fprintf(stderr, "usage: %s mountpoint [size]\n", argv[0]);
> return 1;
> }
>
> if (argc == 3)
> len = atoll(argv[1]);
> else
> len = ((1UL<<31) - 1);
>
> range.start = 0;
> range.len = len;
> range.minlen = 256 * 1024; /* Minimum is 256KiB */

Why exactly you need to set this ? What will happen if the minlen is 0 ?

>
> fd = open(argv[1], O_RDONLY);
> if (fd < 0) {
> perror("open");
> return 1;
> }
>
> if (ioctl(fd, FITRIM, &range)) {
> if (errno == EOPNOTSUPP)
> fprintf(stderr, "TRIM not supported\n");
> else
> perror("FITRIM");
> return 1;
> }
>
> fprintf(stderr, "Trimmed size %llu\n", range.len);
>
> return 0;
> }
>
> >
> > Thanks!
> > -Lukas
> >
> >>
> >> Signed-off-by: Kyungmin Park <[email protected]>
> >> ---
> >> Changelog v3:
> >> ? ? ? Adjust the minlen from queue discard_granularity
> >> ? ? ? Use the corrent len usage
> >> Changelog v2:
> >> ? ? ? Use the given start and len as Lukas comments
> >> ? ? ? Check the queue supports discard feature
> >> ---
> >> diff --git a/fs/fat/fat.h b/fs/fat/fat.h
> >> index f504089..08b53e1 100644
> >> --- a/fs/fat/fat.h
> >> +++ b/fs/fat/fat.h
> >> @@ -299,6 +299,7 @@ extern int fat_alloc_clusters(struct inode *inode, int *cluster,
> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? int nr_cluster);
> >> ?extern int fat_free_clusters(struct inode *inode, int cluster);
> >> ?extern int fat_count_free_clusters(struct super_block *sb);
> >> +extern int fat_trim_fs(struct super_block *sb, struct fstrim_range *range);
> >>
> >> ?/* fat/file.c */
> >> ?extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
> >> diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
> >> index b47d2c9..a8e3837 100644
> >> --- a/fs/fat/fatent.c
> >> +++ b/fs/fat/fatent.c
> >> @@ -1,6 +1,8 @@
> >> ?/*
> >> ? * Copyright (C) 2004, OGAWA Hirofumi
> >> ? * Released under GPL v2.
> >> + *
> >> + * Batched discard support by Kyungmin Park <[email protected]>
> >> ? */
> >>
> >> ?#include <linux/module.h>
> >> @@ -541,6 +543,16 @@ out:
> >> ? ? ? return err;
> >> ?}
> >>
> >> +static int fat_issue_discard(struct super_block *sb, int cluster, int nr_clus)
> >> +{
> >> + ? ? struct msdos_sb_info *sbi = MSDOS_SB(sb);
> >> + ? ? sector_t block, nr_blocks;
> >> +
> >> + ? ? block = fat_clus_to_blknr(sbi, cluster);
> >> + ? ? nr_blocks = nr_clus * sbi->sec_per_clus;
> >> + ? ? return sb_issue_discard(sb, block, nr_blocks, GFP_NOFS, 0);
> >> +}
> >> +
> >> ?int fat_free_clusters(struct inode *inode, int cluster)
> >> ?{
> >> ? ? ? struct super_block *sb = inode->i_sb;
> >> @@ -575,11 +587,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
> >> ? ? ? ? ? ? ? ? ? ? ? if (cluster != fatent.entry + 1) {
> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int nr_clus = fatent.entry - first_cl + 1;
> >>
> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? sb_issue_discard(sb,
> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_clus_to_blknr(sbi, first_cl),
> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? nr_clus * sbi->sec_per_clus,
> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? GFP_NOFS, 0);
> >> -
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_issue_discard(sb, first_cl, nr_clus);
> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? first_cl = cluster;
> >> ? ? ? ? ? ? ? ? ? ? ? }
> >> ? ? ? ? ? ? ? }
> >> @@ -683,3 +691,88 @@ out:
> >> ? ? ? unlock_fat(sbi);
> >> ? ? ? return err;
> >> ?}
> >> +
> >> +int fat_trim_fs(struct super_block *sb, struct fstrim_range *range)
> >> +{
> >> + ? ? struct msdos_sb_info *sbi = MSDOS_SB(sb);
> >> + ? ? struct fatent_operations *ops = sbi->fatent_ops;
> >> + ? ? struct fat_entry fatent;
> >> + ? ? unsigned long reada_blocks, reada_mask, cur_block;
> >> + ? ? int err = 0, free, count, entry;
> >> + ? ? int start, len, minlen, trimmed;
> >> +
> >> + ? ? start = range->start >> sb->s_blocksize_bits;
> >> + ? ? start = start / sbi->sec_per_clus;
> >> + ? ? len = range->len >> sb->s_blocksize_bits;
> >> + ? ? len = len / sbi->sec_per_clus;
> >> + ? ? minlen = range->minlen >> sb->s_blocksize_bits;
> >> + ? ? minlen = minlen / sbi->sec_per_clus;
> >> + ? ? trimmed = 0;
> >> + ? ? count = 0;
> >> +
> >> + ? ? lock_fat(sbi);
> >> + ? ? if (sbi->free_clusters != -1 && sbi->free_clus_valid)
> >> + ? ? ? ? ? ? goto out;
> >> +
> >> + ? ? reada_blocks = FAT_READA_SIZE >> sb->s_blocksize_bits;
> >> + ? ? reada_mask = reada_blocks - 1;
> >> + ? ? cur_block = 0;
> >> +
> >> + ? ? entry = 0;
> >> + ? ? free = 0;
> >> + ? ? fatent_init(&fatent);
> >> +
> >> + ? ? if (start < FAT_START_ENT)
> >> + ? ? ? ? ? ? start = FAT_START_ENT;
> >> +
> >> + ? ? fatent_set_entry(&fatent, start);
> >> +
> >> + ? ? while (count < sbi->max_cluster) {
> >> + ? ? ? ? ? ? if (fatent.entry >= sbi->max_cluster)
> >> + ? ? ? ? ? ? ? ? ? ? fatent.entry = FAT_START_ENT;
> >> + ? ? ? ? ? ? /* readahead of fat blocks */
> >> + ? ? ? ? ? ? if ((cur_block & reada_mask) == 0) {
> >> + ? ? ? ? ? ? ? ? ? ? unsigned long rest = sbi->fat_length - cur_block;
> >> + ? ? ? ? ? ? ? ? ? ? fat_ent_reada(sb, &fatent, min(reada_blocks, rest));
> >
> > You really do not need new variable "rest" just for passing it into one
> > function. Get rid of it.
>
> Umm. I don't want to modify it since it's routine is same as
> free_count codes. I just borrowed it from FAT codes.
> >
> >> + ? ? ? ? ? ? }
> >> + ? ? ? ? ? ? cur_block++;
> >> +
> >> + ? ? ? ? ? ? err = fat_ent_read_block(sb, &fatent);
> >> + ? ? ? ? ? ? if (err)
> >> + ? ? ? ? ? ? ? ? ? ? goto out;
> >> +
> >> + ? ? ? ? ? ? do {
> >> + ? ? ? ? ? ? ? ? ? ? if (ops->ent_get(&fatent) == FAT_ENT_FREE) {
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? free++;
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (!entry)
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? entry = fatent.entry;
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (count >= len && free >= minlen) {
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_issue_discard(sb, entry, free);
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? trimmed += free;

I think you can remove this condition completely is you move "done"
label befor the if (free >= minlen) condition.

> > I really do not understand FAT code very much, but is this right ?
> > Should not you be setting free = 0 ?
> Right but count is larger then "len". it will exit at next if (count
> >= len) goto done statement.

Oh, ok I see. But it proves my point that this code is not very well
readable.

>
> > What will happen if you'll end up
> > in the same branch in next iteration ? -- free will be still set to
> > previous value+1, bu you'll be discarding next entry. I am sorry but
> > this whole thing is not very readable.
>
> >
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? }
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (count >= len)
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? goto done;
^^^^^^^^^^^^^^^^^^
this

> >> + ? ? ? ? ? ? ? ? ? ? } else if (entry) {
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (free >= minlen) {
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_issue_discard(sb, entry, free);
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? trimmed += free;
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? }
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (count >= len)
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? goto done;
^^^^^^^^^^^^^^^^^^
and this

> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? free = 0;
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? entry = 0;
> >> + ? ? ? ? ? ? ? ? ? ? }
> >
> > I don't not see why you are testing count all the time since it has not been
> > changed since the "if" condition started. how about doing one test
> > before the "if" condition ?
>
> For looping the whole fat max_cluster size. now start can be any
> address and if start is a middle point and len is max, then it will
> search from middle -> end -> start -> middle. The "count" is used for
> this purpose.
>
> Thank you,
> Kyungmin Park
> >
can go here -->>
? ? ? ? ? ? ? ? ? ? ? ? ? if (count >= len)
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? goto done;

> >> + ? ? ? ? ? ? ? ? ? ? count++;
> >> + ? ? ? ? ? ? } while (fat_ent_next(sbi, &fatent));
> >> + ? ? }
> >> + ? ? if (free >= minlen) {
> >> + ? ? ? ? ? ? fat_issue_discard(sb, entry, free);
> >> + ? ? ? ? ? ? trimmed += free;
> >> + ? ? }
> >> +done:
> >> + ? ? range->len = (trimmed * sbi->sec_per_clus) << sb->s_blocksize_bits;
> >> + ? ? fatent_brelse(&fatent);
> >> +out:
> >> + ? ? unlock_fat(sbi);
> >> + ? ? return err;
> >> +}
> >> diff --git a/fs/fat/file.c b/fs/fat/file.c
> >> index 7257752..9910aba 100644
> >> --- a/fs/fat/file.c
> >> +++ b/fs/fat/file.c
> >> @@ -125,6 +125,36 @@ long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
> >> ? ? ? ? ? ? ? return fat_ioctl_get_attributes(inode, user_attr);
> >> ? ? ? case FAT_IOCTL_SET_ATTRIBUTES:
> >> ? ? ? ? ? ? ? return fat_ioctl_set_attributes(filp, user_attr);
> >> + ? ? case FITRIM:
> >> + ? ? {
> >> + ? ? ? ? ? ? struct super_block *sb = inode->i_sb;
> >> + ? ? ? ? ? ? struct request_queue *q = bdev_get_queue(sb->s_bdev);
> >> + ? ? ? ? ? ? struct fstrim_range range;
> >> + ? ? ? ? ? ? int ret = 0;
> >> +
> >> + ? ? ? ? ? ? if (!capable(CAP_SYS_ADMIN))
> >> + ? ? ? ? ? ? ? ? ? ? return -EPERM;
> >> +
> >> + ? ? ? ? ? ? if (!blk_queue_discard(q))
> >> + ? ? ? ? ? ? ? ? ? ? return -EOPNOTSUPP;
> >> +
> >> + ? ? ? ? ? ? if (copy_from_user(&range, (struct fstrim_range *)arg,
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? sizeof(range)))
> >> + ? ? ? ? ? ? ? ? ? ? return -EFAULT;
> >> +
> >> + ? ? ? ? ? ? range.minlen = max((unsigned int)range.minlen,
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? q->limits.discard_granularity);
> >> + ? ? ? ? ? ? ret = fat_trim_fs(sb, &range);
> >> + ? ? ? ? ? ? if (ret < 0)
> >> + ? ? ? ? ? ? ? ? ? ? return ret;
> >> +
> >> + ? ? ? ? ? ? if (copy_to_user((struct fstrim_range *)arg, &range,
> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? sizeof(range)))
> >> + ? ? ? ? ? ? ? ? ? ? return -EFAULT;
> >> +
> >> + ? ? ? ? ? ? return 0;
> >> + ? ? }
> >> +
> >> ? ? ? default:
> >> ? ? ? ? ? ? ? return -ENOTTY; /* Inappropriate ioctl for device */
> >> ? ? ? }
> >>
> >
> > --
> >
>

--

2011-03-02 02:07:12

by Kyungmin Park

[permalink] [raw]
Subject: Re: [PATCH v3] fat: Batched discard support for fat

On Mon, Feb 28, 2011 at 9:51 PM, Lukas Czerner <[email protected]> wrote:
> On Mon, 28 Feb 2011, Kyungmin Park wrote:
>
>> On Fri, Feb 25, 2011 at 8:17 PM, Lukas Czerner <[email protected]> wrote:
>> > On Fri, 25 Feb 2011, Kyungmin Park wrote:
>> >
>> >> From: Kyungmin Park <[email protected]>
>> >>
>> >> FAT supports batched discard as ext4.
>> >>
>> >> Cited from Lukas words.
>> >> "The current solution is not ideal because of its bad performance impact.
>> >> So basic idea to improve things is to avoid discarding every time some
>> >> blocks are freed. and instead batching is together into bigger trims,
>> >> which tends to be more effective."
>> >>
>> >> You can find an information in detail at following URLs.
>> >> http://lwn.net/Articles/397538/
>> >> http://lwn.net/Articles/383933/
>> >>
>> >> Clearify the meaning of "len" (Cited form Lukas mail)
>> >>
>> >> Let the "O" be free (bytes, blocks, whatever), and "=" be used.
>> >> Now, we have a filesystem like this.
>> >>
>> >> ? OOOO==O===OO===OOOOO==O===O===OOOOOOO===
>> >> ? ^ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?^
>> >> ? 0 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?40
>> >>
>> >> This is how it supposed to wotk if you have called FITIRM with parameters:
>> >>
>> >> start = 0
>> >> minlen = 2
>> >> len = 20
>> >>
>> >> So you will go through (blocks, bytes...) 0 -> 20
>> >>
>> >> ? OOOO==O===OO===OOOOO==O===O===OOOOOOO===
>> >> ? ^ ? ? ? ? ? ? ? ? ? ^
>> >> ? 0 ? ? ? ? ? ? ? ? ? 20
>> >>
>> >> So, you will call discard on extents:
>> >>
>> >> 0-3
>> >> You'll skip 6 because is smaller than minlen
>> >> 10-11
>> >> 15-19
>> >>
>> >> instead of
>> >>
>> >> 0-3
>> >> 10-11
>> >> 15-19
>> >> 30-36
>> >
>> > Hi thanks for the next version. And again I have to ask: Did you test it
>> > ? and how ? Did you tried xfstest No. 251 ? Couple of comments bellow.
>>
>> I tested it with your test program. Of course I modified for our
>> environment (eMMC).
>
> Ok, good.
>
>>
>> #include <errno.h>
>> #include <fcntl.h>
>> #include <stdio.h>
>> #include <stdint.h>
>> #include <sys/ioctl.h>
>>
>> struct fstrim_range {
>> ? ? ? ? uint64_t start;
>> ? ? ? ? uint64_t len;
>> ? ? ? ? uint64_t minlen;
>> };
>>
>> #define FITRIM ? ? ? ? ?_IOWR('X', 121, struct fstrim_range)
>>
>> int main(int argc, char **argv)
>> {
>> ? ? ? ? struct fstrim_range range;
>> ? ? ? ? uint64_t len;
>> ? ? ? ? int fd;
>>
>> ? ? ? ? if (argc < 2) {
>> ? ? ? ? ? ? ? ? fprintf(stderr, "usage: %s mountpoint [size]\n", argv[0]);
>> ? ? ? ? ? ? ? ? return 1;
>> ? ? ? ? }
>>
>> ? ? ? ? if (argc == 3)
>> ? ? ? ? ? ? ? ? len = atoll(argv[1]);
>> ? ? ? ? else
>> ? ? ? ? ? ? ? ? len = ((1UL<<31) - 1);
>>
>> ? ? ? ? range.start = 0;
>> ? ? ? ? range.len = len;
>> ? ? ? ? range.minlen = 256 * 1024; ? ? ?/* Minimum is 256KiB */
>
> Why exactly you need to set this ? What will happen if the minlen is 0 ?

It's dependent on eMMC chip. it's for our environment. If it passed
with 0, the code is working but the less than 256KiB trim command is
meaningless.

>
>>
>> ? ? ? ? fd = open(argv[1], O_RDONLY);
>> ? ? ? ? if (fd < 0) {
>> ? ? ? ? ? ? ? ? perror("open");
>> ? ? ? ? ? ? ? ? return 1;
>> ? ? ? ? }
>>
>> ? ? ? ? if (ioctl(fd, FITRIM, &range)) {
>> ? ? ? ? ? ? ? ? if (errno == EOPNOTSUPP)
>> ? ? ? ? ? ? ? ? ? ? ? ? fprintf(stderr, "TRIM not supported\n");
>> ? ? ? ? ? ? ? ? else
>> ? ? ? ? ? ? ? ? ? ? ? ? perror("FITRIM");
>> ? ? ? ? ? ? ? ? return 1;
>> ? ? ? ? }
>>
>> ? ? ? ? fprintf(stderr, "Trimmed size %llu\n", range.len);
>>
>> ? ? ? ? return 0;
>> }
>>
>> >
>> > Thanks!
>> > -Lukas
>> >
>> >>
>> >> Signed-off-by: Kyungmin Park <[email protected]>
>> >> ---
>> >> Changelog v3:
>> >> ? ? ? Adjust the minlen from queue discard_granularity
>> >> ? ? ? Use the corrent len usage
>> >> Changelog v2:
>> >> ? ? ? Use the given start and len as Lukas comments
>> >> ? ? ? Check the queue supports discard feature
>> >> ---
>> >> diff --git a/fs/fat/fat.h b/fs/fat/fat.h
>> >> index f504089..08b53e1 100644
>> >> --- a/fs/fat/fat.h
>> >> +++ b/fs/fat/fat.h
>> >> @@ -299,6 +299,7 @@ extern int fat_alloc_clusters(struct inode *inode, int *cluster,
>> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? int nr_cluster);
>> >> ?extern int fat_free_clusters(struct inode *inode, int cluster);
>> >> ?extern int fat_count_free_clusters(struct super_block *sb);
>> >> +extern int fat_trim_fs(struct super_block *sb, struct fstrim_range *range);
>> >>
>> >> ?/* fat/file.c */
>> >> ?extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
>> >> diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
>> >> index b47d2c9..a8e3837 100644
>> >> --- a/fs/fat/fatent.c
>> >> +++ b/fs/fat/fatent.c
>> >> @@ -1,6 +1,8 @@
>> >> ?/*
>> >> ? * Copyright (C) 2004, OGAWA Hirofumi
>> >> ? * Released under GPL v2.
>> >> + *
>> >> + * Batched discard support by Kyungmin Park <[email protected]>
>> >> ? */
>> >>
>> >> ?#include <linux/module.h>
>> >> @@ -541,6 +543,16 @@ out:
>> >> ? ? ? return err;
>> >> ?}
>> >>
>> >> +static int fat_issue_discard(struct super_block *sb, int cluster, int nr_clus)
>> >> +{
>> >> + ? ? struct msdos_sb_info *sbi = MSDOS_SB(sb);
>> >> + ? ? sector_t block, nr_blocks;
>> >> +
>> >> + ? ? block = fat_clus_to_blknr(sbi, cluster);
>> >> + ? ? nr_blocks = nr_clus * sbi->sec_per_clus;
>> >> + ? ? return sb_issue_discard(sb, block, nr_blocks, GFP_NOFS, 0);
>> >> +}
>> >> +
>> >> ?int fat_free_clusters(struct inode *inode, int cluster)
>> >> ?{
>> >> ? ? ? struct super_block *sb = inode->i_sb;
>> >> @@ -575,11 +587,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
>> >> ? ? ? ? ? ? ? ? ? ? ? if (cluster != fatent.entry + 1) {
>> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int nr_clus = fatent.entry - first_cl + 1;
>> >>
>> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? sb_issue_discard(sb,
>> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_clus_to_blknr(sbi, first_cl),
>> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? nr_clus * sbi->sec_per_clus,
>> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? GFP_NOFS, 0);
>> >> -
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_issue_discard(sb, first_cl, nr_clus);
>> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? first_cl = cluster;
>> >> ? ? ? ? ? ? ? ? ? ? ? }
>> >> ? ? ? ? ? ? ? }
>> >> @@ -683,3 +691,88 @@ out:
>> >> ? ? ? unlock_fat(sbi);
>> >> ? ? ? return err;
>> >> ?}
>> >> +
>> >> +int fat_trim_fs(struct super_block *sb, struct fstrim_range *range)
>> >> +{
>> >> + ? ? struct msdos_sb_info *sbi = MSDOS_SB(sb);
>> >> + ? ? struct fatent_operations *ops = sbi->fatent_ops;
>> >> + ? ? struct fat_entry fatent;
>> >> + ? ? unsigned long reada_blocks, reada_mask, cur_block;
>> >> + ? ? int err = 0, free, count, entry;
>> >> + ? ? int start, len, minlen, trimmed;
>> >> +
>> >> + ? ? start = range->start >> sb->s_blocksize_bits;
>> >> + ? ? start = start / sbi->sec_per_clus;
>> >> + ? ? len = range->len >> sb->s_blocksize_bits;
>> >> + ? ? len = len / sbi->sec_per_clus;
>> >> + ? ? minlen = range->minlen >> sb->s_blocksize_bits;
>> >> + ? ? minlen = minlen / sbi->sec_per_clus;
>> >> + ? ? trimmed = 0;
>> >> + ? ? count = 0;
>> >> +
>> >> + ? ? lock_fat(sbi);
>> >> + ? ? if (sbi->free_clusters != -1 && sbi->free_clus_valid)
>> >> + ? ? ? ? ? ? goto out;
>> >> +
>> >> + ? ? reada_blocks = FAT_READA_SIZE >> sb->s_blocksize_bits;
>> >> + ? ? reada_mask = reada_blocks - 1;
>> >> + ? ? cur_block = 0;
>> >> +
>> >> + ? ? entry = 0;
>> >> + ? ? free = 0;
>> >> + ? ? fatent_init(&fatent);
>> >> +
>> >> + ? ? if (start < FAT_START_ENT)
>> >> + ? ? ? ? ? ? start = FAT_START_ENT;
>> >> +
>> >> + ? ? fatent_set_entry(&fatent, start);
>> >> +
>> >> + ? ? while (count < sbi->max_cluster) {
>> >> + ? ? ? ? ? ? if (fatent.entry >= sbi->max_cluster)
>> >> + ? ? ? ? ? ? ? ? ? ? fatent.entry = FAT_START_ENT;
>> >> + ? ? ? ? ? ? /* readahead of fat blocks */
>> >> + ? ? ? ? ? ? if ((cur_block & reada_mask) == 0) {
>> >> + ? ? ? ? ? ? ? ? ? ? unsigned long rest = sbi->fat_length - cur_block;
>> >> + ? ? ? ? ? ? ? ? ? ? fat_ent_reada(sb, &fatent, min(reada_blocks, rest));
>> >
>> > You really do not need new variable "rest" just for passing it into one
>> > function. Get rid of it.
>>
>> Umm. I don't want to modify it since it's routine is same as
>> free_count codes. I just borrowed it from FAT codes.
>> >
>> >> + ? ? ? ? ? ? }
>> >> + ? ? ? ? ? ? cur_block++;
>> >> +
>> >> + ? ? ? ? ? ? err = fat_ent_read_block(sb, &fatent);
>> >> + ? ? ? ? ? ? if (err)
>> >> + ? ? ? ? ? ? ? ? ? ? goto out;
>> >> +
>> >> + ? ? ? ? ? ? do {
>> >> + ? ? ? ? ? ? ? ? ? ? if (ops->ent_get(&fatent) == FAT_ENT_FREE) {
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? free++;
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (!entry)
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? entry = fatent.entry;
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (count >= len && free >= minlen) {
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_issue_discard(sb, entry, free);
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? trimmed += free;
>
> I think you can remove this condition completely is you move "done"
> label befor the if (free >= minlen) condition.
>
>> > I really do not understand FAT code very much, but is this right ?
>> > Should not you be setting free = 0 ?
>> Right but count is larger then "len". it will exit at next if (count
>> >= len) goto done statement.
>
> Oh, ok I see. But it proves my point that this code is not very well
> readable.
>
>>
>> > What will happen if you'll end up
>> > in the same branch in next iteration ? -- free will be still set to
>> > previous value+1, bu you'll be discarding next entry. I am sorry but
>> > this whole thing is not very readable.
>>
>> >
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? }
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (count >= len)
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? goto done;
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ^^^^^^^^^^^^^^^^^^
> this

Okay I will fix it.
>
>> >> + ? ? ? ? ? ? ? ? ? ? } else if (entry) {
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (free >= minlen) {
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_issue_discard(sb, entry, free);
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? trimmed += free;
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? }
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (count >= len)
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? goto done;
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ^^^^^^^^^^^^^^^^^^
> and this
>
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? free = 0;
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? entry = 0;
>> >> + ? ? ? ? ? ? ? ? ? ? }
>> >
>> > I don't not see why you are testing count all the time since it has not been
>> > changed since the "if" condition started. how about doing one test
>> > before the "if" condition ?
>>
>> For looping the whole fat max_cluster size. now start can be any
>> address and if start is a middle point and len is max, then it will
>> search from middle -> end -> start -> middle. The "count" is used for
>> this purpose.
>>
>> Thank you,
>> Kyungmin Park
>> >
> can go here -->>
> ?? ? ? ? ? ? ? ? ? ? ? ? ? if (count >= len)
> ?? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? goto done;
>
>> >> + ? ? ? ? ? ? ? ? ? ? count++;
>> >> + ? ? ? ? ? ? } while (fat_ent_next(sbi, &fatent));
>> >> + ? ? }
>> >> + ? ? if (free >= minlen) {
>> >> + ? ? ? ? ? ? fat_issue_discard(sb, entry, free);
>> >> + ? ? ? ? ? ? trimmed += free;
>> >> + ? ? }
>> >> +done:
>> >> + ? ? range->len = (trimmed * sbi->sec_per_clus) << sb->s_blocksize_bits;
>> >> + ? ? fatent_brelse(&fatent);
>> >> +out:
>> >> + ? ? unlock_fat(sbi);
>> >> + ? ? return err;
>> >> +}
>> >> diff --git a/fs/fat/file.c b/fs/fat/file.c
>> >> index 7257752..9910aba 100644
>> >> --- a/fs/fat/file.c
>> >> +++ b/fs/fat/file.c
>> >> @@ -125,6 +125,36 @@ long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>> >> ? ? ? ? ? ? ? return fat_ioctl_get_attributes(inode, user_attr);
>> >> ? ? ? case FAT_IOCTL_SET_ATTRIBUTES:
>> >> ? ? ? ? ? ? ? return fat_ioctl_set_attributes(filp, user_attr);
>> >> + ? ? case FITRIM:
>> >> + ? ? {
>> >> + ? ? ? ? ? ? struct super_block *sb = inode->i_sb;
>> >> + ? ? ? ? ? ? struct request_queue *q = bdev_get_queue(sb->s_bdev);
>> >> + ? ? ? ? ? ? struct fstrim_range range;
>> >> + ? ? ? ? ? ? int ret = 0;
>> >> +
>> >> + ? ? ? ? ? ? if (!capable(CAP_SYS_ADMIN))
>> >> + ? ? ? ? ? ? ? ? ? ? return -EPERM;
>> >> +
>> >> + ? ? ? ? ? ? if (!blk_queue_discard(q))
>> >> + ? ? ? ? ? ? ? ? ? ? return -EOPNOTSUPP;
>> >> +
>> >> + ? ? ? ? ? ? if (copy_from_user(&range, (struct fstrim_range *)arg,
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? sizeof(range)))
>> >> + ? ? ? ? ? ? ? ? ? ? return -EFAULT;
>> >> +
>> >> + ? ? ? ? ? ? range.minlen = max((unsigned int)range.minlen,
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? q->limits.discard_granularity);
>> >> + ? ? ? ? ? ? ret = fat_trim_fs(sb, &range);
>> >> + ? ? ? ? ? ? if (ret < 0)
>> >> + ? ? ? ? ? ? ? ? ? ? return ret;
>> >> +
>> >> + ? ? ? ? ? ? if (copy_to_user((struct fstrim_range *)arg, &range,
>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? sizeof(range)))
>> >> + ? ? ? ? ? ? ? ? ? ? return -EFAULT;
>> >> +
>> >> + ? ? ? ? ? ? return 0;
>> >> + ? ? }
>> >> +
>> >> ? ? ? default:
>> >> ? ? ? ? ? ? ? return -ENOTTY; /* Inappropriate ioctl for device */
>> >> ? ? ? }
>> >>
>> >
>> > --
>> >
>>
>
> --

2011-03-02 08:25:35

by Kyungmin Park

[permalink] [raw]
Subject: Re: [PATCH v3] fat: Batched discard support for fat

On Wed, Mar 2, 2011 at 11:07 AM, Kyungmin Park <[email protected]> wrote:
> On Mon, Feb 28, 2011 at 9:51 PM, Lukas Czerner <[email protected]> wrote:
>> On Mon, 28 Feb 2011, Kyungmin Park wrote:
>>
>>> On Fri, Feb 25, 2011 at 8:17 PM, Lukas Czerner <[email protected]> wrote:
>>> > On Fri, 25 Feb 2011, Kyungmin Park wrote:
>>> >
>>> >> From: Kyungmin Park <[email protected]>
>>> >>
>>> >> FAT supports batched discard as ext4.
>>> >>
>>> >> Cited from Lukas words.
>>> >> "The current solution is not ideal because of its bad performance impact.
>>> >> So basic idea to improve things is to avoid discarding every time some
>>> >> blocks are freed. and instead batching is together into bigger trims,
>>> >> which tends to be more effective."
>>> >>
>>> >> You can find an information in detail at following URLs.
>>> >> http://lwn.net/Articles/397538/
>>> >> http://lwn.net/Articles/383933/
>>> >>
>>> >> Clearify the meaning of "len" (Cited form Lukas mail)
>>> >>
>>> >> Let the "O" be free (bytes, blocks, whatever), and "=" be used.
>>> >> Now, we have a filesystem like this.
>>> >>
>>> >> ? OOOO==O===OO===OOOOO==O===O===OOOOOOO===
>>> >> ? ^ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?^
>>> >> ? 0 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?40
>>> >>
>>> >> This is how it supposed to wotk if you have called FITIRM with parameters:
>>> >>
>>> >> start = 0
>>> >> minlen = 2
>>> >> len = 20
>>> >>
>>> >> So you will go through (blocks, bytes...) 0 -> 20
>>> >>
>>> >> ? OOOO==O===OO===OOOOO==O===O===OOOOOOO===
>>> >> ? ^ ? ? ? ? ? ? ? ? ? ^
>>> >> ? 0 ? ? ? ? ? ? ? ? ? 20
>>> >>
>>> >> So, you will call discard on extents:
>>> >>
>>> >> 0-3
>>> >> You'll skip 6 because is smaller than minlen
>>> >> 10-11
>>> >> 15-19
>>> >>
>>> >> instead of
>>> >>
>>> >> 0-3
>>> >> 10-11
>>> >> 15-19
>>> >> 30-36
>>> >
>>> > Hi thanks for the next version. And again I have to ask: Did you test it
>>> > ? and how ? Did you tried xfstest No. 251 ? Couple of comments bellow.
>>>
>>> I tested it with your test program. Of course I modified for our
>>> environment (eMMC).
>>
>> Ok, good.
>>
>>>
>>> #include <errno.h>
>>> #include <fcntl.h>
>>> #include <stdio.h>
>>> #include <stdint.h>
>>> #include <sys/ioctl.h>
>>>
>>> struct fstrim_range {
>>> ? ? ? ? uint64_t start;
>>> ? ? ? ? uint64_t len;
>>> ? ? ? ? uint64_t minlen;
>>> };
>>>
>>> #define FITRIM ? ? ? ? ?_IOWR('X', 121, struct fstrim_range)
>>>
>>> int main(int argc, char **argv)
>>> {
>>> ? ? ? ? struct fstrim_range range;
>>> ? ? ? ? uint64_t len;
>>> ? ? ? ? int fd;
>>>
>>> ? ? ? ? if (argc < 2) {
>>> ? ? ? ? ? ? ? ? fprintf(stderr, "usage: %s mountpoint [size]\n", argv[0]);
>>> ? ? ? ? ? ? ? ? return 1;
>>> ? ? ? ? }
>>>
>>> ? ? ? ? if (argc == 3)
>>> ? ? ? ? ? ? ? ? len = atoll(argv[1]);
>>> ? ? ? ? else
>>> ? ? ? ? ? ? ? ? len = ((1UL<<31) - 1);
>>>
>>> ? ? ? ? range.start = 0;
>>> ? ? ? ? range.len = len;
>>> ? ? ? ? range.minlen = 256 * 1024; ? ? ?/* Minimum is 256KiB */
>>
>> Why exactly you need to set this ? What will happen if the minlen is 0 ?
>
> It's dependent on eMMC chip. it's for our environment. If it passed
> with 0, the code is working but the less than 256KiB trim command is
> meaningless.
>
>>
>>>
>>> ? ? ? ? fd = open(argv[1], O_RDONLY);
>>> ? ? ? ? if (fd < 0) {
>>> ? ? ? ? ? ? ? ? perror("open");
>>> ? ? ? ? ? ? ? ? return 1;
>>> ? ? ? ? }
>>>
>>> ? ? ? ? if (ioctl(fd, FITRIM, &range)) {
>>> ? ? ? ? ? ? ? ? if (errno == EOPNOTSUPP)
>>> ? ? ? ? ? ? ? ? ? ? ? ? fprintf(stderr, "TRIM not supported\n");
>>> ? ? ? ? ? ? ? ? else
>>> ? ? ? ? ? ? ? ? ? ? ? ? perror("FITRIM");
>>> ? ? ? ? ? ? ? ? return 1;
>>> ? ? ? ? }
>>>
>>> ? ? ? ? fprintf(stderr, "Trimmed size %llu\n", range.len);
>>>
>>> ? ? ? ? return 0;
>>> }
>>>
>>> >
>>> > Thanks!
>>> > -Lukas
>>> >
>>> >>
>>> >> Signed-off-by: Kyungmin Park <[email protected]>
>>> >> ---
>>> >> Changelog v3:
>>> >> ? ? ? Adjust the minlen from queue discard_granularity
>>> >> ? ? ? Use the corrent len usage
>>> >> Changelog v2:
>>> >> ? ? ? Use the given start and len as Lukas comments
>>> >> ? ? ? Check the queue supports discard feature
>>> >> ---
>>> >> diff --git a/fs/fat/fat.h b/fs/fat/fat.h
>>> >> index f504089..08b53e1 100644
>>> >> --- a/fs/fat/fat.h
>>> >> +++ b/fs/fat/fat.h
>>> >> @@ -299,6 +299,7 @@ extern int fat_alloc_clusters(struct inode *inode, int *cluster,
>>> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? int nr_cluster);
>>> >> ?extern int fat_free_clusters(struct inode *inode, int cluster);
>>> >> ?extern int fat_count_free_clusters(struct super_block *sb);
>>> >> +extern int fat_trim_fs(struct super_block *sb, struct fstrim_range *range);
>>> >>
>>> >> ?/* fat/file.c */
>>> >> ?extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
>>> >> diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
>>> >> index b47d2c9..a8e3837 100644
>>> >> --- a/fs/fat/fatent.c
>>> >> +++ b/fs/fat/fatent.c
>>> >> @@ -1,6 +1,8 @@
>>> >> ?/*
>>> >> ? * Copyright (C) 2004, OGAWA Hirofumi
>>> >> ? * Released under GPL v2.
>>> >> + *
>>> >> + * Batched discard support by Kyungmin Park <[email protected]>
>>> >> ? */
>>> >>
>>> >> ?#include <linux/module.h>
>>> >> @@ -541,6 +543,16 @@ out:
>>> >> ? ? ? return err;
>>> >> ?}
>>> >>
>>> >> +static int fat_issue_discard(struct super_block *sb, int cluster, int nr_clus)
>>> >> +{
>>> >> + ? ? struct msdos_sb_info *sbi = MSDOS_SB(sb);
>>> >> + ? ? sector_t block, nr_blocks;
>>> >> +
>>> >> + ? ? block = fat_clus_to_blknr(sbi, cluster);
>>> >> + ? ? nr_blocks = nr_clus * sbi->sec_per_clus;
>>> >> + ? ? return sb_issue_discard(sb, block, nr_blocks, GFP_NOFS, 0);
>>> >> +}
>>> >> +
>>> >> ?int fat_free_clusters(struct inode *inode, int cluster)
>>> >> ?{
>>> >> ? ? ? struct super_block *sb = inode->i_sb;
>>> >> @@ -575,11 +587,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
>>> >> ? ? ? ? ? ? ? ? ? ? ? if (cluster != fatent.entry + 1) {
>>> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int nr_clus = fatent.entry - first_cl + 1;
>>> >>
>>> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? sb_issue_discard(sb,
>>> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_clus_to_blknr(sbi, first_cl),
>>> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? nr_clus * sbi->sec_per_clus,
>>> >> - ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? GFP_NOFS, 0);
>>> >> -
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_issue_discard(sb, first_cl, nr_clus);
>>> >> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? first_cl = cluster;
>>> >> ? ? ? ? ? ? ? ? ? ? ? }
>>> >> ? ? ? ? ? ? ? }
>>> >> @@ -683,3 +691,88 @@ out:
>>> >> ? ? ? unlock_fat(sbi);
>>> >> ? ? ? return err;
>>> >> ?}
>>> >> +
>>> >> +int fat_trim_fs(struct super_block *sb, struct fstrim_range *range)
>>> >> +{
>>> >> + ? ? struct msdos_sb_info *sbi = MSDOS_SB(sb);
>>> >> + ? ? struct fatent_operations *ops = sbi->fatent_ops;
>>> >> + ? ? struct fat_entry fatent;
>>> >> + ? ? unsigned long reada_blocks, reada_mask, cur_block;
>>> >> + ? ? int err = 0, free, count, entry;
>>> >> + ? ? int start, len, minlen, trimmed;
>>> >> +
>>> >> + ? ? start = range->start >> sb->s_blocksize_bits;
>>> >> + ? ? start = start / sbi->sec_per_clus;
>>> >> + ? ? len = range->len >> sb->s_blocksize_bits;
>>> >> + ? ? len = len / sbi->sec_per_clus;
>>> >> + ? ? minlen = range->minlen >> sb->s_blocksize_bits;
>>> >> + ? ? minlen = minlen / sbi->sec_per_clus;
>>> >> + ? ? trimmed = 0;
>>> >> + ? ? count = 0;
>>> >> +
>>> >> + ? ? lock_fat(sbi);
>>> >> + ? ? if (sbi->free_clusters != -1 && sbi->free_clus_valid)
>>> >> + ? ? ? ? ? ? goto out;
>>> >> +
>>> >> + ? ? reada_blocks = FAT_READA_SIZE >> sb->s_blocksize_bits;
>>> >> + ? ? reada_mask = reada_blocks - 1;
>>> >> + ? ? cur_block = 0;
>>> >> +
>>> >> + ? ? entry = 0;
>>> >> + ? ? free = 0;
>>> >> + ? ? fatent_init(&fatent);
>>> >> +
>>> >> + ? ? if (start < FAT_START_ENT)
>>> >> + ? ? ? ? ? ? start = FAT_START_ENT;
>>> >> +
>>> >> + ? ? fatent_set_entry(&fatent, start);
>>> >> +
>>> >> + ? ? while (count < sbi->max_cluster) {
>>> >> + ? ? ? ? ? ? if (fatent.entry >= sbi->max_cluster)
>>> >> + ? ? ? ? ? ? ? ? ? ? fatent.entry = FAT_START_ENT;
>>> >> + ? ? ? ? ? ? /* readahead of fat blocks */
>>> >> + ? ? ? ? ? ? if ((cur_block & reada_mask) == 0) {
>>> >> + ? ? ? ? ? ? ? ? ? ? unsigned long rest = sbi->fat_length - cur_block;
>>> >> + ? ? ? ? ? ? ? ? ? ? fat_ent_reada(sb, &fatent, min(reada_blocks, rest));
>>> >
>>> > You really do not need new variable "rest" just for passing it into one
>>> > function. Get rid of it.
>>>
>>> Umm. I don't want to modify it since it's routine is same as
>>> free_count codes. I just borrowed it from FAT codes.
>>> >
>>> >> + ? ? ? ? ? ? }
>>> >> + ? ? ? ? ? ? cur_block++;
>>> >> +
>>> >> + ? ? ? ? ? ? err = fat_ent_read_block(sb, &fatent);
>>> >> + ? ? ? ? ? ? if (err)
>>> >> + ? ? ? ? ? ? ? ? ? ? goto out;
>>> >> +
>>> >> + ? ? ? ? ? ? do {
>>> >> + ? ? ? ? ? ? ? ? ? ? if (ops->ent_get(&fatent) == FAT_ENT_FREE) {
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? free++;
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (!entry)
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? entry = fatent.entry;
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (count >= len && free >= minlen) {
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_issue_discard(sb, entry, free);
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? trimmed += free;
>>
>> I think you can remove this condition completely is you move "done"
>> label befor the if (free >= minlen) condition.
>>
>>> > I really do not understand FAT code very much, but is this right ?
>>> > Should not you be setting free = 0 ?
>>> Right but count is larger then "len". it will exit at next if (count
>>> >= len) goto done statement.
>>
>> Oh, ok I see. But it proves my point that this code is not very well
>> readable.
>>
>>>
>>> > What will happen if you'll end up
>>> > in the same branch in next iteration ? -- free will be still set to
>>> > previous value+1, bu you'll be discarding next entry. I am sorry but
>>> > this whole thing is not very readable.
>>>
>>> >
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? }
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (count >= len)
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? goto done;
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ^^^^^^^^^^^^^^^^^^
>> this
>
> Okay I will fix it.
>>
>>> >> + ? ? ? ? ? ? ? ? ? ? } else if (entry) {
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (free >= minlen) {
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_issue_discard(sb, entry, free);
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? trimmed += free;
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? }
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (count >= len)
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? goto done;
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ^^^^^^^^^^^^^^^^^^
>> and this
>>
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? free = 0;
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? entry = 0;
>>> >> + ? ? ? ? ? ? ? ? ? ? }
>>> >
>>> > I don't not see why you are testing count all the time since it has not been
>>> > changed since the "if" condition started. how about doing one test
>>> > before the "if" condition ?
>>>
>>> For looping the whole fat max_cluster size. now start can be any
>>> address and if start is a middle point and len is max, then it will
>>> search from middle -> end -> start -> middle. The "count" is used for
>>> this purpose.
>>>
>>> Thank you,
>>> Kyungmin Park
>>> >
>> can go here -->>
>> ?? ? ? ? ? ? ? ? ? ? ? ? ? if (count >= len)
>> ?? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? goto done;
>>
>>> >> + ? ? ? ? ? ? ? ? ? ? count++;
>>> >> + ? ? ? ? ? ? } while (fat_ent_next(sbi, &fatent));
>>> >> + ? ? }

How about this code?

while (count < sbi->max_cluster) {
if (fatent.entry >= sbi->max_cluster)
fatent.entry = FAT_START_ENT;
/* readahead of fat blocks */
if ((cur_block & reada_mask) == 0) {
unsigned long rest = sbi->fat_length - cur_block;
fat_ent_reada(sb, &fatent, min(reada_blocks, rest));
}
cur_block++;

err = fat_ent_read_block(sb, &fatent);
if (err)
goto out;

do {
if (ops->ent_get(&fatent) == FAT_ENT_FREE) {
free++;
if (!entry)
entry = fatent.entry;
} else if (entry) {
/*
* Discard if free entry is equal or greater
* than minimum length
*/
if (free >= minlen) {
fat_issue_discard(sb, entry, free);
trimmed += free;
}
free = 0;
entry = 0;
}
count++;
/* Check the loop count */
if (count >= len)
goto done;
} while (fat_ent_next(sbi, &fatent));
}
done:
if (free >= minlen) {
fat_issue_discard(sb, entry, free);
trimmed += free;
}
range->len = (trimmed * sbi->sec_per_clus) << sb->s_blocksize_bits;
fatent_brelse(&fatent);
out:
unlock_fat(sbi);
return err;


>>> >> + ? ? if (free >= minlen) {
>>> >> + ? ? ? ? ? ? fat_issue_discard(sb, entry, free);
>>> >> + ? ? ? ? ? ? trimmed += free;
>>> >> + ? ? }
>>> >> +done:
>>> >> + ? ? range->len = (trimmed * sbi->sec_per_clus) << sb->s_blocksize_bits;
>>> >> + ? ? fatent_brelse(&fatent);
>>> >> +out:
>>> >> + ? ? unlock_fat(sbi);
>>> >> + ? ? return err;
>>> >> +}
>>> >> diff --git a/fs/fat/file.c b/fs/fat/file.c
>>> >> index 7257752..9910aba 100644
>>> >> --- a/fs/fat/file.c
>>> >> +++ b/fs/fat/file.c
>>> >> @@ -125,6 +125,36 @@ long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>>> >> ? ? ? ? ? ? ? return fat_ioctl_get_attributes(inode, user_attr);
>>> >> ? ? ? case FAT_IOCTL_SET_ATTRIBUTES:
>>> >> ? ? ? ? ? ? ? return fat_ioctl_set_attributes(filp, user_attr);
>>> >> + ? ? case FITRIM:
>>> >> + ? ? {
>>> >> + ? ? ? ? ? ? struct super_block *sb = inode->i_sb;
>>> >> + ? ? ? ? ? ? struct request_queue *q = bdev_get_queue(sb->s_bdev);
>>> >> + ? ? ? ? ? ? struct fstrim_range range;
>>> >> + ? ? ? ? ? ? int ret = 0;
>>> >> +
>>> >> + ? ? ? ? ? ? if (!capable(CAP_SYS_ADMIN))
>>> >> + ? ? ? ? ? ? ? ? ? ? return -EPERM;
>>> >> +
>>> >> + ? ? ? ? ? ? if (!blk_queue_discard(q))
>>> >> + ? ? ? ? ? ? ? ? ? ? return -EOPNOTSUPP;
>>> >> +
>>> >> + ? ? ? ? ? ? if (copy_from_user(&range, (struct fstrim_range *)arg,
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? sizeof(range)))
>>> >> + ? ? ? ? ? ? ? ? ? ? return -EFAULT;
>>> >> +
>>> >> + ? ? ? ? ? ? range.minlen = max((unsigned int)range.minlen,
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? q->limits.discard_granularity);
>>> >> + ? ? ? ? ? ? ret = fat_trim_fs(sb, &range);
>>> >> + ? ? ? ? ? ? if (ret < 0)
>>> >> + ? ? ? ? ? ? ? ? ? ? return ret;
>>> >> +
>>> >> + ? ? ? ? ? ? if (copy_to_user((struct fstrim_range *)arg, &range,
>>> >> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? sizeof(range)))
>>> >> + ? ? ? ? ? ? ? ? ? ? return -EFAULT;
>>> >> +
>>> >> + ? ? ? ? ? ? return 0;
>>> >> + ? ? }
>>> >> +
>>> >> ? ? ? default:
>>> >> ? ? ? ? ? ? ? return -ENOTTY; /* Inappropriate ioctl for device */
>>> >> ? ? ? }
>>> >>
>>> >
>>> > --
>>> >
>>>
>>
>> --
>

2011-03-02 12:10:48

by Lukas Czerner

[permalink] [raw]
Subject: Re: [PATCH v3] fat: Batched discard support for fat

On Wed, 2 Mar 2011, Kyungmin Park wrote:

> On Wed, Mar 2, 2011 at 11:07 AM, Kyungmin Park <[email protected]> wrote:
> > On Mon, Feb 28, 2011 at 9:51 PM, Lukas Czerner <[email protected]> wrote:
> >> On Mon, 28 Feb 2011, Kyungmin Park wrote:
> >>
> >>> On Fri, Feb 25, 2011 at 8:17 PM, Lukas Czerner <[email protected]> wrote:
> >>> > On Fri, 25 Feb 2011, Kyungmin Park wrote:
> >>> >
> >>> >> From: Kyungmin Park <[email protected]>
> >>> >>
> >>> >> FAT supports batched discard as ext4.
> >>> >>
> >>> >> Cited from Lukas words.
> >>> >> "The current solution is not ideal because of its bad performance impact.
> >>> >> So basic idea to improve things is to avoid discarding every time some
> >>> >> blocks are freed. and instead batching is together into bigger trims,
> >>> >> which tends to be more effective."
> >>> >>
> >>> >> You can find an information in detail at following URLs.
> >>> >> http://lwn.net/Articles/397538/
> >>> >> http://lwn.net/Articles/383933/
> >>> >>
> >>> >> Clearify the meaning of "len" (Cited form Lukas mail)
> >>> >>
> >>> >> Let the "O" be free (bytes, blocks, whatever), and "=" be used.
> >>> >> Now, we have a filesystem like this.
> >>> >>
> >>> >> ? OOOO==O===OO===OOOOO==O===O===OOOOOOO===
> >>> >> ? ^ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?^
> >>> >> ? 0 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?40
> >>> >>
> >>> >> This is how it supposed to wotk if you have called FITIRM with parameters:
> >>> >>
> >>> >> start = 0
> >>> >> minlen = 2
> >>> >> len = 20
> >>> >>
> >>> >> So you will go through (blocks, bytes...) 0 -> 20
> >>> >>
> >>> >> ? OOOO==O===OO===OOOOO==O===O===OOOOOOO===
> >>> >> ? ^ ? ? ? ? ? ? ? ? ? ^
> >>> >> ? 0 ? ? ? ? ? ? ? ? ? 20
> >>> >>
> >>> >> So, you will call discard on extents:
> >>> >>
> >>> >> 0-3
> >>> >> You'll skip 6 because is smaller than minlen
> >>> >> 10-11
> >>> >> 15-19
> >>> >>
> >>> >> instead of
> >>> >>
> >>> >> 0-3
> >>> >> 10-11
> >>> >> 15-19
> >>> >> 30-36
> >>> >
> >>> > Hi thanks for the next version. And again I have to ask: Did you test it
> >>> > ? and how ? Did you tried xfstest No. 251 ? Couple of comments bellow.
> >>>
> >>> I tested it with your test program. Of course I modified for our
> >>> environment (eMMC).
> >>
> >> Ok, good.
> >>
> >>>
> >>> #include <errno.h>
> >>> #include <fcntl.h>
> >>> #include <stdio.h>
> >>> #include <stdint.h>
> >>> #include <sys/ioctl.h>
> >>>
> >>> struct fstrim_range {
> >>> ? ? ? ? uint64_t start;
> >>> ? ? ? ? uint64_t len;
> >>> ? ? ? ? uint64_t minlen;
> >>> };
> >>>
> >>> #define FITRIM ? ? ? ? ?_IOWR('X', 121, struct fstrim_range)
> >>>
> >>> int main(int argc, char **argv)
> >>> {
> >>> ? ? ? ? struct fstrim_range range;
> >>> ? ? ? ? uint64_t len;
> >>> ? ? ? ? int fd;
> >>>
> >>> ? ? ? ? if (argc < 2) {
> >>> ? ? ? ? ? ? ? ? fprintf(stderr, "usage: %s mountpoint [size]\n", argv[0]);
> >>> ? ? ? ? ? ? ? ? return 1;
> >>> ? ? ? ? }
> >>>
> >>> ? ? ? ? if (argc == 3)
> >>> ? ? ? ? ? ? ? ? len = atoll(argv[1]);
> >>> ? ? ? ? else
> >>> ? ? ? ? ? ? ? ? len = ((1UL<<31) - 1);
> >>>
> >>> ? ? ? ? range.start = 0;
> >>> ? ? ? ? range.len = len;
> >>> ? ? ? ? range.minlen = 256 * 1024; ? ? ?/* Minimum is 256KiB */
> >>
> >> Why exactly you need to set this ? What will happen if the minlen is 0 ?
> >
> > It's dependent on eMMC chip. it's for our environment. If it passed
> > with 0, the code is working but the less than 256KiB trim command is
> > meaningless.

Ok but user would not care, actually he would not even know so it is
good that this will work, but if kernel knows what minlen value is
meaningless maybe you should adjust that properly the same way as you
are adjusting according to the discard_granularity ?

..snip..

>
> How about this code?
>
> while (count < sbi->max_cluster) {
> if (fatent.entry >= sbi->max_cluster)
> fatent.entry = FAT_START_ENT;
> /* readahead of fat blocks */
> if ((cur_block & reada_mask) == 0) {
> unsigned long rest = sbi->fat_length - cur_block;
> fat_ent_reada(sb, &fatent, min(reada_blocks, rest));
> }
> cur_block++;
>
> err = fat_ent_read_block(sb, &fatent);
> if (err)
> goto out;
>
> do {
> if (ops->ent_get(&fatent) == FAT_ENT_FREE) {
> free++;
> if (!entry)
> entry = fatent.entry;
> } else if (entry) {
> /*
> * Discard if free entry is equal or greater
> * than minimum length
> */

This comment states what we can already see in the simple condition, so
it is not needed, but rather comment while() and do..while loops. But as
I said I am not familiar with FAT code so that might be just my fault.

> if (free >= minlen) {
> fat_issue_discard(sb, entry, free);
> trimmed += free;
> }
> free = 0;
> entry = 0;
> }
> count++;
> /* Check the loop count */
same thing with this comment, we can see that in the simple condition.
> if (count >= len)
> goto done;
> } while (fat_ent_next(sbi, &fatent));
> }
> done:
> if (free >= minlen) {
> fat_issue_discard(sb, entry, free);
> trimmed += free;
> }
> range->len = (trimmed * sbi->sec_per_clus) << sb->s_blocksize_bits;
> fatent_brelse(&fatent);
> out:
> unlock_fat(sbi);
> return err;
>
It looks better, thanks for your work!

-Lukas


..snip..

2011-03-05 04:25:40

by Kyungmin Park

[permalink] [raw]
Subject: Re: [PATCH v3] fat: Batched discard support for fat

On Wed, Mar 2, 2011 at 9:10 PM, Lukas Czerner <[email protected]> wrote:
> On Wed, 2 Mar 2011, Kyungmin Park wrote:
>
>> On Wed, Mar 2, 2011 at 11:07 AM, Kyungmin Park <[email protected]> wrote:
>> > On Mon, Feb 28, 2011 at 9:51 PM, Lukas Czerner <[email protected]> wrote:
>> >> On Mon, 28 Feb 2011, Kyungmin Park wrote:
>> >>
>> >>> On Fri, Feb 25, 2011 at 8:17 PM, Lukas Czerner <[email protected]> wrote:
>> >>> > On Fri, 25 Feb 2011, Kyungmin Park wrote:
>> >>> >
>> >>> >> From: Kyungmin Park <[email protected]>
>> >>> >>
>> >>> >> FAT supports batched discard as ext4.
>> >>> >>
>> >>> >> Cited from Lukas words.
>> >>> >> "The current solution is not ideal because of its bad performance impact.
>> >>> >> So basic idea to improve things is to avoid discarding every time some
>> >>> >> blocks are freed. and instead batching is together into bigger trims,
>> >>> >> which tends to be more effective."
>> >>> >>
>> >>> >> You can find an information in detail at following URLs.
>> >>> >> http://lwn.net/Articles/397538/
>> >>> >> http://lwn.net/Articles/383933/
>> >>> >>
>> >>> >> Clearify the meaning of "len" (Cited form Lukas mail)
>> >>> >>
>> >>> >> Let the "O" be free (bytes, blocks, whatever), and "=" be used.
>> >>> >> Now, we have a filesystem like this.
>> >>> >>
>> >>> >> ? OOOO==O===OO===OOOOO==O===O===OOOOOOO===
>> >>> >> ? ^ ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?^
>> >>> >> ? 0 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?40
>> >>> >>
>> >>> >> This is how it supposed to wotk if you have called FITIRM with parameters:
>> >>> >>
>> >>> >> start = 0
>> >>> >> minlen = 2
>> >>> >> len = 20
>> >>> >>
>> >>> >> So you will go through (blocks, bytes...) 0 -> 20
>> >>> >>
>> >>> >> ? OOOO==O===OO===OOOOO==O===O===OOOOOOO===
>> >>> >> ? ^ ? ? ? ? ? ? ? ? ? ^
>> >>> >> ? 0 ? ? ? ? ? ? ? ? ? 20
>> >>> >>
>> >>> >> So, you will call discard on extents:
>> >>> >>
>> >>> >> 0-3
>> >>> >> You'll skip 6 because is smaller than minlen
>> >>> >> 10-11
>> >>> >> 15-19
>> >>> >>
>> >>> >> instead of
>> >>> >>
>> >>> >> 0-3
>> >>> >> 10-11
>> >>> >> 15-19
>> >>> >> 30-36
>> >>> >
>> >>> > Hi thanks for the next version. And again I have to ask: Did you test it
>> >>> > ? and how ? Did you tried xfstest No. 251 ? Couple of comments bellow.
>> >>>
>> >>> I tested it with your test program. Of course I modified for our
>> >>> environment (eMMC).
>> >>
>> >> Ok, good.
>> >>
>> >>>
>> >>> #include <errno.h>
>> >>> #include <fcntl.h>
>> >>> #include <stdio.h>
>> >>> #include <stdint.h>
>> >>> #include <sys/ioctl.h>
>> >>>
>> >>> struct fstrim_range {
>> >>> ? ? ? ? uint64_t start;
>> >>> ? ? ? ? uint64_t len;
>> >>> ? ? ? ? uint64_t minlen;
>> >>> };
>> >>>
>> >>> #define FITRIM ? ? ? ? ?_IOWR('X', 121, struct fstrim_range)
>> >>>
>> >>> int main(int argc, char **argv)
>> >>> {
>> >>> ? ? ? ? struct fstrim_range range;
>> >>> ? ? ? ? uint64_t len;
>> >>> ? ? ? ? int fd;
>> >>>
>> >>> ? ? ? ? if (argc < 2) {
>> >>> ? ? ? ? ? ? ? ? fprintf(stderr, "usage: %s mountpoint [size]\n", argv[0]);
>> >>> ? ? ? ? ? ? ? ? return 1;
>> >>> ? ? ? ? }
>> >>>
>> >>> ? ? ? ? if (argc == 3)
>> >>> ? ? ? ? ? ? ? ? len = atoll(argv[1]);
>> >>> ? ? ? ? else
>> >>> ? ? ? ? ? ? ? ? len = ((1UL<<31) - 1);
>> >>>
>> >>> ? ? ? ? range.start = 0;
>> >>> ? ? ? ? range.len = len;
>> >>> ? ? ? ? range.minlen = 256 * 1024; ? ? ?/* Minimum is 256KiB */
>> >>
>> >> Why exactly you need to set this ? What will happen if the minlen is 0 ?
>> >
>> > It's dependent on eMMC chip. it's for our environment. If it passed
>> > with 0, the code is working but the less than 256KiB trim command is
>> > meaningless.
>
> Ok but user would not care, actually he would not even know so it is
> good that this will work, but if kernel knows what minlen value is
> meaningless maybe you should adjust that properly the same way as you
> are adjusting according to the discard_granularity ?

It's out of this patch, it will be handled each underlying devices,
e.g., MMC and ATA.
>
> ..snip..
>
>>
>> How about this code?
>>
>> ? ? ? ? while (count < sbi->max_cluster) {
>> ? ? ? ? ? ? ? ? if (fatent.entry >= sbi->max_cluster)
>> ? ? ? ? ? ? ? ? ? ? ? ? fatent.entry = FAT_START_ENT;
>> ? ? ? ? ? ? ? ? /* readahead of fat blocks */
>> ? ? ? ? ? ? ? ? if ((cur_block & reada_mask) == 0) {
>> ? ? ? ? ? ? ? ? ? ? ? ? unsigned long rest = sbi->fat_length - cur_block;
>> ? ? ? ? ? ? ? ? ? ? ? ? fat_ent_reada(sb, &fatent, min(reada_blocks, rest));
>> ? ? ? ? ? ? ? ? }
>> ? ? ? ? ? ? ? ? cur_block++;
>>
>> ? ? ? ? ? ? ? ? err = fat_ent_read_block(sb, &fatent);
>> ? ? ? ? ? ? ? ? if (err)
>> ? ? ? ? ? ? ? ? ? ? ? ? goto out;
>>
>> ? ? ? ? ? ? ? ? do {
>> ? ? ? ? ? ? ? ? ? ? ? ? if (ops->ent_get(&fatent) == FAT_ENT_FREE) {
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? free++;
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (!entry)
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? entry = fatent.entry;
>> ? ? ? ? ? ? ? ? ? ? ? ? } else if (entry) {
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? /*
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?* Discard if free entry is equal or greater
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?* than minimum length
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?*/
>
> This comment states what we can already see in the simple condition, so
> it is not needed, but rather comment while() and do..while loops. But as
> I said I am not familiar with FAT code so that might be just my fault.

No problem I will delete it.
>
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? if (free >= minlen) {
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? fat_issue_discard(sb, entry, free);
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? trimmed += free;
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? }
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? free = 0;
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? entry = 0;
>> ? ? ? ? ? ? ? ? ? ? ? ? }
>> ? ? ? ? ? ? ? ? ? ? ? ? count++;
>> ? ? ? ? ? ? ? ? ? ? ? ? /* Check the loop count */
> same thing with this comment, we can see that in the simple condition.
I'll delete it.
>> ? ? ? ? ? ? ? ? ? ? ? ? if (count >= len)
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? goto done;
>> ? ? ? ? ? ? ? ? } while (fat_ent_next(sbi, &fatent));
>> ? ? ? ? }
>> done:
>> ? ? ? ? if (free >= minlen) {
>> ? ? ? ? ? ? ? ? fat_issue_discard(sb, entry, free);
>> ? ? ? ? ? ? ? ? trimmed += free;
>> ? ? ? ? }
>> ? ? ? ? range->len = (trimmed * sbi->sec_per_clus) << sb->s_blocksize_bits;
>> ? ? ? ? fatent_brelse(&fatent);
>> out:
>> ? ? ? ? unlock_fat(sbi);
>> ? ? ? ? return err;
>>
> It looks better, thanks for your work!

I'll resend it.

Thank you,
Kyungmin Park
>
> -Lukas
>
>
> ..snip..