LinuxLists.cc - [PATCH 1/1] fat: improve sync performance by grouping writes revised again

2006-11-01 16:24:32

Subject: [PATCH 1/1] fat: improve sync performance by grouping writes revised again

From: Holden Karau <[email protected]> http://www.holdenkarau.com
This is an attempt at improving fat_mirror_bhs in sync mode [namely it
writes all of the data for a backup block, and then blocks untill
finished]. The old behavior would write & block in smaller chunks, so
this should be slightly faster. It also removes the fix me requesting
that it be fixed to behave this way :-)
Signed-off-by: Holden Karau <[email protected]>
---
This is an improved version of an earlier patch, I've cleaned up the
formatting, it now also groups the write for the primary fat block
[so even if the user has only 2 FATs it should still be slightly
faster]. and it now handles running running out of memory more
gracefully. If no one objects, I'd like to see if it would be possible to
put this in the -mm tree to make sure it doesn't eat anyones fs :-)
In case the patch gets mangled, I've put it up at
http://www.holdenkarau.com/~holden/projects/fat/001_improve_fat_sync_performance_revised4.patch

And now for the patch:
--- a/fs/fat/fatent.c 2006-09-19 23:42:06.000000000 -0400
+++ b/fs/fat/fatent.c 2006-11-01 10:58:19.000000000 -0500
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2004, OGAWA Hirofumi
+ * Copyright (C) 2006, Holden Karau [Xandros]
* Released under GPL v2.
*/

@@ -343,52 +344,83 @@ int fat_ent_read(struct inode *inode, st
return ops->ent_get(fatent);
}

-/* FIXME: We can write the blocks as more big chunk. */
-static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs,
- int nr_bhs)
+
+
+static int fat_mirror_bhs_optw(struct super_block *sb, struct buffer_head **bhs,
+ int nr_bhs , int wait)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
- struct buffer_head *c_bh;
- int err, n, copy;
+ struct buffer_head **c_bh;
+ int err, n, copy, i;

+ /* Always wait if mounted -o sync */
+ if (sb->s_flags & MS_SYNCHRONOUS )
+ wait = 1;
+ c_bh = kmalloc(nr_bhs*(sbi->fats) , GFP_KERNEL);
+ if (NULL == c_bh) {
+ printk(KERN_CRIT "not enough memory to store pointers to FAT blocks, will not sync. Possible data loss\n");
+ err = -ENOMEM;
+ goto error;
+ }
err = 0;
+ i = 0;
for (copy = 1; copy < sbi->fats; copy++) {
sector_t backup_fat = sbi->fat_length * copy;
-
- for (n = 0; n < nr_bhs; n++) {
- c_bh = sb_getblk(sb, backup_fat + bhs[n]->b_blocknr);
- if (!c_bh) {
- err = -ENOMEM;
- goto error;
+ for (n = 0 ; n < nr_bhs ; n++ ) {
+ c_bh[(copy-1)*nr_bhs+n] = sb_getblk(sb, backup_fat + bhs[n]->b_blocknr);
+ /* If there is not enough memory, fall back to the old system */
+ if (!c_bh[(copy-1)*nr_bhs+n]) {
+ printk("fat: not enough memory for all blocks , syncing at %d\n" ,(copy-1)*nr_bhs+n);
+ fat_sync_bhs_optw( c_bh+i , (copy-1)*nr_bhs+n-i-1 , wait );
+ /* Free the now sync'd blocks */
+ for (; i < (copy-1)*nr_bhs+n ; i++)
+ brelse(c_bh[i]);
+ /* We try the same block again */
+ c_bh[(copy-1)*nr_bhs+n] = sb_getblk(sb, backup_fat + bhs[n]->b_blocknr);
+ if (!c_bh[(copy-1)*nr_bhs+n]) {
+ printk(KERN_CRIT "fat:not enough memory for block after existing blocks released. Possible data loss.\n");
+ err = -ENOMEM;
+ goto error;
+ }
}
- memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize);
- set_buffer_uptodate(c_bh);
- mark_buffer_dirty(c_bh);
- if (sb->s_flags & MS_SYNCHRONOUS)
- err = sync_dirty_buffer(c_bh);
- brelse(c_bh);
- if (err)
- goto error;
+ memcpy(c_bh[(copy-1)*nr_bhs+n]->b_data, bhs[n]->b_data, sb->s_blocksize);
+ set_buffer_uptodate(c_bh[(copy-1)*nr_bhs+n]);
+ mark_buffer_dirty(c_bh[(copy-1)*nr_bhs+n]);
}
}
+
+ if (wait) {
+ for (n = 0 ; n < nr_bhs ; n++)
+ c_bh[nr_bhs*(sbi->fats-1)+n] = bhs[n];
+ }
+ err = fat_sync_bhs_optw( c_bh+i , nr_bhs*(sbi->fats+wait-1)-i , wait );
+ if (err)
+ goto error;
+ for (n = 0; n+i < nr_bhs*(sbi->fats-1); n++ ) {
+ brelse(c_bh[n+i]);
+ }
error:
+ if (NULL != c_bh) {
+ kfree(c_bh);
+ }
return err;
}

+static inline int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs,
+ int nr_bhs )
+{
+ return fat_mirror_bhs_optw(sb , bhs , nr_bhs, 0);
+}
+
+
int fat_ent_write(struct inode *inode, struct fat_entry *fatent,
int new, int wait)
{
struct super_block *sb = inode->i_sb;
struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops;
- int err;

ops->ent_put(fatent, new);
- if (wait) {
- err = fat_sync_bhs(fatent->bhs, fatent->nr_bhs);
- if (err)
- return err;
- }
- return fat_mirror_bhs(sb, fatent->bhs, fatent->nr_bhs);
+ return fat_mirror_bhs_optw(sb, fatent->bhs, fatent->nr_bhs , wait);
}

static inline int fat_ent_next(struct msdos_sb_info *sbi,
@@ -505,9 +537,9 @@ out:
fatent_brelse(&fatent);
if (!err) {
if (inode_needs_sync(inode))
- err = fat_sync_bhs(bhs, nr_bhs);
- if (!err)
- err = fat_mirror_bhs(sb, bhs, nr_bhs);
+ err = fat_mirror_bhs_optw(sb , bhs, nr_bhs , 1);
+ else
+ err = fat_mirror_bhs_optw(sb, bhs, nr_bhs , 0 );
}
for (i = 0; i < nr_bhs; i++)
brelse(bhs[i]);
@@ -549,11 +581,6 @@ int fat_free_clusters(struct inode *inod
}

if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) {
- if (sb->s_flags & MS_SYNCHRONOUS) {
- err = fat_sync_bhs(bhs, nr_bhs);
- if (err)
- goto error;
- }
err = fat_mirror_bhs(sb, bhs, nr_bhs);
if (err)
goto error;
@@ -564,11 +591,6 @@ int fat_free_clusters(struct inode *inod
fat_collect_bhs(bhs, &nr_bhs, &fatent);
} while (cluster != FAT_ENT_EOF);

- if (sb->s_flags & MS_SYNCHRONOUS) {
- err = fat_sync_bhs(bhs, nr_bhs);
- if (err)
- goto error;
- }
err = fat_mirror_bhs(sb, bhs, nr_bhs);
error:
fatent_brelse(&fatent);
--- a/fs/fat/misc.c 2006-09-19 23:42:06.000000000 -0400
+++ b/fs/fat/misc.c 2006-11-01 10:18:08.000000000 -0500
@@ -194,19 +194,28 @@ void fat_date_unix2dos(int unix_date, __

EXPORT_SYMBOL_GPL(fat_date_unix2dos);

-int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
+
+
+int fat_sync_bhs_optw(struct buffer_head **bhs, int nr_bhs ,int wait)
{
int i, err = 0;

ll_rw_block(SWRITE, nr_bhs, bhs);
- for (i = 0; i < nr_bhs; i++) {
- wait_on_buffer(bhs[i]);
- if (buffer_eopnotsupp(bhs[i])) {
- clear_buffer_eopnotsupp(bhs[i]);
- err = -EOPNOTSUPP;
- } else if (!err && !buffer_uptodate(bhs[i]))
- err = -EIO;
+ if (wait) {
+ for (i = 0; i < nr_bhs; i++) {
+ wait_on_buffer(bhs[i]);
+ if (buffer_eopnotsupp(bhs[i])) {
+ clear_buffer_eopnotsupp(bhs[i]);
+ err = -EOPNOTSUPP;
+ } else if (!err && !buffer_uptodate(bhs[i]))
+ err = -EIO;
+ }
}
+
return err;
}

+inline int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs ) {
+ return fat_sync_bhs_optw(bhs , nr_bhs , 1);
+}
+
--- a/include/linux/msdos_fs.h 2006-09-19 23:42:06.000000000 -0400
+++ b/include/linux/msdos_fs.h 2006-10-25 18:53:50.000000000 -0400
@@ -419,6 +419,7 @@ extern int fat_chain_add(struct inode *i
extern int date_dos2unix(unsigned short time, unsigned short date);
extern void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date);
extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
+extern int fat_sync_bhs_optw(struct buffer_head **bhs, int nr_bhs, int wait);

int fat_cache_init(void);
void fat_cache_destroy(void);

2006-11-01 16:48:04

by Jörn Engel

[permalink] [raw]

Subject: Re: [PATCH 1/1] fat: improve sync performance by grouping writes revised again

On Wed, 1 November 2006 11:17:50 -0500, Holden Karau wrote:
> + c_bh = kmalloc(nr_bhs*(sbi->fats) , GFP_KERNEL);
> + if (NULL == c_bh) {
> + printk(KERN_CRIT "not enough memory to store pointers to FAT blocks, will not sync. Possible data loss\n");
> + err = -ENOMEM;
> + goto error;
> + }

o I personally hate Yoda code ("Null the pointer is not, young Jedi").
o Old code simply returned -ENOMEM without printk. Assuming this was
sufficient before, the printk can be dropped.
o Some people prefer assigning err outside the condition. It is
supposed to give slightly better code on i386, iirc.

Result would be something like:
c_bh = kmalloc(...
err = -ENOMEM;
if (!c_bh)
goto error;

> + for (n = 0 ; n < nr_bhs ; n++ ) {
> + c_bh[(copy-1)*nr_bhs+n] = sb_getblk(sb, backup_fat + bhs[n]->b_blocknr);
> + /* If there is not enough memory, fall back to the old system */
> + if (!c_bh[(copy-1)*nr_bhs+n]) {
> + printk("fat: not enough memory for all blocks , syncing at %d\n" ,(copy-1)*nr_bhs+n);

Whether this printk makes sense, I cannot tell.

> + fat_sync_bhs_optw( c_bh+i , (copy-1)*nr_bhs+n-i-1 , wait );
> + /* Free the now sync'd blocks */
> + for (; i < (copy-1)*nr_bhs+n ; i++)
> + brelse(c_bh[i]);
> + /* We try the same block again */
> + c_bh[(copy-1)*nr_bhs+n] = sb_getblk(sb, backup_fat + bhs[n]->b_blocknr);
> + if (!c_bh[(copy-1)*nr_bhs+n]) {
> + printk(KERN_CRIT "fat:not enough memory for block after existing blocks released. Possible data loss.\n");
> + err = -ENOMEM;
> + goto error;
> + }

As above.

> error:
> + if (NULL != c_bh) {
> + kfree(c_bh);
> + }

kfree(NULL) works just fine. You can remove the condition.

> +int fat_sync_bhs_optw(struct buffer_head **bhs, int nr_bhs ,int wait)
> {
> int i, err = 0;
>
> ll_rw_block(SWRITE, nr_bhs, bhs);
> - for (i = 0; i < nr_bhs; i++) {
> - wait_on_buffer(bhs[i]);
> - if (buffer_eopnotsupp(bhs[i])) {
> - clear_buffer_eopnotsupp(bhs[i]);
> - err = -EOPNOTSUPP;
> - } else if (!err && !buffer_uptodate(bhs[i]))
> - err = -EIO;
> + if (wait) {
> + for (i = 0; i < nr_bhs; i++) {
> + wait_on_buffer(bhs[i]);
> + if (buffer_eopnotsupp(bhs[i])) {
> + clear_buffer_eopnotsupp(bhs[i]);
> + err = -EOPNOTSUPP;
> + } else if (!err && !buffer_uptodate(bhs[i]))
> + err = -EIO;
> + }
> }
> +
> return err;
> }

You could keep the old indentation if your condition was changed to

if (!wait)
return 0;

J?rn

--
You can take my soul, but not my lack of enthusiasm.
-- Wally

2006-11-01 18:02:16

by Holden Karau

[permalink] [raw]

Subject: Re: [PATCH 1/1] fat: improve sync performance by grouping writes revised again

On 11/1/06, J?rn Engel <[email protected]> wrote:
> On Wed, 1 November 2006 11:17:50 -0500, Holden Karau wrote:
> > + c_bh = kmalloc(nr_bhs*(sbi->fats) , GFP_KERNEL);
> > + if (NULL == c_bh) {
> > + printk(KERN_CRIT "not enough memory to store pointers to FAT blocks, will not sync. Possible data loss\n");
> > + err = -ENOMEM;
> > + goto error;
> > + }
>
> o I personally hate Yoda code ("Null the pointer is not, young Jedi").
> o Old code simply returned -ENOMEM without printk. Assuming this was
> sufficient before, the printk can be dropped.
Ok, I'll drop the printk
> o Some people prefer assigning err outside the condition. It is
> supposed to give slightly better code on i386, iirc.
>
> Result would be something like:
> c_bh = kmalloc(...
> err = -ENOMEM;
> if (!c_bh)
> goto error;
That wouldn't work so well since we always return err, and possibly
slightly better code for i386 doesn't seem all that worth it.
>
> > + for (n = 0 ; n < nr_bhs ; n++ ) {
> > + c_bh[(copy-1)*nr_bhs+n] = sb_getblk(sb, backup_fat + bhs[n]->b_blocknr);
> > + /* If there is not enough memory, fall back to the old system */
> > + if (!c_bh[(copy-1)*nr_bhs+n]) {
> > + printk("fat: not enough memory for all blocks , syncing at %d\n" ,(copy-1)*nr_bhs+n);
>
> Whether this printk makes sense, I cannot tell.
I suppose I might as well drop it.
>
> > + fat_sync_bhs_optw( c_bh+i , (copy-1)*nr_bhs+n-i-1 , wait );
> > + /* Free the now sync'd blocks */
> > + for (; i < (copy-1)*nr_bhs+n ; i++)
> > + brelse(c_bh[i]);
> > + /* We try the same block again */
> > + c_bh[(copy-1)*nr_bhs+n] = sb_getblk(sb, backup_fat + bhs[n]->b_blocknr);
> > + if (!c_bh[(copy-1)*nr_bhs+n]) {
> > + printk(KERN_CRIT "fat:not enough memory for block after existing blocks released. Possible data loss.\n");
Based on the same reasoning you provided, I should probably drop this one too.
> > + err = -ENOMEM;
> > + goto error;
> > + }
>
> As above.
I'll drop the printk, but the same holds true about err
>
> > error:
> > + if (NULL != c_bh) {
> > + kfree(c_bh);
> > + }
>
> kfree(NULL) works just fine. You can remove the condition.
Thanks, I should have checked that :-)
>
> > +int fat_sync_bhs_optw(struct buffer_head **bhs, int nr_bhs ,int wait)
> > {
> > int i, err = 0;
> >
> > ll_rw_block(SWRITE, nr_bhs, bhs);
> > - for (i = 0; i < nr_bhs; i++) {
> > - wait_on_buffer(bhs[i]);
> > - if (buffer_eopnotsupp(bhs[i])) {
> > - clear_buffer_eopnotsupp(bhs[i]);
> > - err = -EOPNOTSUPP;
> > - } else if (!err && !buffer_uptodate(bhs[i]))
> > - err = -EIO;
> > + if (wait) {
> > + for (i = 0; i < nr_bhs; i++) {
> > + wait_on_buffer(bhs[i]);
> > + if (buffer_eopnotsupp(bhs[i])) {
> > + clear_buffer_eopnotsupp(bhs[i]);
> > + err = -EOPNOTSUPP;
> > + } else if (!err && !buffer_uptodate(bhs[i]))
> > + err = -EIO;
> > + }
> > }
> > +
> > return err;
> > }
>
> You could keep the old indentation if your condition was changed to
>
> if (!wait)
> return 0;
Sounds good.
>
> J?rn
>
> --
> You can take my soul, but not my lack of enthusiasm.
> -- Wally
>

--
Cell: 613-276-1645

2006-11-01 20:24:42

by Jörn Engel

[permalink] [raw]

Subject: Re: [PATCH 1/1] fat: improve sync performance by grouping writes revised again

On Wed, 1 November 2006 13:02:12 -0500, Holden Karau wrote:
> On 11/1/06, J?rn Engel <[email protected]> wrote:
> >
> >Result would be something like:
> > c_bh = kmalloc(...
> > err = -ENOMEM;
> > if (!c_bh)
> > goto error;
> That wouldn't work so well since we always return err,

I don't quite follow. If the branch is taken, err is -ENOMEM. If the
branch is not taken, err is set to 0 with the next instruction.

Both methods definitely work. Whether one is preferrable over the
other is imo 90% taste and maybe 10% better code on some architecture.
So just pick what you prefer.

J?rn

--
Unless something dramatically changes, by 2015 we'll be largely
wondering what all the fuss surrounding Linux was really about.
-- Rob Enderle

2006-11-01 20:51:54

by Phillip Susi

[permalink] [raw]

Subject: Re: [PATCH 1/1] fat: improve sync performance by grouping writes revised again

I think this is getting into micro-optimization, which is usually bad.
Also moving the assignment of err outside the body of the if only
results in slightly faster code in the case where there is an error,
since you can test and _maybe_ conditionally jump directly to the error:
label if it is not very far away. With the assignment in the body, the
conditional jump must jump to the assignment followed by an
unconditional jump to the label.

In other words, the only time this micro optimization will be of benefit
is if you are erroring out most of the time rather than only under
exceptional conditions, AND the error label isn't too far away for a
conditional branch to reach. In other words, just don't do it ;)

J?rn Engel wrote:
> On Wed, 1 November 2006 13:02:12 -0500, Holden Karau wrote:
>> On 11/1/06, J?rn Engel <[email protected]> wrote:
>>> Result would be something like:
>>> c_bh = kmalloc(...
>>> err = -ENOMEM;
>>> if (!c_bh)
>>> goto error;
>> That wouldn't work so well since we always return err,
>
> I don't quite follow. If the branch is taken, err is -ENOMEM. If the
> branch is not taken, err is set to 0 with the next instruction.
>
> Both methods definitely work. Whether one is preferrable over the
> other is imo 90% taste and maybe 10% better code on some architecture.
> So just pick what you prefer.
>
> J?rn
>

2006-11-01 22:17:52

by Holden Karau

[permalink] [raw]

Subject: Re: [PATCH 1/1] fat: improve sync performance by grouping writes revised again

Hi J?rn,

If I do
c_bh = kmalloc(blah);
err= -ENOMEM;
if (!c_bh)
goto error;
//here err = -ENOMEM
... do some stuff...
error:
return err;

It will return -ENOMEM, no? I mean I could set err back to 0 and do
something like:

c_bh = kmalloc(blah);
err= -ENOMEM;
if (!c_bh)
goto error;
err = 0;
... do some stuf...
error:
return err;

At first glance, at least for me, I'd be scratching my head when I
looked at that.

Also given that this error state is to be an exception not the rule,
if what Phillip suggests is correct, than it would actually be a tiney
be slower. So, all in all I'd rather leave it the way it is :-)

On 11/1/06, Phillip Susi <[email protected]> wrote:
> I think this is getting into micro-optimization, which is usually bad.
> Also moving the assignment of err outside the body of the if only
> results in slightly faster code in the case where there is an error,
> since you can test and _maybe_ conditionally jump directly to the error:
> label if it is not very far away. With the assignment in the body, the
> conditional jump must jump to the assignment followed by an
> unconditional jump to the label.
>
> In other words, the only time this micro optimization will be of benefit
> is if you are erroring out most of the time rather than only under
> exceptional conditions, AND the error label isn't too far away for a
> conditional branch to reach. In other words, just don't do it ;)
>
> J?rn Engel wrote:
> > On Wed, 1 November 2006 13:02:12 -0500, Holden Karau wrote:
> >> On 11/1/06, J?rn Engel <[email protected]> wrote:
> >>> Result would be something like:
> >>> c_bh = kmalloc(...
> >>> err = -ENOMEM;
> >>> if (!c_bh)
> >>> goto error;
> >> That wouldn't work so well since we always return err,
> >
> > I don't quite follow. If the branch is taken, err is -ENOMEM. If the
> > branch is not taken, err is set to 0 with the next instruction.
> >
> > Both methods definitely work. Whether one is preferrable over the
> > other is imo 90% taste and maybe 10% better code on some architecture.
> > So just pick what you prefer.
> >
> > J?rn
> >
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>

--
Cell: 613-276-1645

2006-11-02 10:25:17

by Jörn Engel

[permalink] [raw]

Subject: historical micro-optimizations (Re: [PATCH 1/1] fat: improve sync performance by grouping writes revised again)

On Wed, 1 November 2006 15:52:09 -0500, Phillip Susi wrote:
>
> In other words, the only time this micro optimization will be of benefit
> is if you are erroring out most of the time rather than only under
> exceptional conditions, AND the error label isn't too far away for a
> conditional branch to reach. In other words, just don't do it ;)

The difference was in code size, so the icache impact would have
benefitted the good case as well. "was" and "would have" because I
finally got off my lazy arse and tested the code. With gcc 4.12 both
variants compiled to exactly the same code. With 2.95 there was a one
instruction (2 bytes) difference.

I didn't test all the versions in between, but the advantage is
definitely a thing of the past.

And even if the 2 byte difference still existed, it wouldn't really
matter much, we all agree on that. That's why I said:

> >Both methods definitely work. Whether one is preferrable over the
> >other is imo 90% taste and maybe 10% better code on some architecture.
> >So just pick what you prefer.

The only thing I was arguing was that one method would not work - it
does. So I hope this was sufficient distraction for everyone and we
can get back to work. :)

J?rn

--
You can't tell where a program is going to spend its time. Bottlenecks
occur in surprising places, so don't try to second guess and put in a
speed hack until you've proven that's where the bottleneck is.
-- Rob Pike