2008-07-02 08:27:01

by YAMAMOTO Takashi

[permalink] [raw]
Subject: [PATCH] fix task dirty balancing

hi,

task_dirty_inc doesn't seem to be called properly for
filesystems which don't use set_page_dirty for write(2).
eg. ext2 w/o nobh option.

YAMAMOTO Takashi


Signed-off-by: YAMAMOTO Takashi <[email protected]>
---

commit e68f05bf56d0652c107bba1cff3f8491e41a2117
Author: YAMAMOTO Takashi <[email protected]>
Date: Wed Jul 2 16:17:33 2008 +0900

fix dirty balancing for tasks.

call task_dirty_inc when dirtying a page with mark_buffer_dirty.

diff --git a/fs/buffer.c b/fs/buffer.c
index 4788a9e..2f1c7c6 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1219,8 +1219,9 @@ void mark_buffer_dirty(struct buffer_head *bh)
return;
}

- if (!test_set_buffer_dirty(bh))
- __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
+ if (!test_set_buffer_dirty(bh) &&
+ __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0))
+ task_dirty_inc(current);
}

/*
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index bd91987..61d0aec 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -95,6 +95,7 @@ int wakeup_pdflush(long nr_pages);
void laptop_io_completion(void);
void laptop_sync_completion(void);
void throttle_vm_writeout(gfp_t gfp_mask);
+void task_dirty_inc(struct task_struct *);

/* These are exported to sysctl. */
extern int dirty_background_ratio;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 29b1d1e..4dc85d0 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -176,10 +176,11 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
}
EXPORT_SYMBOL_GPL(bdi_writeout_inc);

-static inline void task_dirty_inc(struct task_struct *tsk)
+void task_dirty_inc(struct task_struct *tsk)
{
prop_inc_single(&vm_dirties, &tsk->dirties);
}
+EXPORT_SYMBOL_GPL(task_dirty_inc);

/*
* Obtain an accurate fraction of the BDI's portion.


2008-07-02 20:32:24

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] fix task dirty balancing

On Wed, 2008-07-02 at 17:26 +0900, YAMAMOTO Takashi wrote:
> hi,
>
> task_dirty_inc doesn't seem to be called properly for
> filesystems which don't use set_page_dirty for write(2).
> eg. ext2 w/o nobh option.

I'm thinking this is an ext2 bug. So I'd rather it'd just call
set_page_dirty() like a proper filesystem instead of doing things like
this.

And I certainly don't like exporting task_dirty_inc() - filesystems and
the like should not have to know about things like that.

Of course I'm utterly ignorant of filesystems, hence lets include more
clue-full people.

> YAMAMOTO Takashi
>
>
> Signed-off-by: YAMAMOTO Takashi <[email protected]>
> ---
>
> commit e68f05bf56d0652c107bba1cff3f8491e41a2117
> Author: YAMAMOTO Takashi <[email protected]>
> Date: Wed Jul 2 16:17:33 2008 +0900
>
> fix dirty balancing for tasks.
>
> call task_dirty_inc when dirtying a page with mark_buffer_dirty.
>
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 4788a9e..2f1c7c6 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -1219,8 +1219,9 @@ void mark_buffer_dirty(struct buffer_head *bh)
> return;
> }
>
> - if (!test_set_buffer_dirty(bh))
> - __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
> + if (!test_set_buffer_dirty(bh) &&
> + __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0))
> + task_dirty_inc(current);
> }
>
> /*
> diff --git a/include/linux/writeback.h b/include/linux/writeback.h
> index bd91987..61d0aec 100644
> --- a/include/linux/writeback.h
> +++ b/include/linux/writeback.h
> @@ -95,6 +95,7 @@ int wakeup_pdflush(long nr_pages);
> void laptop_io_completion(void);
> void laptop_sync_completion(void);
> void throttle_vm_writeout(gfp_t gfp_mask);
> +void task_dirty_inc(struct task_struct *);
>
> /* These are exported to sysctl. */
> extern int dirty_background_ratio;
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index 29b1d1e..4dc85d0 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -176,10 +176,11 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
> }
> EXPORT_SYMBOL_GPL(bdi_writeout_inc);
>
> -static inline void task_dirty_inc(struct task_struct *tsk)
> +void task_dirty_inc(struct task_struct *tsk)
> {
> prop_inc_single(&vm_dirties, &tsk->dirties);
> }
> +EXPORT_SYMBOL_GPL(task_dirty_inc);
>
> /*
> * Obtain an accurate fraction of the BDI's portion.

2008-07-03 08:30:16

by YAMAMOTO Takashi

[permalink] [raw]
Subject: Re: [PATCH] fix task dirty balancing

hi,

> On Wed, 2008-07-02 at 17:26 +0900, YAMAMOTO Takashi wrote:
> > hi,
> >
> > task_dirty_inc doesn't seem to be called properly for
> > filesystems which don't use set_page_dirty for write(2).
> > eg. ext2 w/o nobh option.
>
> I'm thinking this is an ext2 bug. So I'd rather it'd just call
> set_page_dirty() like a proper filesystem instead of doing things like
> this.

set_page_dirty marks all buffers in a page dirty.
it isn't suitable for what mark_buffer_dirty is used for.

YAMAMOTO Takashi

>
> And I certainly don't like exporting task_dirty_inc() - filesystems and
> the like should not have to know about things like that.
>
> Of course I'm utterly ignorant of filesystems, hence lets include more
> clue-full people.
>
> > YAMAMOTO Takashi
> >
> >
> > Signed-off-by: YAMAMOTO Takashi <[email protected]>
> > ---
> >
> > commit e68f05bf56d0652c107bba1cff3f8491e41a2117
> > Author: YAMAMOTO Takashi <[email protected]>
> > Date: Wed Jul 2 16:17:33 2008 +0900
> >
> > fix dirty balancing for tasks.
> >
> > call task_dirty_inc when dirtying a page with mark_buffer_dirty.
> >
> > diff --git a/fs/buffer.c b/fs/buffer.c
> > index 4788a9e..2f1c7c6 100644
> > --- a/fs/buffer.c
> > +++ b/fs/buffer.c
> > @@ -1219,8 +1219,9 @@ void mark_buffer_dirty(struct buffer_head *bh)
> > return;
> > }
> >
> > - if (!test_set_buffer_dirty(bh))
> > - __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
> > + if (!test_set_buffer_dirty(bh) &&
> > + __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0))
> > + task_dirty_inc(current);
> > }
> >
> > /*
> > diff --git a/include/linux/writeback.h b/include/linux/writeback.h
> > index bd91987..61d0aec 100644
> > --- a/include/linux/writeback.h
> > +++ b/include/linux/writeback.h
> > @@ -95,6 +95,7 @@ int wakeup_pdflush(long nr_pages);
> > void laptop_io_completion(void);
> > void laptop_sync_completion(void);
> > void throttle_vm_writeout(gfp_t gfp_mask);
> > +void task_dirty_inc(struct task_struct *);
> >
> > /* These are exported to sysctl. */
> > extern int dirty_background_ratio;
> > diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> > index 29b1d1e..4dc85d0 100644
> > --- a/mm/page-writeback.c
> > +++ b/mm/page-writeback.c
> > @@ -176,10 +176,11 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
> > }
> > EXPORT_SYMBOL_GPL(bdi_writeout_inc);
> >
> > -static inline void task_dirty_inc(struct task_struct *tsk)
> > +void task_dirty_inc(struct task_struct *tsk)
> > {
> > prop_inc_single(&vm_dirties, &tsk->dirties);
> > }
> > +EXPORT_SYMBOL_GPL(task_dirty_inc);
> >
> > /*
> > * Obtain an accurate fraction of the BDI's portion.

2008-07-05 05:58:48

by Kamezawa Hiroyuki

[permalink] [raw]
Subject: Re: [PATCH] fix task dirty balancing

On Wed, 02 Jul 2008 22:27:18 +0200
Peter Zijlstra <[email protected]> wrote:

> On Wed, 2008-07-02 at 17:26 +0900, YAMAMOTO Takashi wrote:
> > hi,
> >
> > task_dirty_inc doesn't seem to be called properly for
> > filesystems which don't use set_page_dirty for write(2).
> > eg. ext2 w/o nobh option.
>
> I'm thinking this is an ext2 bug. So I'd rather it'd just call
> set_page_dirty() like a proper filesystem instead of doing things like
> this.
>
> And I certainly don't like exporting task_dirty_inc() - filesystems and
> the like should not have to know about things like that.
>
Hmm, a bit complicated for me.

At first, there are 2 __set_page_dirty() in the kernel.
- mm/page-writeback.c: __set_page_dirty()
.... set_page_dirty() calls this.
- fs/buffer.c : __set_page_dirty()
.... __set_page_dirty_buffers() and mark_buffer_dirty() calls this.

Why per-task dirty acconitng is done in mm/page-writeback.c::set_page_dirty() ?

It seems other accounting is done in the fs/buffer.c: __set_page_dirty()

The purpose of task-dirty accounting is different from others ?

= fs/buffer.c
697 static int __set_page_dirty(struct page *page,
698 struct address_space *mapping, int warn)
699 {
700 if (unlikely(!mapping))
701 return !TestSetPageDirty(page);
702
703 if (TestSetPageDirty(page))
704 return 0;
705
706 write_lock_irq(&mapping->tree_lock);
707 if (page->mapping) { /* Race with truncate? */
708 WARN_ON_ONCE(warn && !PageUptodate(page));
709
710 if (mapping_cap_account_dirty(mapping)) {
711 __inc_zone_page_state(page, NR_FILE_DIRTY);
712 __inc_bdi_stat(mapping->backing_dev_info,
713 BDI_RECLAIMABLE);
714 task_io_account_write(PAGE_CACHE_SIZE);
715 }
716 radix_tree_tag_set(&mapping->page_tree,
717 page_index(page), PAGECACHE_TAG_DIRTY);
718 }
719 write_unlock_irq(&mapping->tree_lock);
720 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
721
722 return 1;
==

And task-dirty-limit don't have to take care of following 2 case ?
- __set_page_dirty_nobuffers(struct page *page) (increment BDI_RECRAIMABLE)
- test_set_page_writeback() (increment BDI_RECLAIMABLE)

Thanks,
-Kame



> Of course I'm utterly ignorant of filesystems, hence lets include more
> clue-full people.
>
> > YAMAMOTO Takashi
> >
> >
> > Signed-off-by: YAMAMOTO Takashi <[email protected]>
> > ---
> >
> > commit e68f05bf56d0652c107bba1cff3f8491e41a2117
> > Author: YAMAMOTO Takashi <[email protected]>
> > Date: Wed Jul 2 16:17:33 2008 +0900
> >
> > fix dirty balancing for tasks.
> >
> > call task_dirty_inc when dirtying a page with mark_buffer_dirty.
> >
> > diff --git a/fs/buffer.c b/fs/buffer.c
> > index 4788a9e..2f1c7c6 100644
> > --- a/fs/buffer.c
> > +++ b/fs/buffer.c
> > @@ -1219,8 +1219,9 @@ void mark_buffer_dirty(struct buffer_head *bh)
> > return;
> > }
> >
> > - if (!test_set_buffer_dirty(bh))
> > - __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
> > + if (!test_set_buffer_dirty(bh) &&
> > + __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0))
> > + task_dirty_inc(current);
> > }
> >
> > /*
> > diff --git a/include/linux/writeback.h b/include/linux/writeback.h
> > index bd91987..61d0aec 100644
> > --- a/include/linux/writeback.h
> > +++ b/include/linux/writeback.h
> > @@ -95,6 +95,7 @@ int wakeup_pdflush(long nr_pages);
> > void laptop_io_completion(void);
> > void laptop_sync_completion(void);
> > void throttle_vm_writeout(gfp_t gfp_mask);
> > +void task_dirty_inc(struct task_struct *);
> >
> > /* These are exported to sysctl. */
> > extern int dirty_background_ratio;
> > diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> > index 29b1d1e..4dc85d0 100644
> > --- a/mm/page-writeback.c
> > +++ b/mm/page-writeback.c
> > @@ -176,10 +176,11 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
> > }
> > EXPORT_SYMBOL_GPL(bdi_writeout_inc);
> >
> > -static inline void task_dirty_inc(struct task_struct *tsk)
> > +void task_dirty_inc(struct task_struct *tsk)
> > {
> > prop_inc_single(&vm_dirties, &tsk->dirties);
> > }
> > +EXPORT_SYMBOL_GPL(task_dirty_inc);
> >
> > /*
> > * Obtain an accurate fraction of the BDI's portion.
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>

2008-07-05 09:45:38

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] fix task dirty balancing

On Sat, 2008-07-05 at 15:04 +0900, KAMEZAWA Hiroyuki wrote:
> On Wed, 02 Jul 2008 22:27:18 +0200
> Peter Zijlstra <[email protected]> wrote:
>
> > On Wed, 2008-07-02 at 17:26 +0900, YAMAMOTO Takashi wrote:
> > > hi,
> > >
> > > task_dirty_inc doesn't seem to be called properly for
> > > filesystems which don't use set_page_dirty for write(2).
> > > eg. ext2 w/o nobh option.
> >
> > I'm thinking this is an ext2 bug. So I'd rather it'd just call
> > set_page_dirty() like a proper filesystem instead of doing things like
> > this.
> >
> > And I certainly don't like exporting task_dirty_inc() - filesystems and
> > the like should not have to know about things like that.
> >
> Hmm, a bit complicated for me.
>
> At first, there are 2 __set_page_dirty() in the kernel.
> - mm/page-writeback.c: __set_page_dirty()
> .... set_page_dirty() calls this.
> - fs/buffer.c : __set_page_dirty()
> .... __set_page_dirty_buffers() and mark_buffer_dirty() calls this.
>
> Why per-task dirty acconitng is done in mm/page-writeback.c::set_page_dirty() ?
>
> It seems other accounting is done in the fs/buffer.c: __set_page_dirty()
>
> The purpose of task-dirty accounting is different from others ?
>
> = fs/buffer.c
> 697 static int __set_page_dirty(struct page *page,
> 698 struct address_space *mapping, int warn)
> 699 {
> 700 if (unlikely(!mapping))
> 701 return !TestSetPageDirty(page);
> 702
> 703 if (TestSetPageDirty(page))
> 704 return 0;
> 705
> 706 write_lock_irq(&mapping->tree_lock);
> 707 if (page->mapping) { /* Race with truncate? */
> 708 WARN_ON_ONCE(warn && !PageUptodate(page));
> 709
> 710 if (mapping_cap_account_dirty(mapping)) {
> 711 __inc_zone_page_state(page, NR_FILE_DIRTY);
> 712 __inc_bdi_stat(mapping->backing_dev_info,
> 713 BDI_RECLAIMABLE);
> 714 task_io_account_write(PAGE_CACHE_SIZE);
> 715 }
> 716 radix_tree_tag_set(&mapping->page_tree,
> 717 page_index(page), PAGECACHE_TAG_DIRTY);
> 718 }
> 719 write_unlock_irq(&mapping->tree_lock);
> 720 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
> 721
> 722 return 1;
> ==
>
> And task-dirty-limit don't have to take care of following 2 case ?
> - __set_page_dirty_nobuffers(struct page *page) (increment BDI_RECRAIMABLE)
> - test_set_page_writeback() (increment BDI_RECLAIMABLE)


Gah - what a mess...

It's in set_page_dirty() so it wouldn't have to be in all the
a_ops->set_page_dirty() functions...

But now it turns out people don't use set_page_dirty() to dirty
pages :-(

For the purpose of task_dirty_inc() I guess we might as well pair it
with task_io_account_write() for each PAGE_CACHE_SIZE (and ignore the
DIO bit, since that doesn't care about the dirty limit anyway).

Might be my ignorance, but _why_ do we have __set_page_dirty_nobuffers()
reimplemented in fs/buffers.c:__set_page_dirty() ? - those two functions
look suspiciously similar.

Also, why was the EXPORT added anyway - fs/buffers.o never ends up in
modules?

Please beat me to cleaning up this stuff - otherwise I'll have to look
at it when I get back from holidays.

Peter

2008-07-07 06:46:41

by YAMAMOTO Takashi

[permalink] [raw]
Subject: Re: [PATCH] fix task dirty balancing

> Also, why was the EXPORT added anyway - fs/buffers.o never ends up in
> modules?

are you talking about EXPORT_SYMBOL_GPL in my patch?
well, actually, i don't know. :) it might be unnecessary.

YAMAMOTO Takashi

2008-07-07 10:37:29

by Nick Piggin

[permalink] [raw]
Subject: Re: [PATCH] fix task dirty balancing

On Saturday 05 July 2008 19:30, Peter Zijlstra wrote:
> On Sat, 2008-07-05 at 15:04 +0900, KAMEZAWA Hiroyuki wrote:

> > And task-dirty-limit don't have to take care of following 2 case ?
> > - __set_page_dirty_nobuffers(struct page *page) (increment
> > BDI_RECRAIMABLE) - test_set_page_writeback() (increment BDI_RECLAIMABLE)
>
> Gah - what a mess...

It's not so bad once you get past the funny names and conventions :)


> It's in set_page_dirty() so it wouldn't have to be in all the
> a_ops->set_page_dirty() functions...

At some point, they have to actually set the page dirty though, in
which case they would normally call __set_page_dirty_nobuffers or
similar (ie. rather than SetPageDirty, unless they really know what
they're doing).


> But now it turns out people don't use set_page_dirty() to dirty
> pages :-(

They do, but they also use other things :) Filesystems of course are
in complete control of the aops, so they can definitely bypass it.


> For the purpose of task_dirty_inc() I guess we might as well pair it
> with task_io_account_write() for each PAGE_CACHE_SIZE (and ignore the
> DIO bit, since that doesn't care about the dirty limit anyway).

Yes, the dirty increment should go in the same places where we increment
all the dirty statistics... it's the right spot to do it AFAIKS.


> Might be my ignorance, but _why_ do we have __set_page_dirty_nobuffers()
> reimplemented in fs/buffers.c:__set_page_dirty() ? - those two functions
> look suspiciously similar.

Probably no really good reason. The buffers.c code should probably be
merged / unified in page-writeback.c.


> Also, why was the EXPORT added anyway - fs/buffers.o never ends up in
> modules?

Definitely it would. Almost every filesystem can be build modularly. Or
did you mean some other symbol?

2008-07-08 23:38:54

by YAMAMOTO Takashi

[permalink] [raw]
Subject: Re: [PATCH] fix task dirty balancing

hi,

> Please beat me to cleaning up this stuff - otherwise I'll have to look
> at it when I get back from holidays.

how about the following?

YAMAMOTO Takashi


Signed-off-by: YAMAMOTO Takashi <[email protected]>
---

diff --git a/fs/buffer.c b/fs/buffer.c
index 4ffb5bb..1bc833d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -699,41 +699,6 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
EXPORT_SYMBOL(mark_buffer_dirty_inode);

/*
- * Mark the page dirty, and set it dirty in the radix tree, and mark the inode
- * dirty.
- *
- * If warn is true, then emit a warning if the page is not uptodate and has
- * not been truncated.
- */
-static int __set_page_dirty(struct page *page,
- struct address_space *mapping, int warn)
-{
- if (unlikely(!mapping))
- return !TestSetPageDirty(page);
-
- if (TestSetPageDirty(page))
- return 0;
-
- spin_lock_irq(&mapping->tree_lock);
- if (page->mapping) { /* Race with truncate? */
- WARN_ON_ONCE(warn && !PageUptodate(page));
-
- if (mapping_cap_account_dirty(mapping)) {
- __inc_zone_page_state(page, NR_FILE_DIRTY);
- __inc_bdi_stat(mapping->backing_dev_info,
- BDI_RECLAIMABLE);
- task_io_account_write(PAGE_CACHE_SIZE);
- }
- radix_tree_tag_set(&mapping->page_tree,
- page_index(page), PAGECACHE_TAG_DIRTY);
- }
- spin_unlock_irq(&mapping->tree_lock);
- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-
- return 1;
-}
-
-/*
* Add a page to the dirty page list.
*
* It is a sad fact of life that this function is called from several places
@@ -762,22 +727,21 @@ int __set_page_dirty_buffers(struct page *page)
{
struct address_space *mapping = page_mapping(page);

- if (unlikely(!mapping))
- return !TestSetPageDirty(page);
-
- spin_lock(&mapping->private_lock);
- if (page_has_buffers(page)) {
- struct buffer_head *head = page_buffers(page);
- struct buffer_head *bh = head;
+ if (likely(mapping)) {
+ spin_lock(&mapping->private_lock);
+ if (page_has_buffers(page)) {
+ struct buffer_head *head = page_buffers(page);
+ struct buffer_head *bh = head;

- do {
- set_buffer_dirty(bh);
- bh = bh->b_this_page;
- } while (bh != head);
+ do {
+ set_buffer_dirty(bh);
+ bh = bh->b_this_page;
+ } while (bh != head);
+ }
+ spin_unlock(&mapping->private_lock);
}
- spin_unlock(&mapping->private_lock);

- return __set_page_dirty(page, mapping, 1);
+ return __set_page_dirty_nobuffers(page);
}
EXPORT_SYMBOL(__set_page_dirty_buffers);

@@ -1220,7 +1184,7 @@ void mark_buffer_dirty(struct buffer_head *bh)
}

if (!test_set_buffer_dirty(bh))
- __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
+ __set_page_dirty_nobuffers(bh->b_page);
}

/*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 29b1d1e..e6fa69e 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -176,7 +176,7 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
}
EXPORT_SYMBOL_GPL(bdi_writeout_inc);

-static inline void task_dirty_inc(struct task_struct *tsk)
+static void task_dirty_inc(struct task_struct *tsk)
{
prop_inc_single(&vm_dirties, &tsk->dirties);
}
@@ -1074,12 +1074,14 @@ int __set_page_dirty_no_writeback(struct page *page)
*/
int __set_page_dirty_nobuffers(struct page *page)
{
- if (!TestSetPageDirty(page)) {
- struct address_space *mapping = page_mapping(page);
- struct address_space *mapping2;
+ struct address_space *mapping;

- if (!mapping)
- return 1;
+ if (TestSetPageDirty(page))
+ return 0;
+
+ mapping = page_mapping(page);
+ if (likely(mapping)) {
+ struct address_space *mapping2;

spin_lock_irq(&mapping->tree_lock);
mapping2 = page_mapping(page);
@@ -1100,9 +1102,11 @@ int __set_page_dirty_nobuffers(struct page *page)
/* !PageAnon && !swapper_space */
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
- return 1;
}
- return 0;
+
+ task_dirty_inc(current);
+
+ return 1;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);

@@ -1122,7 +1126,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage);
* If the mapping doesn't provide a set_page_dirty a_op, then
* just fall through and assume that it wants buffer_heads.
*/
-static int __set_page_dirty(struct page *page)
+int set_page_dirty(struct page *page)
{
struct address_space *mapping = page_mapping(page);

@@ -1140,14 +1144,6 @@ static int __set_page_dirty(struct page *page)
}
return 0;
}
-
-int set_page_dirty(struct page *page)
-{
- int ret = __set_page_dirty(page);
- if (ret)
- task_dirty_inc(current);
- return ret;
-}
EXPORT_SYMBOL(set_page_dirty);

/*

2008-07-09 07:42:21

by Nick Piggin

[permalink] [raw]
Subject: Re: [PATCH] fix task dirty balancing

On Wednesday 09 July 2008 09:38, YAMAMOTO Takashi wrote:
> hi,
>
> > Please beat me to cleaning up this stuff - otherwise I'll have to look
> > at it when I get back from holidays.
>
> how about the following?

Quite good, however I would like to keep the buffers warning if it isn't
too difficult (it has already caught one or two real bugs). Also, we should
split out the bugfix from the cleanup. But yes overall I think the result
looks quite nice.


> YAMAMOTO Takashi
>
>
> Signed-off-by: YAMAMOTO Takashi <[email protected]>
> ---
>
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 4ffb5bb..1bc833d 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -699,41 +699,6 @@ void mark_buffer_dirty_inode(struct buffer_head *bh,
> struct inode *inode) EXPORT_SYMBOL(mark_buffer_dirty_inode);
>
> /*
> - * Mark the page dirty, and set it dirty in the radix tree, and mark the
> inode - * dirty.
> - *
> - * If warn is true, then emit a warning if the page is not uptodate and
> has - * not been truncated.
> - */
> -static int __set_page_dirty(struct page *page,
> - struct address_space *mapping, int warn)
> -{
> - if (unlikely(!mapping))
> - return !TestSetPageDirty(page);
> -
> - if (TestSetPageDirty(page))
> - return 0;
> -
> - spin_lock_irq(&mapping->tree_lock);
> - if (page->mapping) { /* Race with truncate? */
> - WARN_ON_ONCE(warn && !PageUptodate(page));
> -
> - if (mapping_cap_account_dirty(mapping)) {
> - __inc_zone_page_state(page, NR_FILE_DIRTY);
> - __inc_bdi_stat(mapping->backing_dev_info,
> - BDI_RECLAIMABLE);
> - task_io_account_write(PAGE_CACHE_SIZE);
> - }
> - radix_tree_tag_set(&mapping->page_tree,
> - page_index(page), PAGECACHE_TAG_DIRTY);
> - }
> - spin_unlock_irq(&mapping->tree_lock);
> - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
> -
> - return 1;
> -}
> -
> -/*
> * Add a page to the dirty page list.
> *
> * It is a sad fact of life that this function is called from several
> places @@ -762,22 +727,21 @@ int __set_page_dirty_buffers(struct page
> *page) {
> struct address_space *mapping = page_mapping(page);
>
> - if (unlikely(!mapping))
> - return !TestSetPageDirty(page);
> -
> - spin_lock(&mapping->private_lock);
> - if (page_has_buffers(page)) {
> - struct buffer_head *head = page_buffers(page);
> - struct buffer_head *bh = head;
> + if (likely(mapping)) {
> + spin_lock(&mapping->private_lock);
> + if (page_has_buffers(page)) {
> + struct buffer_head *head = page_buffers(page);
> + struct buffer_head *bh = head;
>
> - do {
> - set_buffer_dirty(bh);
> - bh = bh->b_this_page;
> - } while (bh != head);
> + do {
> + set_buffer_dirty(bh);
> + bh = bh->b_this_page;
> + } while (bh != head);
> + }
> + spin_unlock(&mapping->private_lock);
> }
> - spin_unlock(&mapping->private_lock);
>
> - return __set_page_dirty(page, mapping, 1);
> + return __set_page_dirty_nobuffers(page);
> }
> EXPORT_SYMBOL(__set_page_dirty_buffers);
>
> @@ -1220,7 +1184,7 @@ void mark_buffer_dirty(struct buffer_head *bh)
> }
>
> if (!test_set_buffer_dirty(bh))
> - __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
> + __set_page_dirty_nobuffers(bh->b_page);
> }
>
> /*
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index 29b1d1e..e6fa69e 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -176,7 +176,7 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
> }
> EXPORT_SYMBOL_GPL(bdi_writeout_inc);
>
> -static inline void task_dirty_inc(struct task_struct *tsk)
> +static void task_dirty_inc(struct task_struct *tsk)
> {
> prop_inc_single(&vm_dirties, &tsk->dirties);
> }
> @@ -1074,12 +1074,14 @@ int __set_page_dirty_no_writeback(struct page
> *page) */
> int __set_page_dirty_nobuffers(struct page *page)
> {
> - if (!TestSetPageDirty(page)) {
> - struct address_space *mapping = page_mapping(page);
> - struct address_space *mapping2;
> + struct address_space *mapping;
>
> - if (!mapping)
> - return 1;
> + if (TestSetPageDirty(page))
> + return 0;
> +
> + mapping = page_mapping(page);
> + if (likely(mapping)) {
> + struct address_space *mapping2;
>
> spin_lock_irq(&mapping->tree_lock);
> mapping2 = page_mapping(page);
> @@ -1100,9 +1102,11 @@ int __set_page_dirty_nobuffers(struct page *page)
> /* !PageAnon && !swapper_space */
> __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
> }
> - return 1;
> }
> - return 0;
> +
> + task_dirty_inc(current);
> +
> + return 1;
> }
> EXPORT_SYMBOL(__set_page_dirty_nobuffers);
>
> @@ -1122,7 +1126,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage);
> * If the mapping doesn't provide a set_page_dirty a_op, then
> * just fall through and assume that it wants buffer_heads.
> */
> -static int __set_page_dirty(struct page *page)
> +int set_page_dirty(struct page *page)
> {
> struct address_space *mapping = page_mapping(page);
>
> @@ -1140,14 +1144,6 @@ static int __set_page_dirty(struct page *page)
> }
> return 0;
> }
> -
> -int set_page_dirty(struct page *page)
> -{
> - int ret = __set_page_dirty(page);
> - if (ret)
> - task_dirty_inc(current);
> - return ret;
> -}
> EXPORT_SYMBOL(set_page_dirty);
>
> /*

2008-07-10 03:11:05

by YAMAMOTO Takashi

[permalink] [raw]
Subject: Re: [PATCH] fix task dirty balancing

hi,

thanks for the review.

> On Wednesday 09 July 2008 09:38, YAMAMOTO Takashi wrote:
> > hi,
> >
> > > Please beat me to cleaning up this stuff - otherwise I'll have to look
> > > at it when I get back from holidays.
> >
> > how about the following?
>
> Quite good, however I would like to keep the buffers warning if it isn't
> too difficult (it has already caught one or two real bugs).

isn't the WARN_ON_ONCE in __set_page_dirty_nobuffers enough?

> Also, we should
> split out the bugfix from the cleanup. But yes overall I think the result
> looks quite nice.

honestly, i don't think it makes much sense to separate the fix and
the cleanup in this particular case. trying to keep the bug while
the code cleanup naturally fixes it, or vice versa, would be a waste
of time.

YAMAMOTO Takashi

>
>
> > YAMAMOTO Takashi
> >
> >
> > Signed-off-by: YAMAMOTO Takashi <[email protected]>
> > ---
> >
> > diff --git a/fs/buffer.c b/fs/buffer.c
> > index 4ffb5bb..1bc833d 100644
> > --- a/fs/buffer.c
> > +++ b/fs/buffer.c
> > @@ -699,41 +699,6 @@ void mark_buffer_dirty_inode(struct buffer_head *bh,
> > struct inode *inode) EXPORT_SYMBOL(mark_buffer_dirty_inode);
> >
> > /*
> > - * Mark the page dirty, and set it dirty in the radix tree, and mark the
> > inode - * dirty.
> > - *
> > - * If warn is true, then emit a warning if the page is not uptodate and
> > has - * not been truncated.
> > - */
> > -static int __set_page_dirty(struct page *page,
> > - struct address_space *mapping, int warn)
> > -{
> > - if (unlikely(!mapping))
> > - return !TestSetPageDirty(page);
> > -
> > - if (TestSetPageDirty(page))
> > - return 0;
> > -
> > - spin_lock_irq(&mapping->tree_lock);
> > - if (page->mapping) { /* Race with truncate? */
> > - WARN_ON_ONCE(warn && !PageUptodate(page));
> > -
> > - if (mapping_cap_account_dirty(mapping)) {
> > - __inc_zone_page_state(page, NR_FILE_DIRTY);
> > - __inc_bdi_stat(mapping->backing_dev_info,
> > - BDI_RECLAIMABLE);
> > - task_io_account_write(PAGE_CACHE_SIZE);
> > - }
> > - radix_tree_tag_set(&mapping->page_tree,
> > - page_index(page), PAGECACHE_TAG_DIRTY);
> > - }
> > - spin_unlock_irq(&mapping->tree_lock);
> > - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
> > -
> > - return 1;
> > -}
> > -
> > -/*
> > * Add a page to the dirty page list.
> > *
> > * It is a sad fact of life that this function is called from several
> > places @@ -762,22 +727,21 @@ int __set_page_dirty_buffers(struct page
> > *page) {
> > struct address_space *mapping = page_mapping(page);
> >
> > - if (unlikely(!mapping))
> > - return !TestSetPageDirty(page);
> > -
> > - spin_lock(&mapping->private_lock);
> > - if (page_has_buffers(page)) {
> > - struct buffer_head *head = page_buffers(page);
> > - struct buffer_head *bh = head;
> > + if (likely(mapping)) {
> > + spin_lock(&mapping->private_lock);
> > + if (page_has_buffers(page)) {
> > + struct buffer_head *head = page_buffers(page);
> > + struct buffer_head *bh = head;
> >
> > - do {
> > - set_buffer_dirty(bh);
> > - bh = bh->b_this_page;
> > - } while (bh != head);
> > + do {
> > + set_buffer_dirty(bh);
> > + bh = bh->b_this_page;
> > + } while (bh != head);
> > + }
> > + spin_unlock(&mapping->private_lock);
> > }
> > - spin_unlock(&mapping->private_lock);
> >
> > - return __set_page_dirty(page, mapping, 1);
> > + return __set_page_dirty_nobuffers(page);
> > }
> > EXPORT_SYMBOL(__set_page_dirty_buffers);
> >
> > @@ -1220,7 +1184,7 @@ void mark_buffer_dirty(struct buffer_head *bh)
> > }
> >
> > if (!test_set_buffer_dirty(bh))
> > - __set_page_dirty(bh->b_page, page_mapping(bh->b_page), 0);
> > + __set_page_dirty_nobuffers(bh->b_page);
> > }
> >
> > /*
> > diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> > index 29b1d1e..e6fa69e 100644
> > --- a/mm/page-writeback.c
> > +++ b/mm/page-writeback.c
> > @@ -176,7 +176,7 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
> > }
> > EXPORT_SYMBOL_GPL(bdi_writeout_inc);
> >
> > -static inline void task_dirty_inc(struct task_struct *tsk)
> > +static void task_dirty_inc(struct task_struct *tsk)
> > {
> > prop_inc_single(&vm_dirties, &tsk->dirties);
> > }
> > @@ -1074,12 +1074,14 @@ int __set_page_dirty_no_writeback(struct page
> > *page) */
> > int __set_page_dirty_nobuffers(struct page *page)
> > {
> > - if (!TestSetPageDirty(page)) {
> > - struct address_space *mapping = page_mapping(page);
> > - struct address_space *mapping2;
> > + struct address_space *mapping;
> >
> > - if (!mapping)
> > - return 1;
> > + if (TestSetPageDirty(page))
> > + return 0;
> > +
> > + mapping = page_mapping(page);
> > + if (likely(mapping)) {
> > + struct address_space *mapping2;
> >
> > spin_lock_irq(&mapping->tree_lock);
> > mapping2 = page_mapping(page);
> > @@ -1100,9 +1102,11 @@ int __set_page_dirty_nobuffers(struct page *page)
> > /* !PageAnon && !swapper_space */
> > __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
> > }
> > - return 1;
> > }
> > - return 0;
> > +
> > + task_dirty_inc(current);
> > +
> > + return 1;
> > }
> > EXPORT_SYMBOL(__set_page_dirty_nobuffers);
> >
> > @@ -1122,7 +1126,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage);
> > * If the mapping doesn't provide a set_page_dirty a_op, then
> > * just fall through and assume that it wants buffer_heads.
> > */
> > -static int __set_page_dirty(struct page *page)
> > +int set_page_dirty(struct page *page)
> > {
> > struct address_space *mapping = page_mapping(page);
> >
> > @@ -1140,14 +1144,6 @@ static int __set_page_dirty(struct page *page)
> > }
> > return 0;
> > }
> > -
> > -int set_page_dirty(struct page *page)
> > -{
> > - int ret = __set_page_dirty(page);
> > - if (ret)
> > - task_dirty_inc(current);
> > - return ret;
> > -}
> > EXPORT_SYMBOL(set_page_dirty);
> >
> > /*
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

2008-07-10 07:24:21

by Nick Piggin

[permalink] [raw]
Subject: Re: [PATCH] fix task dirty balancing

On Thursday 10 July 2008 13:10, YAMAMOTO Takashi wrote:
> hi,
>
> thanks for the review.
>
> > On Wednesday 09 July 2008 09:38, YAMAMOTO Takashi wrote:
> > > hi,
> > >
> > > > Please beat me to cleaning up this stuff - otherwise I'll have to
> > > > look at it when I get back from holidays.
> > >
> > > how about the following?
> >
> > Quite good, however I would like to keep the buffers warning if it isn't
> > too difficult (it has already caught one or two real bugs).
>
> isn't the WARN_ON_ONCE in __set_page_dirty_nobuffers enough?

No, because it will skip warning if the page has buffers (which is a
very common case for fs/buffer.c :)).


> > Also, we should
> > split out the bugfix from the cleanup. But yes overall I think the result
> > looks quite nice.
>
> honestly, i don't think it makes much sense to separate the fix and
> the cleanup in this particular case. trying to keep the bug while
> the code cleanup naturally fixes it, or vice versa, would be a waste
> of time.

It always makes sense to separate fix and cleanup IMO. The most important
reason is that it makes it clearer to review the fix. Secondarily, it
makes it easier to ensure no unwanted changes in the cleanup part.

2008-07-24 00:27:26

by YAMAMOTO Takashi

[permalink] [raw]
Subject: Re: [PATCH] fix task dirty balancing

hi,

> On Thursday 10 July 2008 13:10, YAMAMOTO Takashi wrote:
> > hi,
> >
> > thanks for the review.
> >
> > > On Wednesday 09 July 2008 09:38, YAMAMOTO Takashi wrote:
> > > > hi,
> > > >
> > > > > Please beat me to cleaning up this stuff - otherwise I'll have to
> > > > > look at it when I get back from holidays.
> > > >
> > > > how about the following?
> > >
> > > Quite good, however I would like to keep the buffers warning if it isn't
> > > too difficult (it has already caught one or two real bugs).
> >
> > isn't the WARN_ON_ONCE in __set_page_dirty_nobuffers enough?
>
> No, because it will skip warning if the page has buffers (which is a
> very common case for fs/buffer.c :)).

i see. thanks.

> > > Also, we should
> > > split out the bugfix from the cleanup. But yes overall I think the result
> > > looks quite nice.
> >
> > honestly, i don't think it makes much sense to separate the fix and
> > the cleanup in this particular case. trying to keep the bug while
> > the code cleanup naturally fixes it, or vice versa, would be a waste
> > of time.
>
> It always makes sense to separate fix and cleanup IMO. The most important
> reason is that it makes it clearer to review the fix. Secondarily, it
> makes it easier to ensure no unwanted changes in the cleanup part.

ok, i removed the cleanup part because i don't want to participate in
this kind of discussion. is the following patch ok for you?

YAMAMOTO Takashi


Signed-off-by: YAMAMOTO Takashi <[email protected]>
---

diff --git a/fs/buffer.c b/fs/buffer.c
index 4ffb5bb..3a89d58 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -708,27 +708,29 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
static int __set_page_dirty(struct page *page,
struct address_space *mapping, int warn)
{
- if (unlikely(!mapping))
- return !TestSetPageDirty(page);

if (TestSetPageDirty(page))
return 0;

- spin_lock_irq(&mapping->tree_lock);
- if (page->mapping) { /* Race with truncate? */
- WARN_ON_ONCE(warn && !PageUptodate(page));
+ if (likely(mapping)) {
+ spin_lock_irq(&mapping->tree_lock);
+ if (page->mapping) { /* Race with truncate? */
+ WARN_ON_ONCE(warn && !PageUptodate(page));

- if (mapping_cap_account_dirty(mapping)) {
- __inc_zone_page_state(page, NR_FILE_DIRTY);
- __inc_bdi_stat(mapping->backing_dev_info,
- BDI_RECLAIMABLE);
- task_io_account_write(PAGE_CACHE_SIZE);
+ if (mapping_cap_account_dirty(mapping)) {
+ __inc_zone_page_state(page, NR_FILE_DIRTY);
+ __inc_bdi_stat(mapping->backing_dev_info,
+ BDI_RECLAIMABLE);
+ task_io_account_write(PAGE_CACHE_SIZE);
+ }
+ radix_tree_tag_set(&mapping->page_tree,
+ page_index(page), PAGECACHE_TAG_DIRTY);
}
- radix_tree_tag_set(&mapping->page_tree,
- page_index(page), PAGECACHE_TAG_DIRTY);
+ spin_unlock_irq(&mapping->tree_lock);
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
- spin_unlock_irq(&mapping->tree_lock);
- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+ task_dirty_inc(current);

return 1;
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a4eeb3c..33fd91a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1167,6 +1167,7 @@ extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);

/* mm/page-writeback.c */
int write_one_page(struct page *page, int wait);
+void task_dirty_inc(struct task_struct *tsk);

/* readahead.c */
#define VM_MAX_READAHEAD 128 /* kbytes */
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 29b1d1e..382f742 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -176,10 +176,11 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
}
EXPORT_SYMBOL_GPL(bdi_writeout_inc);

-static inline void task_dirty_inc(struct task_struct *tsk)
+void task_dirty_inc(struct task_struct *tsk)
{
prop_inc_single(&vm_dirties, &tsk->dirties);
}
+EXPORT_SYMBOL_GPL(task_dirty_inc);

/*
* Obtain an accurate fraction of the BDI's portion.
@@ -1074,8 +1075,13 @@ int __set_page_dirty_no_writeback(struct page *page)
*/
int __set_page_dirty_nobuffers(struct page *page)
{
- if (!TestSetPageDirty(page)) {
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping;
+
+ if (TestSetPageDirty(page))
+ return 0;
+
+ mapping = page_mapping(page);
+ if (likely(mapping)) {
struct address_space *mapping2;

if (!mapping)
@@ -1100,9 +1106,11 @@ int __set_page_dirty_nobuffers(struct page *page)
/* !PageAnon && !swapper_space */
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
- return 1;
}
- return 0;
+
+ task_dirty_inc(current);
+
+ return 1;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);

@@ -1122,7 +1130,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage);
* If the mapping doesn't provide a set_page_dirty a_op, then
* just fall through and assume that it wants buffer_heads.
*/
-static int __set_page_dirty(struct page *page)
+int set_page_dirty(struct page *page)
{
struct address_space *mapping = page_mapping(page);

@@ -1140,14 +1148,6 @@ static int __set_page_dirty(struct page *page)
}
return 0;
}
-
-int set_page_dirty(struct page *page)
-{
- int ret = __set_page_dirty(page);
- if (ret)
- task_dirty_inc(current);
- return ret;
-}
EXPORT_SYMBOL(set_page_dirty);

/*

2008-07-24 15:09:36

by Nick Piggin

[permalink] [raw]
Subject: Re: [PATCH] fix task dirty balancing

Hi,

On Thursday 24 July 2008 10:27, YAMAMOTO Takashi wrote:

> ok, i removed the cleanup part because i don't want to participate in
> this kind of discussion.
> is the following patch ok for you?

Thanks, it looks good. I don't think we need to export task_dirty_inc,
as buffer.c cannot be built as a module. But otherwise,

Acked-by: Nick Piggin <[email protected]>

It would be nice to get this into 2.6.27.


> YAMAMOTO Takashi
>
>
> Signed-off-by: YAMAMOTO Takashi <[email protected]>
> ---
>
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 4ffb5bb..3a89d58 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -708,27 +708,29 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
> static int __set_page_dirty(struct page *page,
> struct address_space *mapping, int warn)
> {
> - if (unlikely(!mapping))
> - return !TestSetPageDirty(page);
>
> if (TestSetPageDirty(page))
> return 0;
>
> - spin_lock_irq(&mapping->tree_lock);
> - if (page->mapping) { /* Race with truncate? */
> - WARN_ON_ONCE(warn && !PageUptodate(page));
> + if (likely(mapping)) {
> + spin_lock_irq(&mapping->tree_lock);
> + if (page->mapping) { /* Race with truncate? */
> + WARN_ON_ONCE(warn && !PageUptodate(page));
>
> - if (mapping_cap_account_dirty(mapping)) {
> - __inc_zone_page_state(page, NR_FILE_DIRTY);
> - __inc_bdi_stat(mapping->backing_dev_info,
> - BDI_RECLAIMABLE);
> - task_io_account_write(PAGE_CACHE_SIZE);
> + if (mapping_cap_account_dirty(mapping)) {
> + __inc_zone_page_state(page, NR_FILE_DIRTY);
> + __inc_bdi_stat(mapping->backing_dev_info,
> + BDI_RECLAIMABLE);
> + task_io_account_write(PAGE_CACHE_SIZE);
> + }
> + radix_tree_tag_set(&mapping->page_tree,
> + page_index(page), PAGECACHE_TAG_DIRTY);
> }
> - radix_tree_tag_set(&mapping->page_tree,
> - page_index(page), PAGECACHE_TAG_DIRTY);
> + spin_unlock_irq(&mapping->tree_lock);
> + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
> }
> - spin_unlock_irq(&mapping->tree_lock);
> - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
> +
> + task_dirty_inc(current);
>
> return 1;
> }
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index a4eeb3c..33fd91a 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1167,6 +1167,7 @@ extern int filemap_fault(struct vm_area_struct *,
> struct vm_fault *);
>
> /* mm/page-writeback.c */
> int write_one_page(struct page *page, int wait);
> +void task_dirty_inc(struct task_struct *tsk);
>
> /* readahead.c */
> #define VM_MAX_READAHEAD 128 /* kbytes */
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index 29b1d1e..382f742 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -176,10 +176,11 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
> }
> EXPORT_SYMBOL_GPL(bdi_writeout_inc);
>
> -static inline void task_dirty_inc(struct task_struct *tsk)
> +void task_dirty_inc(struct task_struct *tsk)
> {
> prop_inc_single(&vm_dirties, &tsk->dirties);
> }
> +EXPORT_SYMBOL_GPL(task_dirty_inc);
>
> /*
> * Obtain an accurate fraction of the BDI's portion.
> @@ -1074,8 +1075,13 @@ int __set_page_dirty_no_writeback(struct page *page)
> */
> int __set_page_dirty_nobuffers(struct page *page)
> {
> - if (!TestSetPageDirty(page)) {
> - struct address_space *mapping = page_mapping(page);
> + struct address_space *mapping;
> +
> + if (TestSetPageDirty(page))
> + return 0;
> +
> + mapping = page_mapping(page);
> + if (likely(mapping)) {
> struct address_space *mapping2;
>
> if (!mapping)
> @@ -1100,9 +1106,11 @@ int __set_page_dirty_nobuffers(struct page *page)
> /* !PageAnon && !swapper_space */
> __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
> }
> - return 1;
> }
> - return 0;
> +
> + task_dirty_inc(current);
> +
> + return 1;
> }
> EXPORT_SYMBOL(__set_page_dirty_nobuffers);
>
> @@ -1122,7 +1130,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage);
> * If the mapping doesn't provide a set_page_dirty a_op, then
> * just fall through and assume that it wants buffer_heads.
> */
> -static int __set_page_dirty(struct page *page)
> +int set_page_dirty(struct page *page)
> {
> struct address_space *mapping = page_mapping(page);
>
> @@ -1140,14 +1148,6 @@ static int __set_page_dirty(struct page *page)
> }
> return 0;
> }
> -
> -int set_page_dirty(struct page *page)
> -{
> - int ret = __set_page_dirty(page);
> - if (ret)
> - task_dirty_inc(current);
> - return ret;
> -}
> EXPORT_SYMBOL(set_page_dirty);
>
> /*

2008-07-25 08:05:24

by YAMAMOTO Takashi

[permalink] [raw]
Subject: Re: [PATCH] fix task dirty balancing

hi,

> Hi,
>
> On Thursday 24 July 2008 10:27, YAMAMOTO Takashi wrote:
>
> > ok, i removed the cleanup part because i don't want to participate in
> > this kind of discussion.
> > is the following patch ok for you?
>
> Thanks, it looks good. I don't think we need to export task_dirty_inc,
> as buffer.c cannot be built as a module. But otherwise,
>
> Acked-by: Nick Piggin <[email protected]>
>
> It would be nice to get this into 2.6.27.

thanks for your review. i removed the unnecessary export.

YAMAMOTO Takashi


Signed-off-by: YAMAMOTO Takashi <[email protected]>
---

diff --git a/fs/buffer.c b/fs/buffer.c
index 4ffb5bb..3a89d58 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -708,27 +708,29 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
static int __set_page_dirty(struct page *page,
struct address_space *mapping, int warn)
{
- if (unlikely(!mapping))
- return !TestSetPageDirty(page);

if (TestSetPageDirty(page))
return 0;

- spin_lock_irq(&mapping->tree_lock);
- if (page->mapping) { /* Race with truncate? */
- WARN_ON_ONCE(warn && !PageUptodate(page));
+ if (likely(mapping)) {
+ spin_lock_irq(&mapping->tree_lock);
+ if (page->mapping) { /* Race with truncate? */
+ WARN_ON_ONCE(warn && !PageUptodate(page));

- if (mapping_cap_account_dirty(mapping)) {
- __inc_zone_page_state(page, NR_FILE_DIRTY);
- __inc_bdi_stat(mapping->backing_dev_info,
- BDI_RECLAIMABLE);
- task_io_account_write(PAGE_CACHE_SIZE);
+ if (mapping_cap_account_dirty(mapping)) {
+ __inc_zone_page_state(page, NR_FILE_DIRTY);
+ __inc_bdi_stat(mapping->backing_dev_info,
+ BDI_RECLAIMABLE);
+ task_io_account_write(PAGE_CACHE_SIZE);
+ }
+ radix_tree_tag_set(&mapping->page_tree,
+ page_index(page), PAGECACHE_TAG_DIRTY);
}
- radix_tree_tag_set(&mapping->page_tree,
- page_index(page), PAGECACHE_TAG_DIRTY);
+ spin_unlock_irq(&mapping->tree_lock);
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
- spin_unlock_irq(&mapping->tree_lock);
- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+ task_dirty_inc(current);

return 1;
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a4eeb3c..33fd91a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1167,6 +1167,7 @@ extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);

/* mm/page-writeback.c */
int write_one_page(struct page *page, int wait);
+void task_dirty_inc(struct task_struct *tsk);

/* readahead.c */
#define VM_MAX_READAHEAD 128 /* kbytes */
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 29b1d1e..e710481 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -176,7 +176,7 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
}
EXPORT_SYMBOL_GPL(bdi_writeout_inc);

-static inline void task_dirty_inc(struct task_struct *tsk)
+void task_dirty_inc(struct task_struct *tsk)
{
prop_inc_single(&vm_dirties, &tsk->dirties);
}
@@ -1074,8 +1074,13 @@ int __set_page_dirty_no_writeback(struct page *page)
*/
int __set_page_dirty_nobuffers(struct page *page)
{
- if (!TestSetPageDirty(page)) {
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping;
+
+ if (TestSetPageDirty(page))
+ return 0;
+
+ mapping = page_mapping(page);
+ if (likely(mapping)) {
struct address_space *mapping2;

if (!mapping)
@@ -1100,9 +1105,11 @@ int __set_page_dirty_nobuffers(struct page *page)
/* !PageAnon && !swapper_space */
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
- return 1;
}
- return 0;
+
+ task_dirty_inc(current);
+
+ return 1;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);

@@ -1122,7 +1129,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage);
* If the mapping doesn't provide a set_page_dirty a_op, then
* just fall through and assume that it wants buffer_heads.
*/
-static int __set_page_dirty(struct page *page)
+int set_page_dirty(struct page *page)
{
struct address_space *mapping = page_mapping(page);

@@ -1140,14 +1147,6 @@ static int __set_page_dirty(struct page *page)
}
return 0;
}
-
-int set_page_dirty(struct page *page)
-{
- int ret = __set_page_dirty(page);
- if (ret)
- task_dirty_inc(current);
- return ret;
-}
EXPORT_SYMBOL(set_page_dirty);

/*

2008-07-25 09:57:59

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] fix task dirty balancing

On Fri, 2008-07-25 at 17:04 +0900, YAMAMOTO Takashi wrote:

> Signed-off-by: YAMAMOTO Takashi <[email protected]>
> ---
>
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 4ffb5bb..3a89d58 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -708,27 +708,29 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
> static int __set_page_dirty(struct page *page,
> struct address_space *mapping, int warn)
> {
> - if (unlikely(!mapping))
> - return !TestSetPageDirty(page);
>
> if (TestSetPageDirty(page))
> return 0;
>
> - spin_lock_irq(&mapping->tree_lock);
> - if (page->mapping) { /* Race with truncate? */
> - WARN_ON_ONCE(warn && !PageUptodate(page));
> + if (likely(mapping)) {
> + spin_lock_irq(&mapping->tree_lock);
> + if (page->mapping) { /* Race with truncate? */
> + WARN_ON_ONCE(warn && !PageUptodate(page));
>
> - if (mapping_cap_account_dirty(mapping)) {
> - __inc_zone_page_state(page, NR_FILE_DIRTY);
> - __inc_bdi_stat(mapping->backing_dev_info,
> - BDI_RECLAIMABLE);
> - task_io_account_write(PAGE_CACHE_SIZE);
> + if (mapping_cap_account_dirty(mapping)) {
> + __inc_zone_page_state(page, NR_FILE_DIRTY);
> + __inc_bdi_stat(mapping->backing_dev_info,
> + BDI_RECLAIMABLE);
> + task_io_account_write(PAGE_CACHE_SIZE);
> + }
> + radix_tree_tag_set(&mapping->page_tree,
> + page_index(page), PAGECACHE_TAG_DIRTY);
> }
> - radix_tree_tag_set(&mapping->page_tree,
> - page_index(page), PAGECACHE_TAG_DIRTY);
> + spin_unlock_irq(&mapping->tree_lock);
> + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
> }
> - spin_unlock_irq(&mapping->tree_lock);
> - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
> +
> + task_dirty_inc(current);
>
> return 1;
> }
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index a4eeb3c..33fd91a 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1167,6 +1167,7 @@ extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
>
> /* mm/page-writeback.c */
> int write_one_page(struct page *page, int wait);
> +void task_dirty_inc(struct task_struct *tsk);
>
> /* readahead.c */
> #define VM_MAX_READAHEAD 128 /* kbytes */
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index 29b1d1e..e710481 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -176,7 +176,7 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
> }
> EXPORT_SYMBOL_GPL(bdi_writeout_inc);
>
> -static inline void task_dirty_inc(struct task_struct *tsk)
> +void task_dirty_inc(struct task_struct *tsk)
> {
> prop_inc_single(&vm_dirties, &tsk->dirties);
> }
> @@ -1074,8 +1074,13 @@ int __set_page_dirty_no_writeback(struct page *page)
> */
> int __set_page_dirty_nobuffers(struct page *page)
> {
> - if (!TestSetPageDirty(page)) {
> - struct address_space *mapping = page_mapping(page);
> + struct address_space *mapping;
> +
> + if (TestSetPageDirty(page))
> + return 0;
> +
> + mapping = page_mapping(page);
> + if (likely(mapping)) {
> struct address_space *mapping2;
>
> if (!mapping)

This results in funny code..

> @@ -1100,9 +1105,11 @@ int __set_page_dirty_nobuffers(struct page *page)
> /* !PageAnon && !swapper_space */
> __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
> }
> - return 1;
> }
> - return 0;
> +
> + task_dirty_inc(current);
> +
> + return 1;
> }
> EXPORT_SYMBOL(__set_page_dirty_nobuffers);
>
> @@ -1122,7 +1129,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage);
> * If the mapping doesn't provide a set_page_dirty a_op, then
> * just fall through and assume that it wants buffer_heads.
> */
> -static int __set_page_dirty(struct page *page)
> +int set_page_dirty(struct page *page)
> {
> struct address_space *mapping = page_mapping(page);
>
> @@ -1140,14 +1147,6 @@ static int __set_page_dirty(struct page *page)
> }
> return 0;
> }
> -
> -int set_page_dirty(struct page *page)
> -{
> - int ret = __set_page_dirty(page);
> - if (ret)
> - task_dirty_inc(current);
> - return ret;
> -}
> EXPORT_SYMBOL(set_page_dirty);
>
> /*

2008-07-28 05:50:43

by YAMAMOTO Takashi

[permalink] [raw]
Subject: Re: [PATCH] fix task dirty balancing

hi,

> > @@ -1074,8 +1074,13 @@ int __set_page_dirty_no_writeback(struct page *page)
> > */
> > int __set_page_dirty_nobuffers(struct page *page)
> > {
> > - if (!TestSetPageDirty(page)) {
> > - struct address_space *mapping = page_mapping(page);
> > + struct address_space *mapping;
> > +
> > + if (TestSetPageDirty(page))
> > + return 0;
> > +
> > + mapping = page_mapping(page);
> > + if (likely(mapping)) {
> > struct address_space *mapping2;
> >
> > if (!mapping)
>
> This results in funny code..

oops, you're right.
i removed the dead code.

YAMAMOTO Takashi


Signed-off-by: YAMAMOTO Takashi <[email protected]>
---

diff --git a/fs/buffer.c b/fs/buffer.c
index 4ffb5bb..3a89d58 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -708,27 +708,29 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
static int __set_page_dirty(struct page *page,
struct address_space *mapping, int warn)
{
- if (unlikely(!mapping))
- return !TestSetPageDirty(page);

if (TestSetPageDirty(page))
return 0;

- spin_lock_irq(&mapping->tree_lock);
- if (page->mapping) { /* Race with truncate? */
- WARN_ON_ONCE(warn && !PageUptodate(page));
+ if (likely(mapping)) {
+ spin_lock_irq(&mapping->tree_lock);
+ if (page->mapping) { /* Race with truncate? */
+ WARN_ON_ONCE(warn && !PageUptodate(page));

- if (mapping_cap_account_dirty(mapping)) {
- __inc_zone_page_state(page, NR_FILE_DIRTY);
- __inc_bdi_stat(mapping->backing_dev_info,
- BDI_RECLAIMABLE);
- task_io_account_write(PAGE_CACHE_SIZE);
+ if (mapping_cap_account_dirty(mapping)) {
+ __inc_zone_page_state(page, NR_FILE_DIRTY);
+ __inc_bdi_stat(mapping->backing_dev_info,
+ BDI_RECLAIMABLE);
+ task_io_account_write(PAGE_CACHE_SIZE);
+ }
+ radix_tree_tag_set(&mapping->page_tree,
+ page_index(page), PAGECACHE_TAG_DIRTY);
}
- radix_tree_tag_set(&mapping->page_tree,
- page_index(page), PAGECACHE_TAG_DIRTY);
+ spin_unlock_irq(&mapping->tree_lock);
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
- spin_unlock_irq(&mapping->tree_lock);
- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+ task_dirty_inc(current);

return 1;
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a4eeb3c..33fd91a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1167,6 +1167,7 @@ extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);

/* mm/page-writeback.c */
int write_one_page(struct page *page, int wait);
+void task_dirty_inc(struct task_struct *tsk);

/* readahead.c */
#define VM_MAX_READAHEAD 128 /* kbytes */
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 29b1d1e..3062f26 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -176,7 +176,7 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
}
EXPORT_SYMBOL_GPL(bdi_writeout_inc);

-static inline void task_dirty_inc(struct task_struct *tsk)
+void task_dirty_inc(struct task_struct *tsk)
{
prop_inc_single(&vm_dirties, &tsk->dirties);
}
@@ -1074,12 +1074,14 @@ int __set_page_dirty_no_writeback(struct page *page)
*/
int __set_page_dirty_nobuffers(struct page *page)
{
- if (!TestSetPageDirty(page)) {
- struct address_space *mapping = page_mapping(page);
- struct address_space *mapping2;
+ struct address_space *mapping;

- if (!mapping)
- return 1;
+ if (TestSetPageDirty(page))
+ return 0;
+
+ mapping = page_mapping(page);
+ if (likely(mapping)) {
+ struct address_space *mapping2;

spin_lock_irq(&mapping->tree_lock);
mapping2 = page_mapping(page);
@@ -1100,9 +1102,11 @@ int __set_page_dirty_nobuffers(struct page *page)
/* !PageAnon && !swapper_space */
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
- return 1;
}
- return 0;
+
+ task_dirty_inc(current);
+
+ return 1;
}
EXPORT_SYMBOL(__set_page_dirty_nobuffers);

@@ -1122,7 +1126,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage);
* If the mapping doesn't provide a set_page_dirty a_op, then
* just fall through and assume that it wants buffer_heads.
*/
-static int __set_page_dirty(struct page *page)
+int set_page_dirty(struct page *page)
{
struct address_space *mapping = page_mapping(page);

@@ -1140,14 +1144,6 @@ static int __set_page_dirty(struct page *page)
}
return 0;
}
-
-int set_page_dirty(struct page *page)
-{
- int ret = __set_page_dirty(page);
- if (ret)
- task_dirty_inc(current);
- return ret;
-}
EXPORT_SYMBOL(set_page_dirty);

/*

2008-07-28 07:24:21

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] fix task dirty balancing

On Mon, 2008-07-28 at 14:50 +0900, YAMAMOTO Takashi wrote:
> hi,
>
> > > @@ -1074,8 +1074,13 @@ int __set_page_dirty_no_writeback(struct page *page)
> > > */
> > > int __set_page_dirty_nobuffers(struct page *page)
> > > {
> > > - if (!TestSetPageDirty(page)) {
> > > - struct address_space *mapping = page_mapping(page);
> > > + struct address_space *mapping;
> > > +
> > > + if (TestSetPageDirty(page))
> > > + return 0;
> > > +
> > > + mapping = page_mapping(page);
> > > + if (likely(mapping)) {
> > > struct address_space *mapping2;
> > >
> > > if (!mapping)
> >
> > This results in funny code..
>
> oops, you're right.
> i removed the dead code.

Looks good

Do we want to tidy stuff up with something like:


diff -u b/fs/buffer.c b/fs/buffer.c
--- b/fs/buffer.c
+++ b/fs/buffer.c
@@ -708,13 +708,16 @@
static int __set_page_dirty(struct page *page,
struct address_space *mapping, int warn)
{
-
if (TestSetPageDirty(page))
return 0;

if (likely(mapping)) {
+ struct address_space *mapping2;
+
spin_lock_irq(&mapping->tree_lock);
- if (page->mapping) { /* Race with truncate? */
+ mapping2 = page->mapping;
+ if (mapping2) { /* Race with truncate? */
+ BUG_ON(mapping2 != mapping);
WARN_ON_ONCE(warn && !PageUptodate(page));

if (mapping_cap_account_dirty(mapping)) {
diff -u b/mm/page-writeback.c b/mm/page-writeback.c
--- b/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1088,6 +1088,7 @@
if (mapping2) { /* Race with truncate? */
BUG_ON(mapping2 != mapping);
WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
+
if (mapping_cap_account_dirty(mapping)) {
__inc_zone_page_state(page, NR_FILE_DIRTY);
__inc_bdi_stat(mapping->backing_dev_info,




Acked-by: Peter Zijlstra <[email protected]>

> Signed-off-by: YAMAMOTO Takashi <[email protected]>
> ---
>
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 4ffb5bb..3a89d58 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -708,27 +708,29 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
> static int __set_page_dirty(struct page *page,
> struct address_space *mapping, int warn)
> {
> - if (unlikely(!mapping))
> - return !TestSetPageDirty(page);
>
> if (TestSetPageDirty(page))
> return 0;
>
> - spin_lock_irq(&mapping->tree_lock);
> - if (page->mapping) { /* Race with truncate? */
> - WARN_ON_ONCE(warn && !PageUptodate(page));
> + if (likely(mapping)) {
> + spin_lock_irq(&mapping->tree_lock);
> + if (page->mapping) { /* Race with truncate? */
> + WARN_ON_ONCE(warn && !PageUptodate(page));
>
> - if (mapping_cap_account_dirty(mapping)) {
> - __inc_zone_page_state(page, NR_FILE_DIRTY);
> - __inc_bdi_stat(mapping->backing_dev_info,
> - BDI_RECLAIMABLE);
> - task_io_account_write(PAGE_CACHE_SIZE);
> + if (mapping_cap_account_dirty(mapping)) {
> + __inc_zone_page_state(page, NR_FILE_DIRTY);
> + __inc_bdi_stat(mapping->backing_dev_info,
> + BDI_RECLAIMABLE);
> + task_io_account_write(PAGE_CACHE_SIZE);
> + }
> + radix_tree_tag_set(&mapping->page_tree,
> + page_index(page), PAGECACHE_TAG_DIRTY);
> }
> - radix_tree_tag_set(&mapping->page_tree,
> - page_index(page), PAGECACHE_TAG_DIRTY);
> + spin_unlock_irq(&mapping->tree_lock);
> + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
> }
> - spin_unlock_irq(&mapping->tree_lock);
> - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
> +
> + task_dirty_inc(current);
>
> return 1;
> }
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index a4eeb3c..33fd91a 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1167,6 +1167,7 @@ extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
>
> /* mm/page-writeback.c */
> int write_one_page(struct page *page, int wait);
> +void task_dirty_inc(struct task_struct *tsk);
>
> /* readahead.c */
> #define VM_MAX_READAHEAD 128 /* kbytes */
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index 29b1d1e..3062f26 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -176,7 +176,7 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
> }
> EXPORT_SYMBOL_GPL(bdi_writeout_inc);
>
> -static inline void task_dirty_inc(struct task_struct *tsk)
> +void task_dirty_inc(struct task_struct *tsk)
> {
> prop_inc_single(&vm_dirties, &tsk->dirties);
> }
> @@ -1074,12 +1074,14 @@ int __set_page_dirty_no_writeback(struct page *page)
> */
> int __set_page_dirty_nobuffers(struct page *page)
> {
> - if (!TestSetPageDirty(page)) {
> - struct address_space *mapping = page_mapping(page);
> - struct address_space *mapping2;
> + struct address_space *mapping;
>
> - if (!mapping)
> - return 1;
> + if (TestSetPageDirty(page))
> + return 0;
> +
> + mapping = page_mapping(page);
> + if (likely(mapping)) {
> + struct address_space *mapping2;
>
> spin_lock_irq(&mapping->tree_lock);
> mapping2 = page_mapping(page);
> @@ -1100,9 +1102,11 @@ int __set_page_dirty_nobuffers(struct page *page)
> /* !PageAnon && !swapper_space */
> __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
> }
> - return 1;
> }
> - return 0;
> +
> + task_dirty_inc(current);
> +
> + return 1;
> }
> EXPORT_SYMBOL(__set_page_dirty_nobuffers);
>
> @@ -1122,7 +1126,7 @@ EXPORT_SYMBOL(redirty_page_for_writepage);
> * If the mapping doesn't provide a set_page_dirty a_op, then
> * just fall through and assume that it wants buffer_heads.
> */
> -static int __set_page_dirty(struct page *page)
> +int set_page_dirty(struct page *page)
> {
> struct address_space *mapping = page_mapping(page);
>
> @@ -1140,14 +1144,6 @@ static int __set_page_dirty(struct page *page)
> }
> return 0;
> }
> -
> -int set_page_dirty(struct page *page)
> -{
> - int ret = __set_page_dirty(page);
> - if (ret)
> - task_dirty_inc(current);
> - return ret;
> -}
> EXPORT_SYMBOL(set_page_dirty);
>
> /*