2003-11-22 00:52:59

by Matthew Dobson

Subject: [RFC] Make balance_dirty_pages zone aware (2/2)

diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.0-test9-setup_perzone_counters/include/linux/writeback.h linux-2.6.0-test9-numafy_balance_dirty_pages/include/linux/writeback.h
--- linux-2.6.0-test9-setup_perzone_counters/include/linux/writeback.h Wed Nov 19 15:22:48 2003
+++ linux-2.6.0-test9-numafy_balance_dirty_pages/include/linux/writeback.h Fri Nov 21 11:16:09 2003
@@ -84,7 +84,7 @@ int dirty_writeback_centisecs_handler(st
void __user *, size_t *);

void page_writeback_init(void);
-int balance_dirty_pages_ratelimited(struct address_space *mapping);
+int balance_dirty_pages_ratelimited(struct address_space *mapping, struct page *page);
int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
ssize_t sync_page_range(struct inode *inode, struct address_space *mapping,
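
The prototype change above means every caller of balance_dirty_pages_ratelimited()
must now hand down the page it just dirtied, so the throttling code can find the
page's zone.  A minimal sketch of what a call site looks like after this patch;
example_dirty_one_page() is a hypothetical caller, not part of the patch:

	static int example_dirty_one_page(struct address_space *mapping,
					  struct page *page)
	{
		set_page_dirty(page);
		/* throttle against the dirtied page's zone rather than
		 * against the machine-wide dirty counters */
		return balance_dirty_pages_ratelimited(mapping, page);
	}
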
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.0-test9-setup_perzone_counters/mm/filemap.c linux-2.6.0-test9-numafy_balance_dirty_pages/mm/filemap.c
--- linux-2.6.0-test9-setup_perzone_counters/mm/filemap.c Thu Nov 20 13:40:18 2003
+++ linux-2.6.0-test9-numafy_balance_dirty_pages/mm/filemap.c Fri Nov 21 11:16:48 2003
@@ -1991,7 +1991,7 @@ __generic_file_aio_write_nolock(struct k
page_cache_release(page);
if (status < 0)
break;
- status = balance_dirty_pages_ratelimited(mapping);
+ status = balance_dirty_pages_ratelimited(mapping, page);
if (status < 0) {
pr_debug("async balance_dirty_pages\n");
break;
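
Note that the zone is looked up from the page after page_cache_release() has
dropped our reference.  That should only be safe because page_zone() never
touches the page contents; in this era it simply decodes the zone bits stashed
in page->flags, roughly (quoted from memory of 2.6.0's include/linux/mm.h,
shown for illustration only):

	static inline struct zone *page_zone(struct page *page)
	{
		return zone_table[page->flags >> ZONE_SHIFT];
	}

and the struct page itself outlives the dropped reference.  A more defensive
variant would capture page_zone(page) before calling page_cache_release().
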
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.0-test9-setup_perzone_counters/mm/page-writeback.c linux-2.6.0-test9-numafy_balance_dirty_pages/mm/page-writeback.c
--- linux-2.6.0-test9-setup_perzone_counters/mm/page-writeback.c Wed Nov 19 15:26:22 2003
+++ linux-2.6.0-test9-numafy_balance_dirty_pages/mm/page-writeback.c Fri Nov 21 15:15:30 2003
@@ -105,18 +105,34 @@ static void background_writeout(unsigned
* clamping level.
*/
static void
-get_dirty_limits(struct page_state *ps, long *pbackground, long *pdirty)
+get_dirty_limits_zone(struct page_state *ps, long *pbackground, long *pdirty, struct zone *zone)
{
int background_ratio; /* Percentages */
int dirty_ratio;
int unmapped_ratio;
long background;
long dirty;
+ long zone_total_pages;
struct task_struct *tsk;

- get_page_state(ps);
+ if (!zone) {
+ /* Getting system-wide dirty limits */
+ get_page_state(ps);
+ zone_total_pages = total_pages;
+ } else {
+ /* Getting dirty limits for a specific zone */
+ memset(ps, 0, sizeof(*ps));
+ get_page_state_zone(ps, zone);
+ zone_total_pages = zone->present_pages - zone->pages_high;
+ if (zone_total_pages <= 0)
+ /* Not sure about this. We really shouldn't be using
+ * any zones that don't have at least pages_high pages
+ * present... At least it won't blow up this way?
+ */
+ zone_total_pages = zone->present_pages;
+ }

- unmapped_ratio = 100 - (ps->nr_mapped * 100) / total_pages;
+ unmapped_ratio = 100 - (ps->nr_mapped * 100) / zone_total_pages;

dirty_ratio = vm_dirty_ratio;
if (dirty_ratio > unmapped_ratio / 2)
@@ -129,8 +145,8 @@ get_dirty_limits(struct page_state *ps,
if (background_ratio >= dirty_ratio)
background_ratio = dirty_ratio / 2;

- background = (background_ratio * total_pages) / 100;
- dirty = (dirty_ratio * total_pages) / 100;
+ background = (background_ratio * zone_total_pages) / 100;
+ dirty = (dirty_ratio * zone_total_pages) / 100;
tsk = current;
if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
background += background / 4;
@@ -140,14 +156,20 @@ get_dirty_limits(struct page_state *ps,
*pdirty = dirty;
}

+static inline void
+get_dirty_limits(struct page_state *ps, long *pbackground, long *pdirty)
+{
+ get_dirty_limits_zone(ps, pbackground, pdirty, NULL);
+}
+
/*
- * balance_dirty_pages() must be called by processes which are generating dirty
- * data. It looks at the number of dirty pages in the machine and will force
- * the caller to perform writeback if the system is over `vm_dirty_ratio'.
- * If we're over `background_thresh' then pdflush is woken to perform some
- * writeout.
+ * balance_dirty_pages_zone() must be called by processes which are generating
+ * dirty data. It looks at the number of dirty pages in the specified zone
+ * and will force the caller to perform writeback if the zone is over
+ * `vm_dirty_ratio'. If we're over `background_thresh' then pdflush is woken
+ * to perform some writeout.
*/
-static int balance_dirty_pages(struct address_space *mapping)
+static int balance_dirty_pages_zone(struct address_space *mapping, struct zone *zone)
{
struct page_state ps;
long nr_reclaimable;
@@ -167,7 +189,7 @@ static int balance_dirty_pages(struct ad
.nonblocking = !is_sync_wait(current->io_wait)
};

- get_dirty_limits(&ps, &background_thresh, &dirty_thresh);
+ get_dirty_limits_zone(&ps, &background_thresh, &dirty_thresh, zone);
nr_reclaimable = ps.nr_dirty + ps.nr_unstable;
if (nr_reclaimable + ps.nr_writeback <= dirty_thresh)
break;
@@ -182,8 +204,8 @@ static int balance_dirty_pages(struct ad
*/
if (nr_reclaimable) {
writeback_inodes(&wbc);
- get_dirty_limits(&ps, &background_thresh,
- &dirty_thresh);
+ get_dirty_limits_zone(&ps, &background_thresh,
+ &dirty_thresh, zone);
nr_reclaimable = ps.nr_dirty + ps.nr_unstable;
if (nr_reclaimable + ps.nr_writeback <= dirty_thresh)
break;
@@ -210,6 +232,7 @@ static int balance_dirty_pages(struct ad
/**
* balance_dirty_pages_ratelimited - balance dirty memory state
* @mapping - address_space which was dirtied
+ * @page - page which was dirtied
*
* Processes which are dirtying memory should call in here once for each page
* which was newly dirtied. The function will periodically check the system's
@@ -220,14 +243,12 @@ static int balance_dirty_pages(struct ad
* decrease the ratelimiting by a lot, to prevent individual processes from
* overshooting the limit by (ratelimit_pages) each.
*/
-int balance_dirty_pages_ratelimited(struct address_space *mapping)
+int balance_dirty_pages_ratelimited(struct address_space *mapping, struct page *page)
{
static DEFINE_PER_CPU(int, ratelimits) = 0;
long ratelimit;

- ratelimit = ratelimit_pages;
- if (dirty_exceeded)
- ratelimit = 8;
+ ratelimit = dirty_exceeded ? 8 : ratelimit_pages;

/*
* Check the rate limiting. Also, we do not want to throttle real-time
@@ -236,7 +257,7 @@ int balance_dirty_pages_ratelimited(stru
if (get_cpu_var(ratelimits)++ >= ratelimit) {
__get_cpu_var(ratelimits) = 0;
put_cpu_var(ratelimits);
- return balance_dirty_pages(mapping);
+ return balance_dirty_pages_zone(mapping, page_zone(page));
}
put_cpu_var(ratelimits);
return 0;
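
To see what the per-zone clamping works out to, here is a worked example with
assumed numbers, not measurements: a zone with present_pages = 262144 (1GB of
4K pages) and pages_high = 384 gives zone_total_pages = 261760.  If half the
zone is mapped (ps.nr_mapped = 130880) and dirty_background_ratio and
vm_dirty_ratio are at their 2.6.0 defaults of 10 and 40, then:

	unmapped_ratio   = 100 - (130880 * 100) / 261760  = 50
	dirty_ratio      = min(40, 50 / 2)                = 25
	background_ratio = 10                  (already < dirty_ratio)
	dirty            = (25 * 261760) / 100            = 65440 pages (~255MB)
	background       = (10 * 261760) / 100            = 26176 pages (~102MB)

So a (non-realtime) writer starts pdflush background writeout once ~102MB of
the zone is reclaimable-dirty, and is throttled into doing writeback itself
once dirty + unstable + writeback pages in the zone exceed ~255MB.  The
ratelimiting above is unchanged: the check still only runs every
ratelimit_pages dirtyings per CPU, dropping to every 8 once dirty_exceeded is
set; it is just performed against the page's zone now.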


Attachments:
numafy_balance_dirty_pages.patch (6.63 kB)