Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751350AbVJ2Fun (ORCPT ); Sat, 29 Oct 2005 01:50:43 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1751356AbVJ2Frx (ORCPT ); Sat, 29 Oct 2005 01:47:53 -0400 Received: from ns.ustc.edu.cn ([202.38.64.1]:38795 "EHLO mx1.ustc.edu.cn") by vger.kernel.org with ESMTP id S1751350AbVJ2Frd (ORCPT ); Sat, 29 Oct 2005 01:47:33 -0400 Message-Id: <20051029060244.148591000@localhost.localdomain> References: <20051029060216.159380000@localhost.localdomain> Date: Sat, 29 Oct 2005 14:02:29 +0800 From: Wu Fengguang To: linux-kernel@vger.kernel.org Cc: Andrew Morton , Wu Fengguang Subject: [PATCH 13/13] readahead: page aging accounting Content-Disposition: inline; filename=readahead-account-aging.patch Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 7481 Lines: 227 The accuracy of stateful thrashing-threshold estimation depends largely on the measurement of cold page aging speed. A file named `pageaging' is created in debugfs to monitor the trace of two measurement variables: per-zone `nr_page_aging' and per-cpu `smooth_aging'. Their values and the jiffies are recorded each time one of them has a delta of 1, 1/2, 1/4, 1/16, 1/256, 1/4096 (nr_inactive + nr_free). Sample series of collected data shows that smooth_aging is more stable in small sampling granularity: time dt page_aging8 smooth_aging8 872765 26 520056 33 653782 163 872791 12 520089 132 653945 51 872803 4 520221 132 653996 66 872807 17 520353 165 654062 107 872824 22 520518 99 654169 74 872846 372 520617 99 654243 78 873218 294 520716 99 654321 73 873512 196 520815 99 654394 130 873708 15 520914 231 654524 28 873723 15 521145 198 654552 9 873738 881 521343 99 654561 182 874619 700 521442 0 654743 198 875319 384 521442 66 654941 110 875703 2119 521508 99 655051 1632 877822 3960 521607 0 656683 980 881782 904 521607 0 657663 216 time dt page_aging1 smooth_aging1 -90822 12418 5775 12999 33302 10767 -78404 17510 18774 10303 44069 10345 -60894 24757 29077 9871 54414 14615 -36137 19194 38948 10404 69029 13726 -16943 19636 49352 10440 82755 12865 2693 16299 59792 12453 95620 10734 18992 19851 72245 10073 106354 15960 38843 16099 82318 10767 122314 14059 54942 16094 93085 10041 136373 12117 71036 19888 103126 12595 148490 16155 90924 18452 115721 9782 164645 11705 109376 22395 125503 10214 176350 13679 131771 19310 135717 10759 190029 11843 151081 20793 146476 10699 201872 12595 171874 22308 157175 10321 214467 13157 194182 17954 167496 10773 227624 14803 212136 19946 178269 10554 242427 13391 232082 21051 188823 11179 255818 11783 Signed-off-by: Wu Fengguang --- mm/readahead.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 133 insertions(+), 1 deletion(-) --- linux-2.6.14-rc5-mm1.orig/mm/readahead.c +++ linux-2.6.14-rc5-mm1/mm/readahead.c @@ -207,11 +207,130 @@ static int ra_account_show(struct seq_fi return 0; } +/* + * Measure the aging progress of cold pages over time. + */ +#define AGING_INFO_SIZE (1 << 8) +#define AGING_INFO_MASK (AGING_INFO_SIZE - 1) +static int aging_info_shift[] = {0, 1, 2, 4, 8, 12}; +#define AGING_INFO_SHIFTS (sizeof(aging_info_shift)/\ + sizeof(aging_info_shift[0])) +static int aging_info_index[AGING_INFO_SHIFTS]; +static unsigned long aging_info[AGING_INFO_SIZE][AGING_INFO_SHIFTS*3]; +static spinlock_t aging_info_lock = SPIN_LOCK_UNLOCKED; + +static unsigned long nr_free_inactive(void); +static unsigned long nr_page_aging(void); +static unsigned long nr_smooth_aging(void); + +static void collect_aging_info(void) +{ + int i; + unsigned long mem; + unsigned long page_aging; + unsigned long smooth_aging; + + mem = nr_free_inactive(); + page_aging = nr_page_aging(); + smooth_aging = nr_smooth_aging(); + + spin_lock_irq(&aging_info_lock); + + for (i = AGING_INFO_SHIFTS - 1; i >= 0; i--) { + if (smooth_aging - aging_info[aging_info_index[i]][i*3+2] + + page_aging - aging_info[aging_info_index[i]][i*3+1] > + 2 * (mem >> aging_info_shift[i])) { + aging_info_index[i]++; + aging_info_index[i] &= AGING_INFO_MASK; + aging_info[aging_info_index[i]][i*3] = jiffies; + aging_info[aging_info_index[i]][i*3+1] = page_aging; + aging_info[aging_info_index[i]][i*3+2] = smooth_aging; + } else + break; + } + + spin_unlock_irq(&aging_info_lock); +} + +static void *aginginfo_start(struct seq_file *s, loff_t *pos) +{ + int n = *pos; + int i; + + spin_lock_irq(&aging_info_lock); + + if (!n) { + for (i = 0; i < AGING_INFO_SHIFTS; i++) { + seq_printf(s, "%12s %10s %18s%d %18s%d\t", "time","dt", + "page_aging", aging_info_shift[i], + "smooth_aging", aging_info_shift[i]); + } + seq_puts(s, "\n"); + } + + if (++n < AGING_INFO_SIZE) + return (void *)n; + else + return NULL; +} + +static void *aginginfo_next(struct seq_file *s, void *p, loff_t *pos) +{ + int n = (int)p; + + ++*pos; + return (void *)(++n < AGING_INFO_SIZE ? n : 0); +} + +static void aginginfo_stop(struct seq_file *s, void *p) +{ + spin_unlock_irq(&aging_info_lock); +} + +static int aginginfo_show(struct seq_file *s, void *p) +{ + int n = (int)p; + int i; + int index0; + int index1; + long time; + unsigned long nr1; + unsigned long nr2; + + for (i = 0; i < AGING_INFO_SHIFTS; i++) { + index0 = aging_info_index[i] + n; + index1 = aging_info_index[i] + n + 1; + index0 &= AGING_INFO_MASK; + index1 &= AGING_INFO_MASK; + time = aging_info[index0][i*3]; + nr1 = aging_info[index1][i*3+1] - aging_info[index0][i*3+1]; + nr2 = aging_info[index1][i*3+2] - aging_info[index0][i*3+2]; + seq_printf(s, "%12ld %10lu %10lu %8lu %10lu %8lu\t", + time, aging_info[index1][i*3] - time, + aging_info[index0][i*3+1], nr1, + aging_info[index0][i*3+2], nr2); + } + seq_puts(s, "\n"); + + return 0; +} + static struct dentry *readahead_dentry; +static struct dentry *pageaging_dentry; + +struct seq_operations aginginfo_ops = { + .start = aginginfo_start, + .next = aginginfo_next, + .stop = aginginfo_stop, + .show = aginginfo_show, +}; static int ra_debug_open(struct inode *inode, struct file *file) { - return single_open(file, ra_account_show, NULL); + if (file->f_dentry == readahead_dentry) + return single_open(file, ra_account_show, NULL); + else + return seq_open(file, &aginginfo_ops); } static ssize_t ra_debug_write(struct file *file, const char __user *buf, @@ -231,10 +350,20 @@ static struct file_operations ra_debug_f .release = single_release, }; +static struct file_operations aginginfo_fops = { + .open = ra_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + + static int __init readahead_init(void) { readahead_dentry = debugfs_create_file("readahead", 0644, NULL, NULL, &ra_debug_fops); + pageaging_dentry = debugfs_create_file("pageaging", + 0644, NULL, NULL, &aginginfo_fops); return 0; } @@ -1219,6 +1348,9 @@ static inline unsigned long compute_thra else *remain = 0; +#ifdef DEBUG_READAHEAD + collect_aging_info(); +#endif ddprintk("compute_thrashing_threshold: " "ra=%lu=%lu*%lu/%lu, remain %lu for %lu\n", ra_size, stream_shift, global_size, global_shift, -- - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/