Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1760632AbZDRVlB (ORCPT ); Sat, 18 Apr 2009 17:41:01 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1759862AbZDRVi6 (ORCPT ); Sat, 18 Apr 2009 17:38:58 -0400 Received: from mail-fx0-f158.google.com ([209.85.220.158]:59909 "EHLO mail-fx0-f158.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1759630AbZDRViy (ORCPT ); Sat, 18 Apr 2009 17:38:54 -0400 DomainKey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=from:to:cc:subject:date:message-id:x-mailer:in-reply-to:references; b=HUb4BTmC935Wmid6hlgE++jEHNbu6TnH8awHC+rlsaoyLDcermW0tfDalAUI8CZ0AU eypX4olr98gRH60hkBY1+7Y/OVBzZrqhzDWlUg7nByndFqAp3toYQB0GIEfeMwUyEDxI F4BJnNzD4VAihhnioK84puIr/TZ0wVxt3bU8Q= From: Andrea Righi To: Paul Menage Cc: Balbir Singh , Gui Jianfeng , KAMEZAWA Hiroyuki , agk@sourceware.org, akpm@linux-foundation.org, axboe@kernel.dk, baramsori72@gmail.com, Carl Henrik Lunde , dave@linux.vnet.ibm.com, Divyesh Shah , eric.rannaud@gmail.com, fernando@oss.ntt.co.jp, Hirokazu Takahashi , Li Zefan , matt@bluehost.com, dradford@bluehost.com, ngupta@google.com, randy.dunlap@oracle.com, roberto@unbit.it, Ryo Tsuruta , Satoshi UCHIDA , subrata@linux.vnet.ibm.com, yoshikawa.takuya@oss.ntt.co.jp, Nauman Rafique , fchecconi@gmail.com, paolo.valente@unimore.it, containers@lists.linux-foundation.org, linux-kernel@vger.kernel.org, Andrea Righi Subject: [PATCH 6/7] io-throttle instrumentation Date: Sat, 18 Apr 2009 23:38:31 +0200 Message-Id: <1240090712-1058-7-git-send-email-righi.andrea@gmail.com> X-Mailer: git-send-email 1.5.6.3 In-Reply-To: <1240090712-1058-1-git-send-email-righi.andrea@gmail.com> References: <1240090712-1058-1-git-send-email-righi.andrea@gmail.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 8005 Lines: 256 Apply the io-throttle control and page tracking to the appropriate kernel functions. 
Signed-off-by: Andrea Righi --- block/blk-core.c | 8 ++++++++ fs/aio.c | 12 ++++++++++++ fs/buffer.c | 2 ++ include/linux/sched.h | 7 +++++++ kernel/fork.c | 7 +++++++ mm/bounce.c | 2 ++ mm/filemap.c | 2 ++ mm/page-writeback.c | 2 ++ mm/readahead.c | 3 +++ 9 files changed, 45 insertions(+), 0 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 07ab754..4d7f9f6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -1547,11 +1548,16 @@ void submit_bio(int rw, struct bio *bio) * go through the normal accounting stuff before submission. */ if (bio_has_data(bio)) { + unsigned long sleep = 0; + if (rw & WRITE) { count_vm_events(PGPGOUT, count); + sleep = cgroup_io_throttle(bio, + bio->bi_bdev, bio->bi_size); } else { task_io_account_read(bio->bi_size); count_vm_events(PGPGIN, count); + cgroup_io_throttle(NULL, bio->bi_bdev, bio->bi_size); } if (unlikely(block_dump)) { @@ -1562,6 +1568,8 @@ void submit_bio(int rw, struct bio *bio) (unsigned long long)bio->bi_sector, bdevname(bio->bi_bdev, b)); } + if (sleep && !iothrottle_make_request(bio, jiffies + sleep)) + return; } generic_make_request(bio); diff --git a/fs/aio.c b/fs/aio.c index 76da125..ab6c457 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -1587,6 +1588,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, { struct kiocb *req; struct file *file; + struct block_device *bdev; ssize_t ret; /* enforce forwards compatibility on users */ @@ -1609,6 +1611,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, if (unlikely(!file)) return -EBADF; + /* check if we're exceeding the IO throttling limits */ + bdev = as_to_bdev(file->f_mapping); + ret = cgroup_io_throttle(NULL, bdev, 0); + if (unlikely(ret)) { + fput(file); + return -EAGAIN; + } + req = aio_get_req(ctx); /* returns with 2 references to 
req */ if (unlikely(!req)) { fput(file); @@ -1652,12 +1662,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, goto out_put_req; spin_lock_irq(&ctx->ctx_lock); + set_in_aio(); aio_run_iocb(req); if (!list_empty(&ctx->run_list)) { /* drain the run list */ while (__aio_run_iocbs(ctx)) ; } + unset_in_aio(); spin_unlock_irq(&ctx->ctx_lock); aio_put_req(req); /* drop extra ref to req */ return 0; diff --git a/fs/buffer.c b/fs/buffer.c index b3e5be7..2eb581f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -668,6 +669,7 @@ static void __set_page_dirty(struct page *page, if (page->mapping) { /* Race with truncate? */ WARN_ON_ONCE(warn && !PageUptodate(page)); account_page_dirtied(page, mapping); + iothrottle_set_pagedirty_owner(page, current->mm); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } diff --git a/include/linux/sched.h b/include/linux/sched.h index b4c38bc..e0cd710 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1356,6 +1356,13 @@ struct task_struct { unsigned long ptrace_message; siginfo_t *last_siginfo; /* For ptrace use. 
*/ struct task_io_accounting ioac; +#ifdef CONFIG_CGROUP_IO_THROTTLE + atomic_t in_aio; + unsigned long long io_throttle_bw_cnt; + unsigned long long io_throttle_bw_sleep; + unsigned long long io_throttle_iops_cnt; + unsigned long long io_throttle_iops_sleep; +#endif #if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ diff --git a/kernel/fork.c b/kernel/fork.c index b9e2edd..272c461 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1043,6 +1043,13 @@ static struct task_struct *copy_process(unsigned long clone_flags, task_io_accounting_init(&p->ioac); acct_clear_integrals(p); +#ifdef CONFIG_CGROUP_IO_THROTTLE + atomic_set(&p->in_aio, 0); + p->io_throttle_bw_cnt = 0; + p->io_throttle_bw_sleep = 0; + p->io_throttle_iops_cnt = 0; + p->io_throttle_iops_sleep = 0; +#endif posix_cpu_timers_init(p); p->lock_depth = -1; /* -1 = no lock */ diff --git a/mm/bounce.c b/mm/bounce.c index e590272..80bf52c 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -212,6 +213,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, to->bv_len = from->bv_len; to->bv_offset = from->bv_offset; inc_zone_page_state(to->bv_page, NR_BOUNCE); + iothrottle_copy_page_owner(to->bv_page, page); if (rw == WRITE) { char *vto, *vfrom; diff --git a/mm/filemap.c b/mm/filemap.c index 379ff0b..5498d1d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -464,6 +465,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, gfp_mask & GFP_RECLAIM_MASK); if (error) goto out; + iothrottle_set_page_owner(page, current->mm); error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); if (error == 0) { diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 30351f0..46cf92e 100644 --- a/mm/page-writeback.c +++ 
b/mm/page-writeback.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1243,6 +1244,7 @@ int __set_page_dirty_nobuffers(struct page *page) BUG_ON(mapping2 != mapping); WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page)); account_page_dirtied(page, mapping); + iothrottle_set_pagedirty_owner(page, current->mm); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } diff --git a/mm/readahead.c b/mm/readahead.c index 133b6d5..25cae4c 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -81,6 +82,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages, int (*filler)(void *, struct page *), void *data) { struct page *page; + struct block_device *bdev = as_to_bdev(mapping); int ret = 0; while (!list_empty(pages)) { @@ -99,6 +101,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages, break; } task_io_account_read(PAGE_CACHE_SIZE); + cgroup_io_throttle(NULL, bdev, PAGE_CACHE_SIZE); } return ret; } -- 1.5.6.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/