2008-01-11 00:36:57

by Andrea Righi

Subject: [RFC][PATCH] per-task I/O throttling

Allow limiting the bandwidth of I/O-intensive processes, such as backup
tools running in the background, large file copies, checksums on huge files,
etc.

Processes of this kind can noticeably impact system responsiveness
for some time, and playing with task priorities is not always an
acceptable solution.

This patch allows specifying a maximum I/O rate, in sectors per second,
for each single process via /proc/<PID>/io_throttle (the default is zero,
which means no limit).

Signed-off-by: Andrea Righi <[email protected]>
---

diff -urpN linux-2.6.24-rc7/block/ll_rw_blk.c linux-2.6.24-rc7-task-io-throttle/block/ll_rw_blk.c
--- linux-2.6.24-rc7/block/ll_rw_blk.c 2008-01-06 22:45:38.000000000 +0100
+++ linux-2.6.24-rc7-task-io-throttle/block/ll_rw_blk.c 2008-01-10 23:23:41.000000000 +0100
@@ -31,6 +31,7 @@
#include <linux/blktrace_api.h>
#include <linux/fault-inject.h>
#include <linux/scatterlist.h>
+#include <linux/jiffies.h>

/*
* for max sense size
@@ -3184,6 +3185,41 @@ static inline int bio_check_eod(struct b
return 0;
}

+#ifdef CONFIG_TASK_IO_THROTTLE
+static inline void task_io_throttle(int nr_sectors)
+{
+ unsigned long delta;
+ long sleep;
+
+ if (!current->io_throttle) {
+ return;
+ }
+
+ if (!current->io_throttle_timestamp) {
+ current->io_throttle_timestamp = jiffies;
+ }
+ delta = jiffies_to_msecs((long)jiffies -
+ (long)(current->io_throttle_timestamp)) * 1000;
+
+ current->io_throttle_req += nr_sectors;
+
+ sleep = current->io_throttle_req -
+ current->io_throttle * max(delta, (unsigned long)1);
+ if (sleep > 0) {
+ schedule_timeout_uninterruptible(sleep);
+ }
+
+ if (delta) {
+ current->io_throttle_timestamp = jiffies;
+ current->io_throttle_req = 0;
+ }
+}
+#else
+static inline void task_io_throttle(int nr_sectors)
+{
+}
+#endif /* CONFIG_TASK_IO_THROTTLE */
+
/**
* generic_make_request: hand a buffer to its device driver for I/O
* @bio: The bio describing the location in memory and on the device.
@@ -3221,6 +3257,8 @@ static inline void __generic_make_reques
if (bio_check_eod(bio, nr_sectors))
goto end_io;

+ task_io_throttle(nr_sectors);
+
/*
* Resolve the mapping until finished. (drivers are
* still free to implement/resolve their own stacking
diff -urpN linux-2.6.24-rc7/fs/proc/base.c linux-2.6.24-rc7-task-io-throttle/fs/proc/base.c
--- linux-2.6.24-rc7/fs/proc/base.c 2008-01-06 22:45:38.000000000 +0100
+++ linux-2.6.24-rc7-task-io-throttle/fs/proc/base.c 2008-01-10 23:24:43.000000000 +0100
@@ -864,6 +864,56 @@ static const struct file_operations proc
.write = oom_adjust_write,
};

+#ifdef CONFIG_TASK_IO_THROTTLE
+static ssize_t io_throttle_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+ char buffer[PROC_NUMBUF];
+ size_t len;
+ unsigned long io_throttle;
+
+ if (!task)
+ return -ESRCH;
+ io_throttle = task->io_throttle;
+ put_task_struct(task);
+
+ len = snprintf(buffer, sizeof(buffer), "%lu\n", io_throttle);
+
+ return simple_read_from_buffer(buf, count, ppos, buffer, len);
+}
+
+static ssize_t io_throttle_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task;
+ char buffer[PROC_NUMBUF], *end;
+ unsigned long io_throttle;
+
+ memset(buffer, 0, sizeof(buffer));
+ if (count > sizeof(buffer) - 1)
+ count = sizeof(buffer) - 1;
+ if (copy_from_user(buffer, buf, count))
+ return -EFAULT;
+ io_throttle = simple_strtoul(buffer, &end, 0);
+ if (*end == '\n')
+ end++;
+ task = get_proc_task(file->f_path.dentry->d_inode);
+ if (!task)
+ return -ESRCH;
+ task->io_throttle = io_throttle;
+ put_task_struct(task);
+ if (end - buffer == 0)
+ return -EIO;
+ return end - buffer;
+}
+
+static const struct file_operations proc_io_throttle_operations = {
+ .read = io_throttle_read,
+ .write = io_throttle_write,
+};
+#endif /* CONFIG_TASK_IO_THROTTLE */
+
#ifdef CONFIG_MMU
static ssize_t clear_refs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
@@ -2250,6 +2300,9 @@ static const struct pid_entry tgid_base_
#ifdef CONFIG_TASK_IO_ACCOUNTING
INF("io", S_IRUGO, pid_io_accounting),
#endif
+#ifdef CONFIG_TASK_IO_THROTTLE
+ REG("io_throttle", S_IRUGO|S_IWUSR, io_throttle),
+#endif
};

static int proc_tgid_base_readdir(struct file * filp,
diff -urpN linux-2.6.24-rc7/include/linux/sched.h linux-2.6.24-rc7-task-io-throttle/include/linux/sched.h
--- linux-2.6.24-rc7/include/linux/sched.h 2008-01-06 22:45:38.000000000 +0100
+++ linux-2.6.24-rc7-task-io-throttle/include/linux/sched.h 2008-01-10 23:23:41.000000000 +0100
@@ -1167,6 +1167,15 @@ struct task_struct {
atomic_t fs_excl; /* holding fs exclusive resources */
struct rcu_head rcu;

+#ifdef CONFIG_TASK_IO_THROTTLE
+ /*
+ * per-process I/O throttle
+ */
+ unsigned long io_throttle;
+ unsigned long io_throttle_req;
+ unsigned long io_throttle_timestamp;
+#endif
+
/*
* cache last used pipe for splice
*/
diff -urpN linux-2.6.24-rc7/init/Kconfig linux-2.6.24-rc7-task-io-throttle/init/Kconfig
--- linux-2.6.24-rc7/init/Kconfig 2008-01-06 22:45:38.000000000 +0100
+++ linux-2.6.24-rc7-task-io-throttle/init/Kconfig 2008-01-10 23:23:41.000000000 +0100
@@ -206,6 +206,14 @@ config TASK_IO_ACCOUNTING

Say N if unsure.

+config TASK_IO_THROTTLE
+ bool "Enable per-task I/O throttling (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ help
+ Allow to limit the maximum I/O rate for specific process(es).
+
+ Say N if unsure.
+
config USER_NS
bool "User Namespaces (EXPERIMENTAL)"
default n


2008-01-11 01:26:41

by Bill Davidsen

Subject: Re: [RFC][PATCH] per-task I/O throttling

Andrea Righi wrote:
> Allow to limit the bandwidth of I/O-intensive processes, like backup
> tools running in background, large files copy, checksums on huge files,
> etc.
>
> This kind of processes can noticeably impact the system responsiveness
> for some time and playing with tasks' priority is not always an
> acceptable solution.
>
> This patch allows to specify a maximum I/O rate in sectors per second
> for each single process via /proc/<PID>/io_throttle (default is zero,
> that specify no limit).
>
It would seem to me that this would be vastly more useful in the real
world if there were a settable default, so that administrators could
avoid having to find and tune individual user processes. And it would
seem far less common that the admin would want to set the limit *up* for
a given process, and it's likely to be one known to the admin, at least
by name.

Of course, if you want to put in the effort to make it fully tunable, it
could have a default by UID or GID. Useful on machines shared by students
or managers.

--
Bill Davidsen <[email protected]>
"We have more to fear from the bungling of the incompetent than from
the machinations of the wicked." - from Slashdot

2008-01-11 10:29:07

by Andrea Righi

Subject: Re: [RFC][PATCH] per-task I/O throttling

Bill Davidsen wrote:
> Andrea Righi wrote:
>> Allow to limit the bandwidth of I/O-intensive processes, like backup
>> tools running in background, large files copy, checksums on huge files,
>> etc.
>>
>> This kind of processes can noticeably impact the system responsiveness
>> for some time and playing with tasks' priority is not always an
>> acceptable solution.
>>
>> This patch allows to specify a maximum I/O rate in sectors per second
>> for each single process via /proc/<PID>/io_throttle (default is zero,
>> that specify no limit).
>>
> It would seem to me that this would be vastly more useful in the real
> world if there were a settable default, so that administrators could
> avoid having to find and tune individual user processes. And it would
> seem far less common that the admin would want to set the limit *up* for
> a given process, and it's likely to be one known to the admin, at least
> by name.
>
> Of course if you want to do the effort to make it fully tunable, it
> could have a default by UID or GID. Usful on machines shared by students
> or managers.

At the moment I'm simply using it to back up my PC with this wrapper:

$ cat iothrottle
#!/bin/sh
[ $# -lt 2 ] && echo "usage: $0 RATE CMD" && exit 1
rate=$1
shift
$* &
trap "kill -9 $!" SIGINT SIGTERM
[ -e /proc/$!/io_throttle ] && echo $rate >/proc/$!/io_throttle
wait %1
$ ./iothrottle 100 tar ...

But I totally agree with you that setting the limits per-UID/per-GID,
instead of per-task, would actually be more useful.

Maybe a nice approach would be to define the UID/GID upper bounds via
configfs (for example) and allow users to tune the max I/O rate of
their individual tasks within the defined ranges. In this way it could
even be possible to define I/O shaping policies, i.e. give a bandwidth
of 10MB/s to user A, 20MB/s to user B, 30MB/s to group X, etc.

Anyway, I'm wondering whether it's already possible (and how) to do this with
process containers...

-Andrea

2008-01-11 14:05:38

by David Newall

Subject: Re: [RFC][PATCH] per-task I/O throttling

Andrea Righi wrote:
> [I/O-intensive] processes can noticeably impact the system responsiveness
> for some time and playing with tasks' priority is not always an
> acceptable solution.
>

Why?

2008-01-11 14:21:18

by Peter Zijlstra

Subject: Re: [RFC][PATCH] per-task I/O throttling


On Fri, 2008-01-11 at 11:28 +0100, Andrea Righi wrote:
> Bill Davidsen wrote:
> > Andrea Righi wrote:
> >> Allow to limit the bandwidth of I/O-intensive processes, like backup
> >> tools running in background, large files copy, checksums on huge files,
> >> etc.
> >>
> >> This kind of processes can noticeably impact the system responsiveness
> >> for some time and playing with tasks' priority is not always an
> >> acceptable solution.
> >>
> >> This patch allows to specify a maximum I/O rate in sectors per second
> >> for each single process via /proc/<PID>/io_throttle (default is zero,
> >> that specify no limit).
> >>
> > It would seem to me that this would be vastly more useful in the real
> > world if there were a settable default, so that administrators could
> > avoid having to find and tune individual user processes. And it would
> > seem far less common that the admin would want to set the limit *up* for
> > a given process, and it's likely to be one known to the admin, at least
> > by name.
> >
> > Of course if you want to do the effort to make it fully tunable, it
> > could have a default by UID or GID. Usful on machines shared by students
> > or managers.
>
> At the moment I'm simply using it to backup my PC by this wrapper:
>
> $ cat iothrottle
> #!/bin/sh
> [ $# -lt 2 ] && echo "usage: $0 RATE CMD" && exit 1
> rate=$1
> shift
> $* &
> trap "kill -9 $!" SIGINT SIGTERM
> [ -e /proc/$!/io_throttle ] && echo $rate >/proc/$!/io_throttle
> wait %1
> $ ./iothrottle 100 tar ...
>
> But I totally agree with you that setting the limits per-UID/per-GID,
> instead of per-task, would be actually more useful.
>
> Maybe a nice approach would be to define the UID/GID upper bounds via
> configfs (for example) and allow the users to tune the max I/O rate of
> their single tasks according to the defined ranges. In this way it could
> be even possible to define I/O shaping policies, i.e. give a bandwidth
> of 10MB/s to user A, 20MB/s to user B, 30MB/s to group X, etc.
>
> Anyway, I'm wondering if it's possible (and how) to already do this with
> process containers...

I think there is an IO controller somewhere based on CFQ.

I don't like this patch, because it throttles requests/s, and that
doesn't say much. If a task generates a very seeky load it could
still tie up the disk even with a relatively low setting.

2008-01-11 15:30:11

by Andrea Righi

Subject: Re: [RFC][PATCH] per-task I/O throttling

Peter Zijlstra wrote:
>>
>> Anyway, I'm wondering if it's possible (and how) to already do this with
>> process containers...
>
> I think there is an IO controller somewhere based on CFQ.
>
> I don't like this patch, because it throttles requests/s, and that
> doesn't say much. If a task would generate a very seeky load it could
> still tie up the disk even with a relatively low setting.
>

Very true. A seek-intensive process wouldn't be limited at all. And I'm
sure there are better ways/models to satisfy my needs.

A suggestion (off-list) has been to try ionice, which seems to be the
right solution to limit the I/O activity of single processes, but it
doesn't allow defining policies based on UIDs or GIDs.

BTW I don't have any numbers to compare the effectiveness of the priority
approach vs the throttling approach. Here is a very quick test made on
my PC (not sure if glxgears is the right benchmark to evaluate
the system responsiveness):

>>>>>> starting: glxgears <<<<<<
3564 frames in 5.0 seconds = 711.722 FPS
3953 frames in 5.0 seconds = 790.598 FPS
3969 frames in 5.0 seconds = 793.794 FPS
>>>>>> starting: md5sum /usr/lib/* <<<<<<
3769 frames in 5.0 seconds = 753.189 FPS
2877 frames in 5.0 seconds = 572.843 FPS
3481 frames in 5.0 seconds = 696.071 FPS
3775 frames in 5.0 seconds = 751.404 FPS
2781 frames in 5.0 seconds = 556.118 FPS
3209 frames in 5.0 seconds = 641.064 FPS
2843 frames in 5.0 seconds = 565.697 FPS
>>>>>> starting: echo 100 > /proc/`pidof md5sum`/io_throttle <<<<<<
3652 frames in 5.0 seconds = 730.253 FPS
3669 frames in 5.0 seconds = 733.734 FPS
3797 frames in 5.0 seconds = 759.234 FPS
3883 frames in 5.0 seconds = 776.488 FPS
3895 frames in 5.0 seconds = 778.868 FPS
3845 frames in 5.0 seconds = 768.968 FPS
3829 frames in 5.0 seconds = 765.793 FPS

>>>>>> flush caches (/proc/sys/vm/drop_caches) <<<<<<

>>>>>> starting: glxgears <<<<<<
3763 frames in 5.0 seconds = 752.539 FPS
3818 frames in 5.0 seconds = 763.483 FPS
>>>>>> starting: ionice -c3 md5sum /usr/lib/* <<<<<<
3443 frames in 5.0 seconds = 688.597 FPS
3202 frames in 5.0 seconds = 640.390 FPS
3807 frames in 5.0 seconds = 761.391 FPS
3053 frames in 5.0 seconds = 610.539 FPS
2759 frames in 5.0 seconds = 551.790 FPS
2975 frames in 5.0 seconds = 594.873 FPS
2993 frames in 5.0 seconds = 596.709 FPS
3250 frames in 5.0 seconds = 649.857 FPS
3494 frames in 5.0 seconds = 698.688 FPS

-Andrea

2008-01-11 15:44:25

by Andrea Righi

Subject: Re: [RFC][PATCH] per-task I/O throttling

David Newall wrote:
> Andrea Righi wrote:
>> [I/O-intensive] processes can noticeably impact the system responsiveness
>> for some time and playing with tasks' priority is not always an
>> acceptable solution.
>>
>
> Why?
>

Well, I mean, we can't use 'nice' to give the I/O-intensive app a lower
priority, because the I/O-intensive app itself doesn't need a lot
of CPU. Instead, the I/O-bound app eats all the available I/O bandwidth,
which is a different issue.

-Andrea

2008-01-11 15:59:19

by Balbir Singh

Subject: Re: [RFC][PATCH] per-task I/O throttling

On Jan 11, 2008 4:15 AM, Andrea Righi <[email protected]> wrote:
> Allow to limit the bandwidth of I/O-intensive processes, like backup
> tools running in background, large files copy, checksums on huge files,
> etc.
>
> This kind of processes can noticeably impact the system responsiveness
> for some time and playing with tasks' priority is not always an
> acceptable solution.
>
> This patch allows to specify a maximum I/O rate in sectors per second
> for each single process via /proc/<PID>/io_throttle (default is zero,
> that specify no limit).
>
> Signed-off-by: Andrea Righi <[email protected]>

Hi, Andrea,

We have been thinking of doing control group based I/O control. I have
not reviewed your patch in detail. I can suggest looking at openvz's
IO controller. I/O bandwidth control is definitely interesting. How
did you test your solution?

Balbir

2008-01-11 16:33:17

by Andrea Righi

Subject: Re: [RFC][PATCH] per-task I/O throttling

Balbir Singh wrote:
> On Jan 11, 2008 4:15 AM, Andrea Righi <[email protected]> wrote:
>> Allow to limit the bandwidth of I/O-intensive processes, like backup
>> tools running in background, large files copy, checksums on huge files,
>> etc.
>>
>> This kind of processes can noticeably impact the system responsiveness
>> for some time and playing with tasks' priority is not always an
>> acceptable solution.
>>
>> This patch allows to specify a maximum I/O rate in sectors per second
>> for each single process via /proc/<PID>/io_throttle (default is zero,
>> that specify no limit).
>>
>> Signed-off-by: Andrea Righi <[email protected]>
>
> Hi, Andrea,
>
> We have been thinking of doing control group based I/O control. I have
> not reviewed your patch in detail. I can suggest looking at openvz's
> IO controller. I/O bandwidth control is definitely interesting. How
> did you test your solution?

I don't have meaningful numbers right now, I just did some quick tests on
my PC. Regarding OpenVZ, it seems to use the CFQ priority-based approach
(with the 3 priority classes: real-time, best-effort and idle).

The interesting feature is that it allows setting a priority for each
process container, but AFAIK it doesn't allow "partitioning" the
bandwidth between different containers (that would be a nice feature
IMHO). For example it would be great to be able to define per-container
limits, like assigning 10MB/s to processes in container A, 30MB/s to
container B, 20MB/s to container C, etc.

-Andrea

2008-01-12 04:59:21

by Valdis Klētnieks

Subject: Re: [RFC][PATCH] per-task I/O throttling

On Fri, 11 Jan 2008 17:32:49 +0100, Andrea Righi said:

> The interesting feature is that it allows to set a priority for each
> process container, but AFAIK it doesn't allow to "partition" the
> bandwidth between different containers (that would be a nice feature
> IMHO). For example it would be great to be able to define per-container
> limits, like assign 10MB/s for processes in container A, 30MB/s to
> container B, 20MB/s to container C, etc.

Has anybody considered allocating based on *seeks* rather than bytes moved,
or counting seeks as "virtual bytes" for the purposes of accounting (if the
disk can do 50mbytes/sec, and a seek takes 5millisecs, then count it as 100K
of data)?
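
As a rough illustration of that "virtual bytes" idea, a request could be
charged for the data it moves plus the data the disk could have streamed
during the seek it caused. This is a userspace sketch only; the device
figures, the names and the seek-detection rule are assumptions, not part
of any posted patch:

/* Hypothetical "virtual bytes" accounting: a seek is charged as the data
 * the disk could have transferred in the same time.  Illustrative numbers. */
#include <stdio.h>

#define DISK_BYTES_PER_SEC      (50UL * 1000 * 1000)    /* ~50 MB/s sustained */
#define AVG_SEEK_USEC           5000UL                  /* ~5 ms per seek */

static unsigned long virtual_bytes(unsigned long bytes, int caused_seek)
{
        unsigned long seek_cost = 0;

        if (caused_seek)        /* e.g. not contiguous with the previous request */
                seek_cost = DISK_BYTES_PER_SEC / 1000000UL * AVG_SEEK_USEC;

        return bytes + seek_cost;       /* charge this against the task's quota */
}

int main(void)
{
        /* a 4 KiB random read gets charged like ~250 KB of sequential data */
        printf("%lu\n", virtual_bytes(4096, 1));
        return 0;
}

With the figures above the per-seek surcharge works out to roughly 250 KB,
so a seek-heavy task would burn its quota quickly even though it moves
little actual data.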



2008-01-12 09:46:36

by Peter Zijlstra

Subject: Re: [RFC][PATCH] per-task I/O throttling


On Fri, 2008-01-11 at 23:57 -0500, [email protected] wrote:
> On Fri, 11 Jan 2008 17:32:49 +0100, Andrea Righi said:
>
> > The interesting feature is that it allows to set a priority for each
> > process container, but AFAIK it doesn't allow to "partition" the
> > bandwidth between different containers (that would be a nice feature
> > IMHO). For example it would be great to be able to define per-container
> > limits, like assign 10MB/s for processes in container A, 30MB/s to
> > container B, 20MB/s to container C, etc.
>
> Has anybody considered allocating based on *seeks* rather than bytes moved,
> or counting seeks as "virtual bytes" for the purposes of accounting (if the
> disk can do 50mbytes/sec, and a seek takes 5millisecs, then count it as 100K
> of data)?

I was considering a time-based scheduler: you can fill your time slot with
seeks or data. It might be what CFQ does, but I've never even read the
code.

2008-01-12 10:57:24

by Balbir Singh

Subject: Re: [RFC][PATCH] per-task I/O throttling

* Peter Zijlstra <[email protected]> [2008-01-12 10:46:37]:

>
> On Fri, 2008-01-11 at 23:57 -0500, [email protected] wrote:
> > On Fri, 11 Jan 2008 17:32:49 +0100, Andrea Righi said:
> >
> > > The interesting feature is that it allows to set a priority for each
> > > process container, but AFAIK it doesn't allow to "partition" the
> > > bandwidth between different containers (that would be a nice feature
> > > IMHO). For example it would be great to be able to define per-container
> > > limits, like assign 10MB/s for processes in container A, 30MB/s to
> > > container B, 20MB/s to container C, etc.
> >
> > Has anybody considered allocating based on *seeks* rather than bytes moved,
> > or counting seeks as "virtual bytes" for the purposes of accounting (if the
> > disk can do 50mbytes/sec, and a seek takes 5millisecs, then count it as 100K
> > of data)?
>
> I was considering a time scheduler, you can fill your time slot with
> seeks or data, it might be what CFQ does, but I've never even read the
> code.
>

So far the definition of I/O bandwidth has been w.r.t. time. Not all I/O
devices have sectors; I'd prefer bytes over a period of time.

--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL

2008-01-12 11:10:51

by Peter Zijlstra

Subject: Re: [RFC][PATCH] per-task I/O throttling


On Sat, 2008-01-12 at 16:27 +0530, Balbir Singh wrote:
> * Peter Zijlstra <[email protected]> [2008-01-12 10:46:37]:
>
> >
> > On Fri, 2008-01-11 at 23:57 -0500, [email protected] wrote:
> > > On Fri, 11 Jan 2008 17:32:49 +0100, Andrea Righi said:
> > >
> > > > The interesting feature is that it allows to set a priority for each
> > > > process container, but AFAIK it doesn't allow to "partition" the
> > > > bandwidth between different containers (that would be a nice feature
> > > > IMHO). For example it would be great to be able to define per-container
> > > > limits, like assign 10MB/s for processes in container A, 30MB/s to
> > > > container B, 20MB/s to container C, etc.
> > >
> > > Has anybody considered allocating based on *seeks* rather than bytes moved,
> > > or counting seeks as "virtual bytes" for the purposes of accounting (if the
> > > disk can do 50mbytes/sec, and a seek takes 5millisecs, then count it as 100K
> > > of data)?
> >
> > I was considering a time scheduler, you can fill your time slot with
> > seeks or data, it might be what CFQ does, but I've never even read the
> > code.
> >
>
> So far the definition of I/O bandwidth has been w.r.t time. Not all IO
> devices have sectors; I'd prefer bytes over a period of time.

Doing a time-based one would only require knowing the (avg) delay of
seeks, whereas doing a bytes-based one would also require knowing the
(avg) speed of the device.

That is, if you're also interested in providing a latency guarantee,
because that'd force you to convert bytes to time again.

I'm not sure that's a good way to go as long as a majority of
devices still have a non-zero seek penalty (SSDs just aren't there yet for
most of us).
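
To make that concrete, here is a small sketch of the time-based variant
(userspace C, for illustration only; the device parameters are assumed,
not measured, and nothing here comes from the posted patches):

/* Charge each request for estimated "disk time": transfer time plus a
 * seek penalty, so a budget can be expressed in microseconds of disk
 * time per second of wall time. */
#include <stdio.h>

#define DEV_BYTES_PER_USEC      50UL    /* ~50 MB/s sustained transfer rate */
#define DEV_AVG_SEEK_USEC       5000UL  /* average positioning delay */

static unsigned long request_cost_usec(unsigned long bytes, int seeky)
{
        unsigned long cost = bytes / DEV_BYTES_PER_USEC;        /* transfer time */

        if (seeky)
                cost += DEV_AVG_SEEK_USEC;      /* positioning dominates small I/O */
        return cost;
}

int main(void)
{
        printf("512 KiB sequential: %lu usec\n", request_cost_usec(512 * 1024, 0));
        printf("4 KiB random:       %lu usec\n", request_cost_usec(4096, 1));
        return 0;
}

A budget of, say, 100 ms of disk time per second then buys either roughly
5 MB/s of streaming I/O or about 20 random requests per second, whichever
mix the task chooses; a plain bytes/s limit cannot express that equivalence.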

2008-01-12 18:01:37

by Andrea Righi

Subject: Re: [RFC][PATCH] per-task I/O throttling

Peter Zijlstra wrote:
> On Sat, 2008-01-12 at 16:27 +0530, Balbir Singh wrote:
>> * Peter Zijlstra <[email protected]> [2008-01-12 10:46:37]:
>>
>>> On Fri, 2008-01-11 at 23:57 -0500, [email protected] wrote:
>>>> On Fri, 11 Jan 2008 17:32:49 +0100, Andrea Righi said:
>>>>
>>>>> The interesting feature is that it allows to set a priority for each
>>>>> process container, but AFAIK it doesn't allow to "partition" the
>>>>> bandwidth between different containers (that would be a nice feature
>>>>> IMHO). For example it would be great to be able to define per-container
>>>>> limits, like assign 10MB/s for processes in container A, 30MB/s to
>>>>> container B, 20MB/s to container C, etc.
>>>> Has anybody considered allocating based on *seeks* rather than bytes moved,
>>>> or counting seeks as "virtual bytes" for the purposes of accounting (if the
>>>> disk can do 50mbytes/sec, and a seek takes 5millisecs, then count it as 100K
>>>> of data)?
>>> I was considering a time scheduler, you can fill your time slot with
>>> seeks or data, it might be what CFQ does, but I've never even read the
>>> code.
>>>
>> So far the definition of I/O bandwidth has been w.r.t time. Not all IO
>> devices have sectors; I'd prefer bytes over a period of time.
>
> Doing a time based one would only require knowing the (avg) delay of
> seeks, whereas doing a bytes based one would also require knowing the
> (avg) speed of the device.
>
> That is, if you're also interested in providing a latency guarantee.
> Because that'd force you to convert bytes to time again.

So, what about considering both bytes/sec and I/O operations/sec? In this
way we should be able to limit both huge streams of data and seek storms (or
any mix of them).
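
As a rough sketch of how both limits could be enforced at once (userspace
C for illustration; the structure and names are assumptions, not taken
from the posted patches), one could keep two independent per-window budgets
and let the stricter one decide the delay:

/* Hypothetical dual limit: stay under both a bytes/s and an ops/s
 * ceiling; whichever budget is exhausted first decides the sleep. */
#include <stdio.h>

struct io_limit {
        unsigned long bytes_per_sec;    /* 0 = unlimited */
        unsigned long iops_per_sec;     /* 0 = unlimited */
        unsigned long bytes_used;       /* consumed in the current 1 s window */
        unsigned long iops_used;
};

/* milliseconds the caller should sleep before issuing this request */
static unsigned long throttle_delay_ms(struct io_limit *l, unsigned long bytes)
{
        unsigned long d1 = 0, d2 = 0;

        l->bytes_used += bytes;
        l->iops_used += 1;

        if (l->bytes_per_sec && l->bytes_used > l->bytes_per_sec)
                d1 = (l->bytes_used - l->bytes_per_sec) * 1000 / l->bytes_per_sec;
        if (l->iops_per_sec && l->iops_used > l->iops_per_sec)
                d2 = (l->iops_used - l->iops_per_sec) * 1000 / l->iops_per_sec;

        return d1 > d2 ? d1 : d2;       /* the stricter limit wins */
}

int main(void)
{
        struct io_limit l = { 4 * 1024 * 1024, 100, 0, 0 };     /* 4 MB/s, 100 ops/s */
        unsigned long i, delay = 0;

        for (i = 0; i < 200; i++)       /* 200 small requests in one window */
                delay = throttle_delay_ms(&l, 4096);
        printf("sleep %lu ms\n", delay);        /* here the ops/s limit is binding */
        return 0;
}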

Regarding CFQ, AFAIK it's only possible to configure an I/O priority for
a process, but there's no way, for example, to limit the bandwidth (or I/O
operations/sec) for a particular user or group.

-Andrea

2008-01-13 04:46:40

by Balbir Singh

Subject: Re: [RFC][PATCH] per-task I/O throttling

* Andrea Righi <[email protected]> [2008-01-12 19:01:14]:

> Peter Zijlstra wrote:
> > On Sat, 2008-01-12 at 16:27 +0530, Balbir Singh wrote:
> >> * Peter Zijlstra <[email protected]> [2008-01-12 10:46:37]:
> >>
> >>> On Fri, 2008-01-11 at 23:57 -0500, [email protected] wrote:
> >>>> On Fri, 11 Jan 2008 17:32:49 +0100, Andrea Righi said:
> >>>>
> >>>>> The interesting feature is that it allows to set a priority for each
> >>>>> process container, but AFAIK it doesn't allow to "partition" the
> >>>>> bandwidth between different containers (that would be a nice feature
> >>>>> IMHO). For example it would be great to be able to define per-container
> >>>>> limits, like assign 10MB/s for processes in container A, 30MB/s to
> >>>>> container B, 20MB/s to container C, etc.
> >>>> Has anybody considered allocating based on *seeks* rather than bytes moved,
> >>>> or counting seeks as "virtual bytes" for the purposes of accounting (if the
> >>>> disk can do 50mbytes/sec, and a seek takes 5millisecs, then count it as 100K
> >>>> of data)?
> >>> I was considering a time scheduler, you can fill your time slot with
> >>> seeks or data, it might be what CFQ does, but I've never even read the
> >>> code.
> >>>
> >> So far the definition of I/O bandwidth has been w.r.t time. Not all IO
> >> devices have sectors; I'd prefer bytes over a period of time.
> >
> > Doing a time based one would only require knowing the (avg) delay of
> > seeks, whereas doing a bytes based one would also require knowing the
> > (avg) speed of the device.
> >
> > That is, if you're also interested in providing a latency guarantee.
> > Because that'd force you to convert bytes to time again.
>
> So, what about considering both bytes/sec and io-operations/sec? In this
> way we should be able to limit huge streams of data and seek storms (or
> any mix of them).
>
> Regarding CFQ, AFAIK it's only possible to configure an I/O priorty for
> a process, but there's no way for example to limit the bandwidth (or I/O
> operations/sec) for a particular user or group.
>

Limiting usage is also a very useful feature. Andrea, could you please
port your patches over to control groups?

--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL

2008-01-15 16:50:08

by Andrea Righi

Subject: [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling)

Allow limiting the I/O bandwidth for specific UID(s) or GID(s), imposing
additional delays on those processes that exceed the limits defined in a
configfs tree.

Examples:

Limit the I/O bandwidth for user www-data (UID 33) to 4MB/s:

root@linux:/config/io-throttle# mkdir uid:33
root@linux:/config/io-throttle# cd uid:33/
root@linux:/config/io-throttle/uid:33# cat io-rate
io-rate: 0 KiB/sec
requested: 0 KiB
last_request: 0 jiffies
delta: 388202 jiffies
root@linux:/config/io-throttle/uid:33# echo 4096 > io-rate
root@linux:/config/io-throttle/uid:33# cat io-rate
io-rate: 4096 KiB/sec
requested: 0 KiB
last_request: 389271 jiffies
delta: 91 jiffies

Limit the I/O bandwidth of group backup (GID 34) to 512KB/s:

root@linux:/config/io-throttle# mkdir gid:34
root@linux:/config/io-throttle# cd gid:34/
root@linux:/config/io-throttle/gid:34# cat io-rate
io-rate: 0 KiB/sec
requested: 0 KiB
last_request: 0 jiffies
delta: 403160 jiffies
root@linux:/config/io-throttle/gid:34# echo 512 > io-rate
root@linux:/config/io-throttle/gid:34# cat io-rate
io-rate: 512 KiB/sec
requested: 0 KiB
last_request: 403618 jiffies
delta: 80 jiffies

Remove the I/O limit for user www-data:

root@linux:/config/io-throttle# echo 0 > uid:33/io-rate
root@linux:/config/io-throttle# cat uid:33/io-rate
io-rate: 0 KiB/sec
requested: 0 KiB
last_request: 419009 jiffies
delta: 568 jiffies

or:

root@linux:/config/io-throttle# rmdir uid:33

Future improvements:
* allow limiting I/O operations per second as well (instead of KB/s only)
* extend grouping criteria (allow defining rules based on process containers,
process command, etc.)

Signed-off-by: Andrea Righi <[email protected]>
---

diff -urpN linux-2.6.24-rc7/block/io-throttle.c linux-2.6.24-rc7-io-throttle/block/io-throttle.c
--- linux-2.6.24-rc7/block/io-throttle.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.24-rc7-io-throttle/block/io-throttle.c 2008-01-15 17:25:06.000000000 +0100
@@ -0,0 +1,282 @@
+/*
+ * io-throttle.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Copyright (C) 2008 Andrea Righi <[email protected]>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/configfs.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/jiffies.h>
+#include <linux/io-throttle.h>
+
+/* This should work for any reasonable size of uid_t */
+#define MAXUIDCHAR (sizeof(uid_t) * 5 / 2)
+
+/*
+ * Basic structure that identifies a single I/O throttling rule
+ */
+struct iothrottle {
+ struct config_item item;
+ unsigned long iorate;
+ unsigned long req;
+ unsigned long last_request;
+};
+
+/* Get an I/O-throttling item from a configfs item */
+static inline struct iothrottle *to_iothrottle(struct config_item *item)
+{
+ return item ? container_of(item, struct iothrottle, item) : NULL;
+}
+
+static ssize_t iothrottle_attr_show(struct config_item *item,
+ struct configfs_attribute *attr,
+ char *page);
+static ssize_t iothrottle_attr_store(struct config_item *item,
+ struct configfs_attribute *attr,
+ const char *page, size_t count);
+static struct config_item *iothrottle_make_item(struct config_group *group,
+ const char *name);
+static void iothrottle_release(struct config_item *item);
+
+/* I/O throttling item in configfs (identify a single I/O throttling rule) */
+static struct configfs_attribute iothrottle_attr_iorate = {
+ .ca_owner = THIS_MODULE,
+ .ca_name = "io-rate",
+ .ca_mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH,
+};
+
+/* I/O throttling element in configfs */
+static struct configfs_group_operations iothrottle_group_ops = {
+ .make_item = iothrottle_make_item,
+};
+
+static struct config_item_type iothrottle_group_type = {
+ .ct_group_ops = &iothrottle_group_ops,
+};
+
+/* Entire I/O throttling subsystem under configfs (/config/io-throttle) */
+struct configfs_subsystem iothrottle_subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "io-throttle",
+ .ci_type = &iothrottle_group_type,
+ },
+ },
+};
+
+/* List of configfs elements per group */
+static struct configfs_attribute *iothrottle_attrs[] = {
+ &iothrottle_attr_iorate,
+ NULL,
+};
+
+static struct configfs_item_operations iothrottle_item_ops = {
+ .release = iothrottle_release,
+ .show_attribute = iothrottle_attr_show,
+ .store_attribute = iothrottle_attr_store,
+};
+
+static struct config_item_type iothrottle_type = {
+ .ct_item_ops = &iothrottle_item_ops,
+ .ct_attrs = iothrottle_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+/* Print a I/O throttling rule details */
+static ssize_t iothrottle_attr_show(struct config_item *item,
+ struct configfs_attribute *attr,
+ char *page)
+{
+ ssize_t count;
+ unsigned long delta;
+ struct iothrottle *iot = to_iothrottle(item);
+
+ delta = (long)jiffies - (long)iot->last_request;
+ /* Print additional debugging stuff */
+ count = sprintf(page, " io-rate: %lu KiB/sec\n"
+ " requested: %lu KiB\n"
+ "last_request: %lu jiffies\n"
+ " delta: %lu jiffies\n",
+ iot->iorate, iot->req << 1, iot->last_request, delta);
+
+ return count;
+}
+
+/* Configure the attributes of an I/O throttling rule */
+static ssize_t iothrottle_attr_store(struct config_item *item,
+ struct configfs_attribute *attr,
+ const char *page, size_t count)
+{
+ struct iothrottle *iot = to_iothrottle(item);
+ unsigned long tmp;
+ char *p = (char *) page;
+
+ tmp = simple_strtoul(p, &p, 10);
+ if (!p || (*p && (*p != '\n')))
+ return -EINVAL;
+
+ iot->iorate = tmp;
+ iot->req = 0;
+ iot->last_request = jiffies;
+
+ return count;
+}
+
+/* Register a new I/O throttling rule */
+static struct config_item *iothrottle_make_item(struct config_group *group,
+ const char *name)
+{
+ struct iothrottle *iot;
+
+ iot = kzalloc(sizeof(*iot), GFP_KERNEL);
+ if (unlikely(!iot))
+ return NULL;
+
+ iot->last_request = jiffies;
+
+ config_item_init_type_name(&iot->item, name, &iothrottle_type);
+
+ return &iot->item;
+}
+
+/* Unregister an I/O throttling rule */
+static void iothrottle_release(struct config_item *item)
+{
+ kfree(to_iothrottle(item));
+}
+
+/* Get the opportune "struct iothrottle" item from configfs if present */
+static inline struct iothrottle *iothrottle_get_config(void)
+{
+ char idstr[MAXUIDCHAR + 1];
+ struct iothrottle *p = NULL;
+
+ memset(idstr, 0, sizeof(idstr));
+
+ /* UID-based rule */
+ snprintf(idstr, MAXUIDCHAR, "uid:%u", current->uid);
+ p = to_iothrottle(
+ config_group_find_item(&iothrottle_subsys.su_group, idstr));
+ if (p)
+ goto out_get_config;
+
+ /* GID-based rule */
+ snprintf(idstr, MAXUIDCHAR, "gid:%u", current->gid);
+ p = to_iothrottle(
+ config_group_find_item(&iothrottle_subsys.su_group, idstr));
+
+out_get_config:
+ return p;
+}
+
+/*
+ * Here is the main function of the I/O throttling mechanism.
+ *
+ * FIXME: potential race if the struct iothrottle *iot item is removed/modified
+ * in the middle of the io_throttle() execution.
+ */
+void io_throttle(int nr_sectors)
+{
+ struct iothrottle *iot;
+ unsigned long delta, n;
+ long sleep;
+
+ iot = iothrottle_get_config();
+ if (!iot || !iot->iorate)
+ return;
+
+ /*
+ * The concept is the following: evaluate the actual I/O rate of a
+ * process, looking at the sectors requested over the time elapsed from
+ * the last request. If the actual I/O rate is beyond the maximum
+ * allowed I/O rate then sleep the current task for the correct amount
+ * of time, in order to reduce the actual I/O rate under the allowed
+ * limit.
+ *
+ * The time to sleep is evaluated as:
+ *
+ * sleep = (sectors_requested / allowed_iorate) - time_elapsed
+ */
+ delta = (long)jiffies - (long)iot->last_request;
+ n = iot->req += nr_sectors;
+ do_div(n, iot->iorate);
+
+ /*
+ * Unable to evaluate delta (due to a too small interval of time
+ * between two requests) or n (due to a too small request).
+ *
+ * Account the requested sectors in iot->req and sum them to the
+ * sectors of the next request.
+ */
+ if (!delta || !n)
+ return;
+
+ /*
+ * Convert n in jiffies (remember that iot->iorate is in KB/s and we
+ * need to convert it in sectors/jiffies)
+ */
+ sleep = msecs_to_jiffies(n * 1000 / 2) - delta;
+ if (sleep > 0) {
+ printk(KERN_DEBUG
+ "io-throttle: process %i (%s) must sleep %lu jiffies\n",
+ task_pid_nr(current), current->comm, sleep);
+ schedule_timeout_uninterruptible(sleep);
+ }
+
+ /* OK, we stay under the limits. Reset statistics. */
+ iot->req = 0;
+ iot->last_request = jiffies;
+}
+EXPORT_SYMBOL(io_throttle);
+
+/* Register I/O throttling subsystem */
+static int __init iothrottle_init(void)
+{
+ int ret;
+
+ config_group_init(&iothrottle_subsys.su_group);
+ mutex_init(&iothrottle_subsys.su_mutex);
+ ret = configfs_register_subsystem(&iothrottle_subsys);
+ if (ret) {
+ printk(KERN_ERR "%s: error %d while registering subsystem\n",
+ iothrottle_subsys.su_group.cg_item.ci_namebuf,
+ ret);
+ goto out_unregister;
+ }
+
+ return 0;
+
+out_unregister:
+ configfs_unregister_subsystem(&iothrottle_subsys);
+
+ return ret;
+}
+
+/* Unregister I/O throttling subsystem */
+static void __exit iothrottle_exit(void)
+{
+ configfs_unregister_subsystem(&iothrottle_subsys);
+ mutex_destroy(&iothrottle_subsys.su_mutex);
+}
+
+module_init(iothrottle_init);
+module_exit(iothrottle_exit);
+MODULE_LICENSE("GPL");
diff -urpN linux-2.6.24-rc7/block/Kconfig linux-2.6.24-rc7-io-throttle/block/Kconfig
--- linux-2.6.24-rc7/block/Kconfig 2008-01-06 22:45:38.000000000 +0100
+++ linux-2.6.24-rc7-io-throttle/block/Kconfig 2008-01-15 15:30:21.000000000 +0100
@@ -40,6 +40,16 @@ config BLK_DEV_IO_TRACE

git://brick.kernel.dk/data/git/blktrace.git

+config IO_THROTTLE
+ tristate "Enable I/O throttling (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ select CONFIGFS_FS
+ help
+ This allows to limit the maximum I/O rate for specific UID(s) or
+ GID(s).
+
+ Say N if unsure.
+
config LSF
bool "Support for Large Single Files"
depends on !64BIT
diff -urpN linux-2.6.24-rc7/block/ll_rw_blk.c linux-2.6.24-rc7-io-throttle/block/ll_rw_blk.c
--- linux-2.6.24-rc7/block/ll_rw_blk.c 2008-01-06 22:45:38.000000000 +0100
+++ linux-2.6.24-rc7-io-throttle/block/ll_rw_blk.c 2008-01-15 13:59:21.000000000 +0100
@@ -31,6 +31,7 @@
#include <linux/blktrace_api.h>
#include <linux/fault-inject.h>
#include <linux/scatterlist.h>
+#include <linux/io-throttle.h>

/*
* for max sense size
@@ -3221,6 +3222,8 @@ static inline void __generic_make_reques
if (bio_check_eod(bio, nr_sectors))
goto end_io;

+ io_throttle(nr_sectors);
+
/*
* Resolve the mapping until finished. (drivers are
* still free to implement/resolve their own stacking
diff -urpN linux-2.6.24-rc7/block/Makefile linux-2.6.24-rc7-io-throttle/block/Makefile
--- linux-2.6.24-rc7/block/Makefile 2008-01-06 22:45:38.000000000 +0100
+++ linux-2.6.24-rc7-io-throttle/block/Makefile 2008-01-15 13:59:21.000000000 +0100
@@ -12,3 +12,5 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched

obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
+
+obj-$(CONFIG_IO_THROTTLE) += io-throttle.o
diff -urpN linux-2.6.24-rc7/include/linux/io-throttle.h linux-2.6.24-rc7-io-throttle/include/linux/io-throttle.h
--- linux-2.6.24-rc7/include/linux/io-throttle.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.24-rc7-io-throttle/include/linux/io-throttle.h 2008-01-15 13:59:21.000000000 +0100
@@ -0,0 +1,10 @@
+#ifndef IO_THROTTLE_H
+#define IO_THROTTLE_H
+
+#ifdef CONFIG_IO_THROTTLE
+extern void io_throttle(int nr_sectors);
+#else
+static inline void io_throttle(int nr_sectors) { }
+#endif /* CONFIG_IO_THROTTLE */
+
+#endif

2008-01-16 10:46:00

by Balbir Singh

Subject: Re: [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling)

* Andrea Righi <[email protected]> [2008-01-15 17:49:36]:

> Allow to limit the I/O bandwidth for specific uid(s) or gid(s) imposing
> additional delays on those processes that exceed the limits defined in a
> configfs tree.
>
> Examples:
>
> Limit the I/O bandwidth for user www-data (UID 33) to 4MB/s:
>
> root@linux:/config/io-throttle# mkdir uid:33
> root@linux:/config/io-throttle# cd uid:33/
> root@linux:/config/io-throttle/uid:33# cat io-rate
> io-rate: 0 KiB/sec
> requested: 0 KiB
> last_request: 0 jiffies
> delta: 388202 jiffies
> root@linux:/config/io-throttle/uid:33# echo 4096 > io-rate
> root@linux:/config/io-throttle/uid:33# cat io-rate
> io-rate: 4096 KiB/sec
> requested: 0 KiB
> last_request: 389271 jiffies
> delta: 91 jiffies
>
> Limit the I/O bandwidth of group backup (GID 34) to 512KB/s:
>
> root@linux:/config/io-throttle# mkdir gid:34
> root@linux:/config/io-throttle# cd gid:34/
> root@linux:/config/io-throttle/gid:34# cat io-rate
> io-rate: 0 KiB/sec
> requested: 0 KiB
> last_request: 0 jiffies
> delta: 403160 jiffies
> root@linux:/config/io-throttle/gid:34# echo 512 > io-rate
> root@linux:/config/io-throttle/gid:34# cat io-rate
> io-rate: 512 KiB/sec
> requested: 0 KiB
> last_request: 403618 jiffies
> delta: 80 jiffies
>
> Remove the I/O limit for user www-data:
>
> root@linux:/config/io-throttle# echo 0 > uid:33/io-rate
> root@linux:/config/io-throttle# cat uid:33/io-rate
> io-rate: 0 KiB/sec
> requested: 0 KiB
> last_request: 419009 jiffies
> delta: 568 jiffies
>
> or:
>
> root@linux:/config/io-throttle# rmdir uid:33
>
> Future improvements:
> * allow to limit also I/O operations per second (instead of KB/s only)
> * extend grouping criteria (allow to define rules based on process containers,
> process command, etc.)
>
> Signed-off-by: Andrea Righi <[email protected]>

Hi, Andrea,

Thanks for doing this. I am going to review the patches in greater
detail and also test them. Why do you use configfs when we have a
control group filesystem available for grouping tasks and providing a
file system based interface for control and accounting?

--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL

2008-01-16 11:31:53

by Valdis Klētnieks

Subject: Re: [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling)

On Wed, 16 Jan 2008 16:15:41 +0530, Balbir Singh said:

> Thanks for doing this. I am going to review the patches in greater
> detail and also test them. Why do you use configfs when we have a
> control group filesystem available for grouping tasks and providing a
> file system based interface for control and accounting?

And here I thought "There's more than one way to do it" was the Perl slogan. :)

An equally valid question would be: "Why are we carrying around a control
group filesystem when we have configfs?" (Honestly, I didn't know we *were*
carrying around such a filesystem - and quite likely Andrea Righi didn't
either...)



2008-01-16 12:06:00

by Balbir Singh

Subject: Re: [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling)

* [email protected] <[email protected]> [2008-01-16 06:30:31]:

> On Wed, 16 Jan 2008 16:15:41 +0530, Balbir Singh said:
>
> > Thanks for doing this. I am going to review the patches in greater
> > detail and also test them. Why do you use configfs when we have a
> > control group filesystem available for grouping tasks and providing a
> > file system based interface for control and accounting?
>
> And here I thought "There's more than one way to do it" was the Perl slogan. :)
>

Yes, there are several ways to do it, but the discussion over the last
year or so has been centered around control groups. We've discussed
all approaches on lkml and there was consensus on using control groups.
Please read the lkml archives for the discussion details.


> An equally valid question would be: "Why are we carrying around a control
> group filesystem when we have configfs?" (Honestly, I didn't know we *were*
> carrying around such a filesystem - and quite likely Andrea Righi didn't
> either...)

Control groups are derived from cpusets and, for those interested in
grouping tasks for control, are the preferred method of providing
control.


--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL

2008-01-16 12:24:43

by Valdis Klētnieks

Subject: Re: [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling)

On Wed, 16 Jan 2008 17:35:33 +0530, Balbir Singh said:

> Control groups is derived from cpusets and for those interested in
> grouping tasks for control, is the preferred method of providing
> control.

Ahh, that's why I didn't notice it - "cpusets" didn't seem to do much for the 1
and 2 CPU systems I usually deal with so I filtered it out (including the parts
where it started becoming relevant to things I do). Often, the right tool for
a job is something you've never heard of because it originated in some other
specialized area...



2008-01-16 12:58:28

by Andrea Righi

Subject: Re: [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling)

Balbir Singh wrote:
> * Andrea Righi <[email protected]> [2008-01-15 17:49:36]:
>
>> Allow to limit the I/O bandwidth for specific uid(s) or gid(s) imposing
>> additional delays on those processes that exceed the limits defined in a
>> configfs tree.
>>
>> Examples:
>>
>> Limit the I/O bandwidth for user www-data (UID 33) to 4MB/s:
>>
>> root@linux:/config/io-throttle# mkdir uid:33
>> root@linux:/config/io-throttle# cd uid:33/
>> root@linux:/config/io-throttle/uid:33# cat io-rate
>> io-rate: 0 KiB/sec
>> requested: 0 KiB
>> last_request: 0 jiffies
>> delta: 388202 jiffies
>> root@linux:/config/io-throttle/uid:33# echo 4096 > io-rate
>> root@linux:/config/io-throttle/uid:33# cat io-rate
>> io-rate: 4096 KiB/sec
>> requested: 0 KiB
>> last_request: 389271 jiffies
>> delta: 91 jiffies
>>
>> Limit the I/O bandwidth of group backup (GID 34) to 512KB/s:
>>
>> root@linux:/config/io-throttle# mkdir gid:34
>> root@linux:/config/io-throttle# cd gid:34/
>> root@linux:/config/io-throttle/gid:34# cat io-rate
>> io-rate: 0 KiB/sec
>> requested: 0 KiB
>> last_request: 0 jiffies
>> delta: 403160 jiffies
>> root@linux:/config/io-throttle/gid:34# echo 512 > io-rate
>> root@linux:/config/io-throttle/gid:34# cat io-rate
>> io-rate: 512 KiB/sec
>> requested: 0 KiB
>> last_request: 403618 jiffies
>> delta: 80 jiffies
>>
>> Remove the I/O limit for user www-data:
>>
>> root@linux:/config/io-throttle# echo 0 > uid:33/io-rate
>> root@linux:/config/io-throttle# cat uid:33/io-rate
>> io-rate: 0 KiB/sec
>> requested: 0 KiB
>> last_request: 419009 jiffies
>> delta: 568 jiffies
>>
>> or:
>>
>> root@linux:/config/io-throttle# rmdir uid:33
>>
>> Future improvements:
>> * allow to limit also I/O operations per second (instead of KB/s only)
>> * extend grouping criteria (allow to define rules based on process containers,
>> process command, etc.)
>>
>> Signed-off-by: Andrea Righi <[email protected]>
>
> Hi, Andrea,
>
> Thanks for doing this. I am going to review the patches in greater
> detail and also test them. Why do you use configfs when we have a
> control group filesystem available for grouping tasks and providing a
> file system based interface for control and accounting?
>

Well... I didn't choose configfs for a technical reason, but simply
because I'm more familiar with it compared to the other equivalent ways
to implement this. But I'll also try to look at the control group
approach; I don't know all the advantages/disadvantages in detail, but
it seems interesting anyway.

-Andrea

2008-01-16 19:21:17

by David Newall

Subject: Re: [RFC][PATCH] per-task I/O throttling

Andrea Righi wrote:
> David Newall wrote:
>
>> Andrea Righi wrote:
>>
>>> [I/O-intensive] processes can noticeably impact the system responsiveness
>>> for some time and playing with tasks' priority is not always an
>>> acceptable solution.
>>>
>>>
>> Why?
>>
>>
>
> Well, I mean, we can't use 'nice' to grant less priority for the I/O
> intensive app, because the I/O intensive app itself doesn't need a lot
> of CPU. Instead, the I/O-bound app eats all the available I/O bandwidth,
> that's a different issue.

That's what I was thinking. Your original, "not always an acceptable
solution," made me wonder if you were referring to something obscure.

2008-01-23 12:48:59

by Pavel Machek

Subject: Re: [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling)

On Tue 2008-01-15 17:49:36, Andrea Righi wrote:
> Allow to limit the I/O bandwidth for specific uid(s) or gid(s) imposing
> additional delays on those processes that exceed the limits defined in a
> configfs tree.
>
> Examples:
>
> Limit the I/O bandwidth for user www-data (UID 33) to 4MB/s:
>
> root@linux:/config/io-throttle# mkdir uid:33
> root@linux:/config/io-throttle# cd uid:33/
> root@linux:/config/io-throttle/uid:33# cat io-rate
> io-rate: 0 KiB/sec
> requested: 0 KiB
> last_request: 0 jiffies
> delta: 388202 jiffies
> root@linux:/config/io-throttle/uid:33# echo 4096 > io-rate
> root@linux:/config/io-throttle/uid:33# cat io-rate
> io-rate: 4096 KiB/sec
> requested: 0 KiB
> last_request: 389271 jiffies
> delta: 91 jiffies
>
> Limit the I/O bandwidth of group backup (GID 34) to 512KB/s:

Maybe ionice from CFQ should be improved instead?
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

2008-01-23 15:42:04

by Andrea Righi

Subject: Re: [RFC][PATCH] per-uid/gid I/O throttling (was Re: [RFC][PATCH] per-task I/O throttling)

Pavel Machek wrote:
> On Tue 2008-01-15 17:49:36, Andrea Righi wrote:
>> Allow to limit the I/O bandwidth for specific uid(s) or gid(s) imposing
>> additional delays on those processes that exceed the limits defined in a
>> configfs tree.
>>
>> Examples:
>>
>> Limit the I/O bandwidth for user www-data (UID 33) to 4MB/s:
>>
>> root@linux:/config/io-throttle# mkdir uid:33
>> root@linux:/config/io-throttle# cd uid:33/
>> root@linux:/config/io-throttle/uid:33# cat io-rate
>> io-rate: 0 KiB/sec
>> requested: 0 KiB
>> last_request: 0 jiffies
>> delta: 388202 jiffies
>> root@linux:/config/io-throttle/uid:33# echo 4096 > io-rate
>> root@linux:/config/io-throttle/uid:33# cat io-rate
>> io-rate: 4096 KiB/sec
>> requested: 0 KiB
>> last_request: 389271 jiffies
>> delta: 91 jiffies
>>
>> Limit the I/O bandwidth of group backup (GID 34) to 512KB/s:
>
> Maybe ionice from cfq should be improved, instead?

IMHO it would be interesting to also have a way to use the limiting
approach, instead of only the I/O priority-based one (i.e. checks to ensure that
servicing the requests will not cause the associated user's maximum
quality of service to be exceeded).

see also http://lkml.org/lkml/2008/1/20/157

-Andrea