2007-11-25 16:18:24

by Adrian Bunk

[permalink] [raw]
Subject: [2.6 patch] make I/O schedulers non-modular

There isn't any big advantage to modular schedulers, and there doesn't
seem to be much usage of them.

OTOH, the overhead made the kernel image of an x86 defconfig (that
doesn't use modular schedulers) bigger by nearly 2 kB.

Signed-off-by: Adrian Bunk <[email protected]>

---

block/Kconfig.iosched | 12 ++++----
block/as-iosched.c | 32 -----------------------
block/cfq-iosched.c | 29 --------------------
block/deadline-iosched.c | 10 -------
block/elevator.c | 54 +--------------------------------------
block/ll_rw_blk.c | 8 -----
block/noop-iosched.c | 11 -------
include/linux/elevator.h | 26 ------------------
kernel/sched.c | 1
9 files changed, 8 insertions(+), 175 deletions(-)

424b43bdb56389a6dd2f6bd5c3c6d519ff3ffe2d
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index 7e803fc..c720bee 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -13,7 +13,7 @@ config IOSCHED_NOOP
the kernel.

config IOSCHED_AS
- tristate "Anticipatory I/O scheduler"
+ bool "Anticipatory I/O scheduler"
default y
---help---
The anticipatory I/O scheduler is generally a good choice for most
@@ -22,7 +22,7 @@ config IOSCHED_AS
especially some database loads.

config IOSCHED_DEADLINE
- tristate "Deadline I/O scheduler"
+ bool "Deadline I/O scheduler"
default y
---help---
The deadline I/O scheduler is simple and compact, and is often as
@@ -32,7 +32,7 @@ config IOSCHED_DEADLINE
anticipatory I/O scheduler and so is a good choice.

config IOSCHED_CFQ
- tristate "CFQ I/O scheduler"
+ bool "CFQ I/O scheduler"
default y
---help---
The CFQ I/O scheduler tries to distribute bandwidth equally
@@ -48,13 +48,13 @@ choice
block devices.

config DEFAULT_AS
- bool "Anticipatory" if IOSCHED_AS=y
+ bool "Anticipatory" if IOSCHED_AS

config DEFAULT_DEADLINE
- bool "Deadline" if IOSCHED_DEADLINE=y
+ bool "Deadline" if IOSCHED_DEADLINE

config DEFAULT_CFQ
- bool "CFQ" if IOSCHED_CFQ=y
+ bool "CFQ" if IOSCHED_CFQ

config DEFAULT_NOOP
bool "No-op"
diff --git a/block/as-iosched.c b/block/as-iosched.c
index dc715a5..2ebf12e 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -149,9 +149,6 @@ enum arq_state {
#define RQ_STATE(rq) ((enum arq_state)(rq)->elevator_private2)
#define RQ_SET_STATE(rq, state) ((rq)->elevator_private2 = (void *) state)

-static DEFINE_PER_CPU(unsigned long, ioc_count);
-static struct completion *ioc_gone;
-
static void as_move_to_dispatch(struct as_data *ad, struct request *rq);
static void as_antic_stop(struct as_data *ad);

@@ -163,16 +160,6 @@ static void as_antic_stop(struct as_data *ad);
static void free_as_io_context(struct as_io_context *aic)
{
kfree(aic);
- elv_ioc_count_dec(ioc_count);
- if (ioc_gone && !elv_ioc_count_read(ioc_count))
- complete(ioc_gone);
-}
-
-static void as_trim(struct io_context *ioc)
-{
- if (ioc->aic)
- free_as_io_context(ioc->aic);
- ioc->aic = NULL;
}

/* Called when the task exits */
@@ -200,7 +187,6 @@ static struct as_io_context *alloc_as_io_context(void)
ret->seek_total = 0;
ret->seek_samples = 0;
ret->seek_mean = 0;
- elv_ioc_count_inc(ioc_count);
}

return ret;
@@ -1453,12 +1439,10 @@ static struct elevator_type iosched_as = {
.elevator_may_queue_fn = as_may_queue,
.elevator_init_fn = as_init_queue,
.elevator_exit_fn = as_exit_queue,
- .trim = as_trim,
},

.elevator_attrs = as_attrs,
.elevator_name = "anticipatory",
- .elevator_owner = THIS_MODULE,
};

static int __init as_init(void)
@@ -1466,21 +1450,5 @@ static int __init as_init(void)
return elv_register(&iosched_as);
}

-static void __exit as_exit(void)
-{
- DECLARE_COMPLETION_ONSTACK(all_gone);
- elv_unregister(&iosched_as);
- ioc_gone = &all_gone;
- /* ioc_gone's update must be visible before reading ioc_count */
- smp_wmb();
- if (elv_ioc_count_read(ioc_count))
- wait_for_completion(ioc_gone);
- synchronize_rcu();
-}
-
module_init(as_init);
-module_exit(as_exit);

-MODULE_AUTHOR("Nick Piggin");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("anticipatory IO scheduler");
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 0b4a479..72f6f8a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -43,9 +43,6 @@ static int cfq_slice_idle = HZ / 125;
static struct kmem_cache *cfq_pool;
static struct kmem_cache *cfq_ioc_pool;

-static DEFINE_PER_CPU(unsigned long, ioc_count);
-static struct completion *ioc_gone;
-
#define CFQ_PRIO_LISTS IOPRIO_BE_NR
#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT)
@@ -1184,11 +1181,6 @@ static void cfq_free_io_context(struct io_context *ioc)
kmem_cache_free(cfq_ioc_pool, __cic);
freed++;
}
-
- elv_ioc_count_mod(ioc_count, -freed);
-
- if (ioc_gone && !elv_ioc_count_read(ioc_count))
- complete(ioc_gone);
}

static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
@@ -1267,7 +1259,6 @@ cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
INIT_LIST_HEAD(&cic->queue_list);
cic->dtor = cfq_free_io_context;
cic->exit = cfq_exit_io_context;
- elv_ioc_count_inc(ioc_count);
}

return cic;
@@ -1483,7 +1474,6 @@ cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic)

rb_erase(&cic->rb_node, &ioc->cic_root);
kmem_cache_free(cfq_ioc_pool, cic);
- elv_ioc_count_dec(ioc_count);
}

static struct cfq_io_context *
@@ -2270,11 +2260,9 @@ static struct elevator_type iosched_cfq = {
.elevator_may_queue_fn = cfq_may_queue,
.elevator_init_fn = cfq_init_queue,
.elevator_exit_fn = cfq_exit_queue,
- .trim = cfq_free_io_context,
},
.elevator_attrs = cfq_attrs,
.elevator_name = "cfq",
- .elevator_owner = THIS_MODULE,
};

static int __init cfq_init(void)
@@ -2299,22 +2287,5 @@ static int __init cfq_init(void)
return ret;
}

-static void __exit cfq_exit(void)
-{
- DECLARE_COMPLETION_ONSTACK(all_gone);
- elv_unregister(&iosched_cfq);
- ioc_gone = &all_gone;
- /* ioc_gone's update must be visible before reading ioc_count */
- smp_wmb();
- if (elv_ioc_count_read(ioc_count))
- wait_for_completion(ioc_gone);
- synchronize_rcu();
- cfq_slab_kill();
-}
-
module_init(cfq_init);
-module_exit(cfq_exit);

-MODULE_AUTHOR("Jens Axboe");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Completely Fair Queueing IO scheduler");
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index a054eef..7538aa0 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -462,7 +462,6 @@ static struct elevator_type iosched_deadline = {

.elevator_attrs = deadline_attrs,
.elevator_name = "deadline",
- .elevator_owner = THIS_MODULE,
};

static int __init deadline_init(void)
@@ -470,14 +469,5 @@ static int __init deadline_init(void)
return elv_register(&iosched_deadline);
}

-static void __exit deadline_exit(void)
-{
- elv_unregister(&iosched_deadline);
-}
-
module_init(deadline_init);
-module_exit(deadline_exit);

-MODULE_AUTHOR("Jens Axboe");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("deadline IO scheduler");
diff --git a/block/elevator.c b/block/elevator.c
index 446aea2..77c4ed0 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -90,7 +90,6 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)

return 1;
}
-EXPORT_SYMBOL(elv_rq_merge_ok);

static inline int elv_try_merge(struct request *__rq, struct bio *bio)
{
@@ -121,11 +120,6 @@ static struct elevator_type *elevator_find(const char *name)
return NULL;
}

-static void elevator_put(struct elevator_type *e)
-{
- module_put(e->elevator_owner);
-}
-
static struct elevator_type *elevator_get(const char *name)
{
struct elevator_type *e;
@@ -133,8 +127,6 @@ static struct elevator_type *elevator_get(const char *name)
spin_lock(&elv_list_lock);

e = elevator_find(name);
- if (e && !try_module_get(e->elevator_owner))
- e = NULL;

spin_unlock(&elv_list_lock);

@@ -201,7 +193,6 @@ static elevator_t *elevator_alloc(struct request_queue *q,
return eq;
err:
kfree(eq);
- elevator_put(e);
return NULL;
}

@@ -209,7 +200,6 @@ static void elevator_release(struct kobject *kobj)
{
elevator_t *e = container_of(kobj, elevator_t, kobj);

- elevator_put(e->elevator_type);
kfree(e->hash);
kfree(e);
}
@@ -356,8 +346,6 @@ struct request *elv_rb_add(struct rb_root *root, struct request *rq)
return NULL;
}

-EXPORT_SYMBOL(elv_rb_add);
-
void elv_rb_del(struct rb_root *root, struct request *rq)
{
BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
@@ -365,8 +353,6 @@ void elv_rb_del(struct rb_root *root, struct request *rq)
RB_CLEAR_NODE(&rq->rb_node);
}

-EXPORT_SYMBOL(elv_rb_del);
-
struct request *elv_rb_find(struct rb_root *root, sector_t sector)
{
struct rb_node *n = root->rb_node;
@@ -386,8 +372,6 @@ struct request *elv_rb_find(struct rb_root *root, sector_t sector)
return NULL;
}

-EXPORT_SYMBOL(elv_rb_find);
-
/*
* Insert rq into dispatch queue of q. Queue lock must be held on
* entry. rq is sort instead into the dispatch queue. To be used by
@@ -428,8 +412,6 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
list_add(&rq->queuelist, entry);
}

-EXPORT_SYMBOL(elv_dispatch_sort);
-
/*
* Insert rq into dispatch queue of q. Queue lock must be held on
* entry. rq is added to the back of the dispatch queue. To be used by
@@ -449,8 +431,6 @@ void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
list_add_tail(&rq->queuelist, &q->queue_head);
}

-EXPORT_SYMBOL(elv_dispatch_add_tail);
-
int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
{
elevator_t *e = q->elevator;
@@ -960,7 +940,7 @@ void elv_unregister_queue(struct request_queue *q)
__elv_unregister_queue(q->elevator);
}

-int elv_register(struct elevator_type *e)
+int __init elv_register(struct elevator_type *e)
{
char *def = "";

@@ -977,31 +957,6 @@ int elv_register(struct elevator_type *e)
printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name, def);
return 0;
}
-EXPORT_SYMBOL_GPL(elv_register);
-
-void elv_unregister(struct elevator_type *e)
-{
- struct task_struct *g, *p;
-
- /*
- * Iterate every thread in the process to remove the io contexts.
- */
- if (e->ops.trim) {
- read_lock(&tasklist_lock);
- do_each_thread(g, p) {
- task_lock(p);
- if (p->io_context)
- e->ops.trim(p->io_context);
- task_unlock(p);
- } while_each_thread(g, p);
- read_unlock(&tasklist_lock);
- }
-
- spin_lock(&elv_list_lock);
- list_del_init(&e->list);
- spin_unlock(&elv_list_lock);
-}
-EXPORT_SYMBOL_GPL(elv_unregister);

/*
* switch to new_e io scheduler. be careful not to introduce deadlocks -
@@ -1101,10 +1056,8 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
return -EINVAL;
}

- if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name)) {
- elevator_put(e);
+ if (!strcmp(elevator_name, q->elevator->elevator_type->elevator_name))
return count;
- }

if (!elevator_switch(q, e))
printk(KERN_ERR "elevator: switch to %s failed\n",elevator_name);
@@ -1142,8 +1095,6 @@ struct request *elv_rb_former_request(struct request_queue *q,
return NULL;
}

-EXPORT_SYMBOL(elv_rb_former_request);
-
struct request *elv_rb_latter_request(struct request_queue *q,
struct request *rq)
{
@@ -1155,4 +1106,3 @@ struct request *elv_rb_latter_request(struct request_queue *q,
return NULL;
}

-EXPORT_SYMBOL(elv_rb_latter_request);
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 3b927be..5f74942 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -2264,7 +2264,6 @@ void blk_start_queueing(struct request_queue *q)
else
__generic_unplug_device(q);
}
-EXPORT_SYMBOL(blk_start_queueing);

/**
* blk_requeue_request - put a request back on queue
@@ -3794,13 +3793,10 @@ int kblockd_schedule_work(struct work_struct *work)
return queue_work(kblockd_workqueue, work);
}

-EXPORT_SYMBOL(kblockd_schedule_work);
-
void kblockd_flush_work(struct work_struct *work)
{
cancel_work_sync(work);
}
-EXPORT_SYMBOL(kblockd_flush_work);

int __init blk_dev_init(void)
{
@@ -3858,7 +3854,6 @@ void put_io_context(struct io_context *ioc)
kmem_cache_free(iocontext_cachep, ioc);
}
}
-EXPORT_SYMBOL(put_io_context);

/* Called by the exitting task */
void exit_io_context(void)
@@ -3931,7 +3926,6 @@ struct io_context *get_io_context(gfp_t gfp_flags, int node)
atomic_inc(&ret->refcount);
return ret;
}
-EXPORT_SYMBOL(get_io_context);

void copy_io_context(struct io_context **pdst, struct io_context **psrc)
{
@@ -3945,7 +3939,6 @@ void copy_io_context(struct io_context **pdst, struct io_context **psrc)
*pdst = src;
}
}
-EXPORT_SYMBOL(copy_io_context);

void swap_io_context(struct io_context **ioc1, struct io_context **ioc2)
{
@@ -3954,7 +3947,6 @@ void swap_io_context(struct io_context **ioc1, struct io_context **ioc2)
*ioc1 = *ioc2;
*ioc2 = temp;
}
-EXPORT_SYMBOL(swap_io_context);

/*
* sysfs parts below
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index 7563d8a..be8ea5f 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -96,7 +96,6 @@ static struct elevator_type elevator_noop = {
.elevator_exit_fn = noop_exit_queue,
},
.elevator_name = "noop",
- .elevator_owner = THIS_MODULE,
};

static int __init noop_init(void)
@@ -104,15 +103,5 @@ static int __init noop_init(void)
return elv_register(&elevator_noop);
}

-static void __exit noop_exit(void)
-{
- elv_unregister(&elevator_noop);
-}
-
module_init(noop_init);
-module_exit(noop_exit);
-

-MODULE_AUTHOR("Jens Axboe");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("No-op IO scheduler");
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index e8f4213..cc36d23 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -55,7 +55,6 @@ struct elevator_ops

elevator_init_fn *elevator_init_fn;
elevator_exit_fn *elevator_exit_fn;
- void (*trim)(struct io_context *);
};

#define ELV_NAME_MAX (16)
@@ -75,7 +74,6 @@ struct elevator_type
struct elevator_ops ops;
struct elv_fs_entry *elevator_attrs;
char elevator_name[ELV_NAME_MAX];
- struct module *elevator_owner;
};

/*
@@ -120,7 +118,6 @@ extern void elv_put_request(struct request_queue *, struct request *);
* io scheduler registration
*/
extern int elv_register(struct elevator_type *);
-extern void elv_unregister(struct elevator_type *);

/*
* io scheduler sysfs switching
@@ -184,28 +181,5 @@ enum {
INIT_LIST_HEAD(&(rq)->donelist); \
} while (0)

-/*
- * io context count accounting
- */
-#define elv_ioc_count_mod(name, __val) \
- do { \
- preempt_disable(); \
- __get_cpu_var(name) += (__val); \
- preempt_enable(); \
- } while (0)
-
-#define elv_ioc_count_inc(name) elv_ioc_count_mod(name, 1)
-#define elv_ioc_count_dec(name) elv_ioc_count_mod(name, -1)
-
-#define elv_ioc_count_read(name) \
-({ \
- unsigned long __val = 0; \
- int __cpu; \
- smp_wmb(); \
- for_each_possible_cpu(__cpu) \
- __val += per_cpu(name, __cpu); \
- __val; \
-})
-
#endif /* CONFIG_BLOCK */
#endif
diff --git a/kernel/sched.c b/kernel/sched.c
index 38933ca..4a90d8c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4173,7 +4173,6 @@ int task_nice(const struct task_struct *p)
{
return TASK_NICE(p);
}
-EXPORT_SYMBOL_GPL(task_nice);

/**
* idle_cpu - is a given cpu idle currently?


2007-11-25 16:21:18

by Jens Axboe

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular

On Sun, Nov 25 2007, Adrian Bunk wrote:
> There isn't any big advantage and doesn't seem to be much usage of
> modular schedulers.
>
> OTOH, the overhead made the kernel image of an x86 defconfig (that
> doesn't use modular schedulers) bigger by nearly 2 kB.

Big nack, I use it all the time for testing. Just because you don't
happen to use it is not a reason to remove it.

--
Jens Axboe

2007-11-25 16:32:18

by Adrian Bunk

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular

On Sun, Nov 25, 2007 at 05:21:07PM +0100, Jens Axboe wrote:
> On Sun, Nov 25 2007, Adrian Bunk wrote:
> > There isn't any big advantage and doesn't seem to be much usage of
> > modular schedulers.
> >
> > OTOH, the overhead made the kernel image of an x86 defconfig (that
> > doesn't use modular schedulers) bigger by nearly 2 kB.
>
> Big nack, I use it all the time for testing.

OK.

> Just because you don't
> happen to use it is not a reason to remove it.

s/you/you and all distributions you checked/

> Jens Axboe

cu
Adrian

--

"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed

2007-11-25 16:45:44

by Jens Axboe

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular

On Sun, Nov 25 2007, Adrian Bunk wrote:
> On Sun, Nov 25, 2007 at 05:21:07PM +0100, Jens Axboe wrote:
> > On Sun, Nov 25 2007, Adrian Bunk wrote:
> > > There isn't any big advantage and doesn't seem to be much usage of
> > > modular schedulers.
> > >
> > > OTOH, the overhead made the kernel image of an x86 defconfig (that
> > > doesn't use modular schedulers) bigger by nearly 2 kB.
> >
> > Big nack, I use it all the time for testing.
>
> OK.
>
> > Just because you don't
> > happen to use it is not a reason to remove it.
>
> s/you/you and all distributions you checked/

Well they should make them modules (two of them, that is). It's been a
long time since I considered a distro .config a benchmark/guideline of
any sort.

--
Jens Axboe

2007-11-25 16:57:18

by Adrian Bunk

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular

On Sun, Nov 25, 2007 at 05:45:32PM +0100, Jens Axboe wrote:
> On Sun, Nov 25 2007, Adrian Bunk wrote:
> > On Sun, Nov 25, 2007 at 05:21:07PM +0100, Jens Axboe wrote:
> > > On Sun, Nov 25 2007, Adrian Bunk wrote:
> > > > There isn't any big advantage and doesn't seem to be much usage of
> > > > modular schedulers.
> > > >
> > > > OTOH, the overhead made the kernel image of an x86 defconfig (that
> > > > doesn't use modular schedulers) bigger by nearly 2 kB.
> > >
> > > Big nack, I use it all the time for testing.
> >
> > OK.
> >
> > > Just because you don't
> > > happen to use it is not a reason to remove it.
> >
> > s/you/you and all distributions you checked/
>
> Well they should make them modules (two of them, that is).
>...

Is there any technical reason why we need 4 different schedulers at all?

I have the gut feeling that the usual thing happens and people e.g. do not
report some cfq problems because 'as' works for them...

> Jens Axboe

cu
Adrian

--

"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed

2007-11-25 17:22:50

by Jens Axboe

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular

On Sun, Nov 25 2007, Adrian Bunk wrote:
> On Sun, Nov 25, 2007 at 05:45:32PM +0100, Jens Axboe wrote:
> > On Sun, Nov 25 2007, Adrian Bunk wrote:
> > > On Sun, Nov 25, 2007 at 05:21:07PM +0100, Jens Axboe wrote:
> > > > On Sun, Nov 25 2007, Adrian Bunk wrote:
> > > > > There isn't any big advantage and doesn't seem to be much usage of
> > > > > modular schedulers.
> > > > >
> > > > > OTOH, the overhead made the kernel image of an x86 defconfig (that
> > > > > doesn't use modular schedulers) bigger by nearly 2 kB.
> > > >
> > > > Big nack, I use it all the time for testing.
> > >
> > > OK.
> > >
> > > > Just because you don't
> > > > happen to use it is not a reason to remove it.
> > >
> > > s/you/you and all distributions you checked/
> >
> > Well they should make them modules (two of them, that is).
> >...
>
> Is there any technical reason why we need 4 different schedulers at all?

Until we have the perfect scheduler :-)

With some hard work and testing, we should be able to get rid of 'as'.
It still beats cfq for some of the workloads that deadline is good at,
so not quite yet.

> I have the gut feeling that the usual thing happens and people e.g. not
> report some cfq problems because as works for them...

There's always a risk with "duplicate", like several drivers for the
same hardware. I'm not disputing that.

--
Jens Axboe

2007-11-25 23:29:35

by Arjan van de Ven

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular

On Sun, 25 Nov 2007 17:56:54 +0100
Adrian Bunk <[email protected]> wrote:

> Is there any technical reason why we need 4 different schedulers at
> all?
>

there is at least one technical reason to need more than one: certain
types of storage (both big EMC boxes as well as solid state disks)
don't behave like disks and have no seek penalty; any cpu time spent on
avoiding seeks is wasted on those, so for these devices one really
wants to use a different IO scheduler, one which is much lighter weight

2007-11-26 05:06:25

by Al Boldi

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular

Jens Axboe wrote:
> On Sun, Nov 25 2007, Adrian Bunk wrote:
> > Is there any technical reason why we need 4 different schedulers at all?
>
> Until we have the perfect scheduler :-)
>
> With some hard work and testing, we should be able to get rid of 'as'.
> It still beats cfq for some of the workloads that deadline is good at,
> so not quite yet.
>
> > I have the gut feeling that the usual thing happens and people e.g. not
> > report some cfq problems because as works for them...
>
> There's always a risk with "duplicate", like several drivers for the
> same hardware. I'm not disputing that.

Actually, both 'cfq' and 'as' are broken, and have been repeatedly reported
as such. Deadline is the only one that currently looks sane, and seems like
a good starting point for a more involved iosched. But keep in mind, the
fact that 'cfq' and 'as' are broken may also point to a lower-level block-io
problem. So, incrementally improving deadline may help discovering the
problems both 'cfq' and 'as' are plagued with.


Thanks!

--
Al

2007-11-26 05:13:34

by Andrew Morton

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular


(cc's lovingly restored. Please do not do that)

On Mon, 26 Nov 2007 07:57:00 +0300 Al Boldi <[email protected]> wrote:

> Jens Axboe wrote:
> > On Sun, Nov 25 2007, Adrian Bunk wrote:
> > > Is there any technical reason why we need 4 different schedulers at all?
> >
> > Until we have the perfect scheduler :-)
> >
> > With some hard work and testing, we should be able to get rid of 'as'.
> > It still beats cfq for some of the workloads that deadline is good at,
> > so not quite yet.
> >
> > > I have the gut feeling that the usual thing happens and people e.g. not
> > > report some cfq problems because as works for them...
> >
> > There's always a risk with "duplicate", like several drivers for the
> > same hardware. I'm not disputing that.
>
> Actually, both 'cfq' and 'as' are broken, and have been repeatedly reported
> as such. Deadline is the only one that currently looks sane, and seems like
> a good starting point for a more involved iosched. But keep in mind, the
> fact that 'cfq' and 'as' are broken may also point to a lower-level block-io
> problem. So, incrementally improving deadline may help discovering the
> problems both 'cfq' and 'as' are plagued with.
>

Sorry, but these are vague and unuseful assertions.

Please send bug reports, preferably with testcases which developers can use
when fixing the bugs.

2007-11-26 05:30:34

by Al Boldi

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular

Andrew Morton wrote:
> (cc's lovingly restored. Please do not do that)

Thanks! I'm replying off list.

> On Mon, 26 Nov 2007 07:57:00 +0300 Al Boldi <[email protected]> wrote:
> > Jens Axboe wrote:
> > > On Sun, Nov 25 2007, Adrian Bunk wrote:
> > > > Is there any technical reason why we need 4 different schedulers at
> > > > all?
> > >
> > > Until we have the perfect scheduler :-)
> > >
> > > With some hard work and testing, we should be able to get rid of 'as'.
> > > It still beats cfq for some of the workloads that deadline is good at,
> > > so not quite yet.
> > >
> > > > I have the gut feeling that the usual thing happens and people e.g.
> > > > not report some cfq problems because as works for them...
> > >
> > > There's always a risk with "duplicate", like several drivers for the
> > > same hardware. I'm not disputing that.
> >
> > Actually, both 'cfq' and 'as' are broken, and have been repeatedly
> > reported as such. Deadline is the only one that currently looks sane,
> > and seems like a good starting point for a more involved iosched. But
> > keep in mind, the fact that 'cfq' and 'as' are broken may also point to
> > a lower-level block-io problem. So, incrementally improving deadline
> > may help discovering the problems both 'cfq' and 'as' are plagued with.
>
> Sorry, but these are vague and unuseful assertions.
>
> Please send bug reports, preferably with testcases which developers can
> use when fixing the bugs.

http://bugzilla.kernel.org/show_bug.cgi?id=5900


Thanks again!

--
Al

2007-11-27 07:04:44

by Jarek Poplawski

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular

On 25-11-2007 18:22, Jens Axboe wrote:
> On Sun, Nov 25 2007, Adrian Bunk wrote:
...
>> Is there any technical reason why we need 4 different schedulers at all?
>
> Until we have the perfect scheduler :-)

IMHO this is not enough yet. There is something called "the right
of choice", and, it seems, things are usually far from perfect
where this right is not respected.

Regards,
Jarek P.

2007-11-27 16:47:42

by Adrian Bunk

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular

On Tue, Nov 27, 2007 at 08:09:12AM +0100, Jarek Poplawski wrote:
> On 25-11-2007 18:22, Jens Axboe wrote:
> > On Sun, Nov 25 2007, Adrian Bunk wrote:
> ...
> >> Is there any technical reason why we need 4 different schedulers at all?
> >
> > Until we have the perfect scheduler :-)
>
> IMHO this is not enough yet. There is something called "the right
> of choice",

That's a common misconception about open source software:

There is nothing like a "right of choice".
There is a "right to change the source code".

This means you cannot demand from anyone to offer any choices, but you
can fork the code yourself and use and distribute modified code
containing any choices you consider reasonable.

> and, it seems, things are usually far from perfect
> where this right is not respected.

That's wrong.

It's actually often much worse to have different choices with different
features and bugfixes than having one version that contains all features
and all bugfixes.

> Regards,
> Jarek P.

cu
Adrian

--

"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed

2007-11-27 22:13:20

by Jarek Poplawski

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular

Adrian Bunk wrote, On 11/27/2007 05:47 PM:

> On Tue, Nov 27, 2007 at 08:09:12AM +0100, Jarek Poplawski wrote:
>> On 25-11-2007 18:22, Jens Axboe wrote:
>>> On Sun, Nov 25 2007, Adrian Bunk wrote:
>> ...
>>>> Is there any technical reason why we need 4 different schedulers at all?
>>> Until we have the perfect scheduler :-)
>> IMHO this is not enough yet. There is something called "the right
>> of choice",
>
> That's a common misconception about open source software:
>
> There is nothing like a "right of choice".
> There is a "right to change the source code".


Maybe you are right, maybe I've used wrong words... But, e.g., google
pretends to know about this first right too. And I've meant generally,
not about open software.

>
> This means you cannot demand from anyone to offer any choices, but you
> can fork the code yourself and use and distribute modified code
> containing any choices you consider reasonable.


I don't demand anything. I've only expressed my personal opinion
that usually (if possible) the choice is better than no choice.
And, since I don't know anything in open source forbidding this, I
can ask, why you demand to take away offered choices; actually, I
think it would be much easier if you could fork the other way...

>> and, it seems, things are usually far from perfect
>> where this right is not respected.
>
> That's wrong.
>
> It's actually often much worse to have different choices with different
> features and bugfixes than having one version that contains all features
> and all bugfixes.
>


It's only a part of the theory: usually it's easier to find some bugs
if there is a possibility to compare performance with other options;
there is also a kind of stimulation and flow of new ideas between them.
Otherwise it's not so hard to overlook some stagnation.

Regards,
Jarek P.

2007-11-27 22:53:54

by Adrian Bunk

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular

On Tue, Nov 27, 2007 at 11:15:48PM +0100, Jarek Poplawski wrote:
> Adrian Bunk wrote, On 11/27/2007 05:47 PM:
>
> > On Tue, Nov 27, 2007 at 08:09:12AM +0100, Jarek Poplawski wrote:
> >> On 25-11-2007 18:22, Jens Axboe wrote:
> >>> On Sun, Nov 25 2007, Adrian Bunk wrote:
> >> ...
> >>>> Is there any technical reason why we need 4 different schedulers at all?
> >>> Until we have the perfect scheduler :-)
> >> IMHO this is not enough yet. There is something called "the right
> >> of choice",
> >
> > That's a common misconception about open source software:
> >
> > There is nothing like a "right of choice".
> > There is a "right to change the source code".
>
> Maybe you are right, maybe I've used wrong words... But, e.g., google
> pretends to know about this first right too. And I've meant generally,
> not about open software.

Most Google hits are about abortion.

The fact that people use this term in some completely different
context does not give it the meaning you implied it had.

Oh, and this right of choice also does not exist in Poland...

> > This means you cannot demand from anyone to offer any choices, but you
> > can fork the code yourself and use and distribute modified code
> > containing any choices you consider reasonable.
>
> I don't demand anything. I've only expressed my personal opinion
> that usually (if possible) the choice is better than no choice.

And I'm trying to explain why your personal opinion is wrong in many
cases.

> And, since I don't know anything in open source forbiding this, I
> can ask, why you demand to take away offered choices; actually, I
> think it would be much easier if you could fork the other way...

There's nothing forbidding this, it's simply a question of what results in
a better kernel (see below).

> >> and, it seems, things are usually far from perfect
> >> where this right is not respected.
> >
> > That's wrong.
> >
> > It's actually often much worse to have different choices with different
> > features and bugfixes than having one version that contains all features
> > and all bugfixes.
>
> It's only a part of the theory: usually it's easier to find some bugs
> if there is a possibility to compare a performance with other options;
> there is also kind of stimulation and flow of new ideas between them.
> Otherwise it's not so hard to overlook some stagnation.

Let's leave the theory.

As one of the most active code removers in the kernel [1], I can tell
you what actually happens in practice:

Given:
- two choices A and B
- user tried choice A and it has a problem (e.g. doesn't work or has
bad performance)

What happens:
- if choice B works, user uses choice B

What happens without choice B:
- user reports the problem and choice A gets fixed

It's always surprising how many people complain when you deprecate or
remove a choice B that choice A wouldn't work for them, and who had
never reported their problems before since choice B worked for them...

> Regards,
> Jarek P.

cu
Adrian

[1] http://lwn.net/Articles/247582/

--

"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed

2007-11-27 22:59:24

by Jarek Poplawski

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular

Jarek Poplawski wrote, On 11/27/2007 11:15 PM:
...

> Otherwise it's not so hard to overlook some stagnation.

Btw., after this 'forking' thing etc. it seems I might have lost the point
a little: which removed choices should justify such a fork. But, I hope,
you didn't mean your patch only, because then e.g. this stagnation threat
looks a bit exaggerated...

Jarek P.

2007-11-27 23:22:16

by Adrian Bunk

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular

On Wed, Nov 28, 2007 at 12:02:08AM +0100, Jarek Poplawski wrote:
> Jarek Poplawski wrote, On 11/27/2007 11:15 PM:
> ...
>
> > Otherwise it's not so hard to overlook some stagnation.
>
> Btw., after this 'forking' thing etc. it seems I might have lost the point
> a little: which removed choices should justify such a fork.

Let me try to rephrase it:

If you think an open source project does something wrong you have the
right to fork it and offer an (in your opinion) better version.

This is the right you have.

But if you think open source gives you any legal or moral right to
demand any features or choices or whatever from developers you are
completely mistaken.

> But, I hope,
> you didn't mean your patch only, because then e.g. this stagnation threat
> looks like a bit exaggerated...

The question of how many I/O schedulers we need is in any case in no way
related to my patch.

> Jarek P.

cu
Adrian

--

"Is there not promise of rain?" Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
"Only a promise," Lao Er said.
Pearl S. Buck - Dragon Seed

2007-11-28 00:16:58

by Jarek Poplawski

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular

Adrian Bunk wrote, On 11/27/2007 11:53 PM:

> On Tue, Nov 27, 2007 at 11:15:48PM +0100, Jarek Poplawski wrote:
...
> Most Google hits are about abortion.
>
> The fact that people use this term in some completely different
> context does not give it the meaning you implied it had.
>
> Oh, and this right of choice also does not exist in Poland...


Anyway, your later arguments could suggest you've understood,

what I've meant. And maybe abortion isn't bad association here...

...

> As one of the most active code removers in the kernel [1], I can tell
> you what actually happens in practice:

...
> It's always surprising how many people complain when you deprecate or

> remove a choice B that choice A wouldn't work for them, and who had
> never reported their problems before since choice B worked for them...


Of course, all these choices should be reasonably limited, so the
opinions of users and maintainers should be always considered.

But, I was rather against something else: removing some maybe not very
popular, but still not buggy options, only to save a few kilobytes or
maintainers' time.


> [1] http://lwn.net/Articles/247582/


My congratulations! Of course, removing is something necessary, but I wish

you many problems! (== many users)

Thanks,
Jarek P.

2007-12-30 17:48:31

by Jarek Poplawski

[permalink] [raw]
Subject: Re: [2.6 patch] make I/O schedulers non-modular

Jarek Poplawski wrote, On 11/27/2007 11:15 PM:

> Adrian Bunk wrote, On 11/27/2007 05:47 PM:

...

>> There is nothing like a "right of choice".


(very late) PS:

...I was a bit confused with this, wondering: so, we've envied you
(the West) this "thing" for so many years, and now it seems, you have
no idea what this is all about?! Happily it was only my English:

http://en.wikipedia.org/wiki/The_Paradox_of_Choice:_Why_More_Is_Less


"Freedom of choice" was the right term!

Regards,
Jarek P.


PPS: But, of course, no need to discuss this more... unless we're
interested in the next Nobel Prize.