2003-09-08 16:47:43

by Dave Olien

[permalink] [raw]
Subject: Badness in as_completed_request warning


I'm getting a "badness" warning from as_iosched.c, line 917.
This is on 2.6.0-test4-mm6, running on a 2 cpu x86.

The warning occurs when I run three or more instances of mkfs.ext2, where
each mkfs is running on its own disk. If I run four or more mkfs's
on paritions all on the same disk I don't get this warning.
The more instances of mkfs I run (each on separate disks), the
more instances of this warning I receive.

The mkfs's all progress at the same rate. The warnings
occur as all mkfs's are "writing superblocks and filesystem
accounting information", near the end of the mkfs.

# Badness in as_completed_request at drivers/block/as-iosched.c:917

Call Trace:
[<c0220d0d>] as_completed_request+0x1ad/0x1b0
[<c021946f>] elv_completed_request+0x1f/0x30
[<c021b87c>] __blk_put_request+0x3c/0xc0
[<c021c83f>] end_that_request_last+0x5f/0xf0
[<c0234d02>] DAC960_V2_ProcessCompletedCommand+0x172/0xf50
[<c029712a>] ip_rcv+0x39a/0x520
[<c011ebf4>] recalc_task_prio+0xb4/0x1f0
[<c011f64b>] load_balance+0x1bb/0x430
[<c0235bfd>] DAC960_LP_InterruptHandler+0x6d/0xb0
[<c010e789>] handle_IRQ_event+0x49/0x80
[<c010eb00>] do_IRQ+0xa0/0x150
[<c010abd0>] default_idle+0x0/0x40
[<c02f2f8c>] common_interrupt+0x18/0x20
[<c010abd0>] default_idle+0x0/0x40
[<c010abfd>] default_idle+0x2d/0x40
[<c010ac96>] cpu_idle+0x46/0x50
[<c0107000>] rest_init+0x0/0x60
[<c039092d>] start_kernel+0x18d/0x1c0
[<c03904a0>] unknown_bootoption+0x0/0x120


2003-09-09 03:41:43

by Nick Piggin

[permalink] [raw]
Subject: Re: Badness in as_completed_request warning

linux-2.6-npiggin/drivers/block/as-iosched.c | 23 +++++++++++++++++++++++
1 files changed, 23 insertions(+)

diff -puN drivers/block/as-iosched.c~as-warn-fix drivers/block/as-iosched.c
--- linux-2.6/drivers/block/as-iosched.c~as-warn-fix 2003-09-09 13:01:04.000000000 +1000
+++ linux-2.6-npiggin/drivers/block/as-iosched.c 2003-09-09 13:21:12.000000000 +1000
@@ -381,8 +381,14 @@ static void as_add_arq_rb(struct as_data
arq->rb_key = rq_rb_key(rq);

/* This can be caused by direct IO */
+#if 0
while ((alias = __as_add_arq_rb(ad, arq)))
as_move_to_dispatch(ad, alias);
+#endif
+ if ((alias = __as_add_arq_rb(ad, arq))) {
+ list_add_tail(&arq->request->queuelist,
+ &alias->request->queuelist);
+ }

rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
}
@@ -1131,7 +1137,24 @@ static void as_move_to_dispatch(struct a
* take it off the sort and fifo list, add to dispatch queue
*/
as_remove_queued_request(ad->q, arq->request);
+
+ for (;;) {
+ struct list_head *alias = &arq->request->queuelist;
+
+ if (!list_empty(alias)) {
+ struct request *rq;
+ rq = list_entry_rq(alias->next);
+ list_del(&rq->queuelist);
+ list_add_tail(&rq->queuelist, ad->dispatch);
+
+ if (arq->io_context && arq->io_context->aic)
+ atomic_inc(&arq->io_context->aic->nr_dispatched);
+ } else
+ break;
+ }
+
list_add_tail(&arq->request->queuelist, ad->dispatch);
+
if (arq->io_context && arq->io_context->aic)
atomic_inc(&arq->io_context->aic->nr_dispatched);


_


Attachments:
as-warn-fix.patch (1.46 kB)

2003-09-09 06:12:31

by Dave Olien

[permalink] [raw]
Subject: Re: Badness in as_completed_request warning

On Tue, Sep 09, 2003 at 01:40:57PM +1000, Nick Piggin wrote:
> Hi Dave,
> Can you try 2.6.0-test5 with the following patch please. Thanks
>

I applied this patch, and gave it a shot. It seems to have made things
worse. Instead of the warnings occurring only at the end of the mkfs
runs, they now occur pretty much constantly, and it finally ends
with an Bug_On. Attached is the console output. There are 20
occurrances of Badness in as-iosched.c line 1020. There is one
occurrance of Badness in as-iosched.c line 1025. The Bug_On() is in
as-iosched.c line 1243.

Console output is attached.


Attachments:
(No filename) (599.00 B)
typescript (38.34 kB)
Download all attachments

2003-09-09 07:04:27

by Nick Piggin

[permalink] [raw]
Subject: Re: Badness in as_completed_request warning

linux-2.6-npiggin/drivers/block/as-iosched.c | 60 +++++++++++++++++++--------
1 files changed, 43 insertions(+), 17 deletions(-)

diff -puN drivers/block/as-iosched.c~as-warn-fix drivers/block/as-iosched.c
--- linux-2.6/drivers/block/as-iosched.c~as-warn-fix 2003-09-09 17:02:41.000000000 +1000
+++ linux-2.6-npiggin/drivers/block/as-iosched.c 2003-09-09 17:02:41.000000000 +1000
@@ -373,7 +373,7 @@ static void as_move_to_dispatch(struct a
* direct IO, then move the alias to the dispatch list and then add the
* request.
*/
-static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
+static int as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
{
struct as_rq *alias;
struct request *rq = arq->request;
@@ -381,10 +381,20 @@ static void as_add_arq_rb(struct as_data
arq->rb_key = rq_rb_key(rq);

/* This can be caused by direct IO */
+#if 0
while ((alias = __as_add_arq_rb(ad, arq)))
as_move_to_dispatch(ad, alias);
+#endif
+ if ((alias = __as_add_arq_rb(ad, arq))) {
+ arq->state = AS_RQ_NEW;
+ list_add_tail(&arq->request->queuelist,
+ &alias->request->queuelist);
+
+ return 1;
+ }

rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
+ return 0;
}

static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq)
@@ -1131,7 +1141,21 @@ static void as_move_to_dispatch(struct a
* take it off the sort and fifo list, add to dispatch queue
*/
as_remove_queued_request(ad->q, arq->request);
+
+ for (;;) {
+ struct list_head *alias = &arq->request->queuelist;
+
+ if (!list_empty(alias)) {
+ struct request *rq;
+ rq = list_entry_rq(alias->next);
+ list_del(&rq->queuelist);
+ list_add_tail(&rq->queuelist, ad->dispatch);
+ } else
+ break;
+ }
+
list_add_tail(&arq->request->queuelist, ad->dispatch);
+
if (arq->io_context && arq->io_context->aic)
atomic_inc(&arq->io_context->aic->nr_dispatched);

@@ -1303,22 +1327,22 @@ static void as_add_request(struct as_dat
arq->is_sync = 0;
data_dir = arq->is_sync;

- arq->io_context = as_get_io_context();
+ if (!as_add_arq_rb(ad, arq)) {
+ arq->io_context = as_get_io_context();

- if (arq->io_context) {
- atomic_inc(&arq->io_context->aic->nr_queued);
- as_update_iohist(arq->io_context->aic, arq->request);
- }
-
- as_add_arq_rb(ad, arq);
+ if (arq->io_context) {
+ atomic_inc(&arq->io_context->aic->nr_queued);
+ as_update_iohist(arq->io_context->aic, arq->request);
+ }

- /*
- * set expire time (only used for reads) and add to fifo list
- */
- arq->expires = jiffies + ad->fifo_expire[data_dir];
- list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
- arq->state = AS_RQ_QUEUED;
- as_update_arq(ad, arq); /* keep state machine up to date */
+ /*
+ * set expire time (only used for reads) and add to fifo list
+ */
+ arq->expires = jiffies + ad->fifo_expire[data_dir];
+ list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
+ arq->state = AS_RQ_QUEUED;
+ as_update_arq(ad, arq); /* keep state machine up to date */
+ }
}

/*
@@ -1495,7 +1519,8 @@ static void as_merged_request(request_qu
*/
if (rq_rb_key(req) != arq->rb_key) {
as_del_arq_rb(ad, arq);
- as_add_arq_rb(ad, arq);
+ if (as_add_arq_rb(ad, arq))
+ WARN_ON(1);
/*
* Note! At this stage of this and the next function, our next
* request may not be optimal - eg the request may have "grown"
@@ -1526,7 +1551,8 @@ as_merged_requests(request_queue_t *q, s

if (rq_rb_key(req) != arq->rb_key) {
as_del_arq_rb(ad, arq);
- as_add_arq_rb(ad, arq);
+ if (as_add_arq_rb(ad, arq))
+ WARN_ON(1);
}

/*

_


Attachments:
as-warn-fix.patch (3.48 kB)

2003-09-09 15:37:49

by Dave Olien

[permalink] [raw]
Subject: Re: Badness in as_completed_request warning


Sorry, this one Bug()'d really quickly. I applied this patch to
an unmodified test5 kernel. Console output is attached.

On Tue, Sep 09, 2003 at 05:04:10PM +1000, Nick Piggin wrote:
>
> Thanks Dave,
> Can you try this one? I can't reproduce the problem here as I don't have
> enough disks unfortunately. Thanks
>
> Nick

> linux-2.6-npiggin/drivers/block/as-iosched.c | 60 +++++++++++++++++++--------
> 1 files changed, 43 insertions(+), 17 deletions(-)
>
> diff -puN drivers/block/as-iosched.c~as-warn-fix drivers/block/as-iosched.c
> --- linux-2.6/drivers/block/as-iosched.c~as-warn-fix 2003-09-09 17:02:41.000000000 +1000
> +++ linux-2.6-npiggin/drivers/block/as-iosched.c 2003-09-09 17:02:41.000000000 +1000
> @@ -373,7 +373,7 @@ static void as_move_to_dispatch(struct a
> * direct IO, then move the alias to the dispatch list and then add the
> * request.
> */
> -static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
> +static int as_add_arq_rb(struct as_data *ad, struct as_rq *arq)
> {
> struct as_rq *alias;
> struct request *rq = arq->request;
> @@ -381,10 +381,20 @@ static void as_add_arq_rb(struct as_data
> arq->rb_key = rq_rb_key(rq);
>
> /* This can be caused by direct IO */
> +#if 0
> while ((alias = __as_add_arq_rb(ad, arq)))
> as_move_to_dispatch(ad, alias);
> +#endif
> + if ((alias = __as_add_arq_rb(ad, arq))) {
> + arq->state = AS_RQ_NEW;
> + list_add_tail(&arq->request->queuelist,
> + &alias->request->queuelist);
> +
> + return 1;
> + }
>
> rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq));
> + return 0;
> }
>
> static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq)
> @@ -1131,7 +1141,21 @@ static void as_move_to_dispatch(struct a
> * take it off the sort and fifo list, add to dispatch queue
> */
> as_remove_queued_request(ad->q, arq->request);
> +
> + for (;;) {
> + struct list_head *alias = &arq->request->queuelist;
> +
> + if (!list_empty(alias)) {
> + struct request *rq;
> + rq = list_entry_rq(alias->next);
> + list_del(&rq->queuelist);
> + list_add_tail(&rq->queuelist, ad->dispatch);
> + } else
> + break;
> + }
> +
> list_add_tail(&arq->request->queuelist, ad->dispatch);
> +
> if (arq->io_context && arq->io_context->aic)
> atomic_inc(&arq->io_context->aic->nr_dispatched);
>
> @@ -1303,22 +1327,22 @@ static void as_add_request(struct as_dat
> arq->is_sync = 0;
> data_dir = arq->is_sync;
>
> - arq->io_context = as_get_io_context();
> + if (!as_add_arq_rb(ad, arq)) {
> + arq->io_context = as_get_io_context();
>
> - if (arq->io_context) {
> - atomic_inc(&arq->io_context->aic->nr_queued);
> - as_update_iohist(arq->io_context->aic, arq->request);
> - }
> -
> - as_add_arq_rb(ad, arq);
> + if (arq->io_context) {
> + atomic_inc(&arq->io_context->aic->nr_queued);
> + as_update_iohist(arq->io_context->aic, arq->request);
> + }
>
> - /*
> - * set expire time (only used for reads) and add to fifo list
> - */
> - arq->expires = jiffies + ad->fifo_expire[data_dir];
> - list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
> - arq->state = AS_RQ_QUEUED;
> - as_update_arq(ad, arq); /* keep state machine up to date */
> + /*
> + * set expire time (only used for reads) and add to fifo list
> + */
> + arq->expires = jiffies + ad->fifo_expire[data_dir];
> + list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
> + arq->state = AS_RQ_QUEUED;
> + as_update_arq(ad, arq); /* keep state machine up to date */
> + }
> }
>
> /*
> @@ -1495,7 +1519,8 @@ static void as_merged_request(request_qu
> */
> if (rq_rb_key(req) != arq->rb_key) {
> as_del_arq_rb(ad, arq);
> - as_add_arq_rb(ad, arq);
> + if (as_add_arq_rb(ad, arq))
> + WARN_ON(1);
> /*
> * Note! At this stage of this and the next function, our next
> * request may not be optimal - eg the request may have "grown"
> @@ -1526,7 +1551,8 @@ as_merged_requests(request_queue_t *q, s
>
> if (rq_rb_key(req) != arq->rb_key) {
> as_del_arq_rb(ad, arq);
> - as_add_arq_rb(ad, arq);
> + if (as_add_arq_rb(ad, arq))
> + WARN_ON(1);
> }
>
> /*
>
> _


Attachments:
(No filename) (4.03 kB)
typescript (2.10 kB)
Download all attachments

2003-09-09 16:43:35

by Dave Olien

[permalink] [raw]
Subject: Re: Badness in as_completed_request warning


On Tue, Sep 09, 2003 at 05:04:10PM +1000, Nick Piggin wrote:
>
> Thanks Dave,
> Can you try this one? I can't reproduce the problem here as I don't have
> enough disks unfortunately. Thanks
>
> Nick

One of OSDL's reasons for existance is to arrange for Linux developers
to have access to larger hardware configurations. I'm sure you could
get a project machine here at OSDL to do testing of I/O scheduler
modifications, or other testing on a larger machine. Several other
kernel developers have project machines here at OSDL.

Look at the web page

http://www.osdl.org/lab_activities/be_an_associate.html

Fill out the web page form to get a login. send email to [email protected]
and me, and we'll see that it gets handled quickly.

You'd be able to ssh into a front-end machine here at OSDL. It would
give you serial console access to a test machine. You'd also be able
to power on/power off and reset the test machine, and ssh into the
test machine from that front-end.

Dave