2009-06-26 09:05:42

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 0/2] cfq-iosched: get rid of __GFP_NOFAIL

Hi,

So these are the two patches that get rid of __GFP_NOFAIL in
the CFQ IO scheduler. It's been tested with injecting errors
into the cfqq allocation and verifying that it both triggers
and works as expected.

--
Jens Axboe


2009-06-26 09:06:14

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 2/2] cfq-iosched: get rid of the need for __GFP_FAIL in cfq_find_alloc_queue()

Setup an emergency fallback cfqq that we allocate at IO scheduler init
time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
never fails without having to ensure free memory.

Signed-off-by: Jens Axboe <[email protected]>
---
block/cfq-iosched.c | 124 +++++++++++++++++++++++++++-----------------------
1 files changed, 67 insertions(+), 57 deletions(-)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index c760ae7..91e7e0b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -71,6 +71,51 @@ struct cfq_rb_root {
#define CFQ_RB_ROOT (struct cfq_rb_root) { RB_ROOT, NULL, }

/*
+ * Per process-grouping structure
+ */
+struct cfq_queue {
+ /* reference count */
+ atomic_t ref;
+ /* various state flags, see below */
+ unsigned int flags;
+ /* parent cfq_data */
+ struct cfq_data *cfqd;
+ /* service_tree member */
+ struct rb_node rb_node;
+ /* service_tree key */
+ unsigned long rb_key;
+ /* prio tree member */
+ struct rb_node p_node;
+ /* prio tree root we belong to, if any */
+ struct rb_root *p_root;
+ /* sorted list of pending requests */
+ struct rb_root sort_list;
+ /* if fifo isn't expired, next request to serve */
+ struct request *next_rq;
+ /* requests queued in sort_list */
+ int queued[2];
+ /* currently allocated requests */
+ int allocated[2];
+ /* fifo list of requests in sort_list */
+ struct list_head fifo;
+
+ unsigned long slice_end;
+ long slice_resid;
+ unsigned int slice_dispatch;
+
+ /* pending metadata requests */
+ int meta_pending;
+ /* number of requests that are on the dispatch list or inside driver */
+ int dispatched;
+
+ /* io prio of this group */
+ unsigned short ioprio, org_ioprio;
+ unsigned short ioprio_class, org_ioprio_class;
+
+ pid_t pid;
+};
+
+/*
* Per block device queue structure
*/
struct cfq_data {
@@ -135,51 +180,11 @@ struct cfq_data {
unsigned int cfq_slice_idle;

struct list_head cic_list;
-};

-/*
- * Per process-grouping structure
- */
-struct cfq_queue {
- /* reference count */
- atomic_t ref;
- /* various state flags, see below */
- unsigned int flags;
- /* parent cfq_data */
- struct cfq_data *cfqd;
- /* service_tree member */
- struct rb_node rb_node;
- /* service_tree key */
- unsigned long rb_key;
- /* prio tree member */
- struct rb_node p_node;
- /* prio tree root we belong to, if any */
- struct rb_root *p_root;
- /* sorted list of pending requests */
- struct rb_root sort_list;
- /* if fifo isn't expired, next request to serve */
- struct request *next_rq;
- /* requests queued in sort_list */
- int queued[2];
- /* currently allocated requests */
- int allocated[2];
- /* fifo list of requests in sort_list */
- struct list_head fifo;
-
- unsigned long slice_end;
- long slice_resid;
- unsigned int slice_dispatch;
-
- /* pending metadata requests */
- int meta_pending;
- /* number of requests that are on the dispatch list or inside driver */
- int dispatched;
-
- /* io prio of this group */
- unsigned short ioprio, org_ioprio;
- unsigned short ioprio_class, org_ioprio_class;
-
- pid_t pid;
+ /*
+ * Fallback dummy cfqq for extreme OOM conditions
+ */
+ struct cfq_queue oom_cfqq;
};

enum cfqq_state_flags {
@@ -1686,28 +1691,28 @@ retry:
*/
spin_unlock_irq(cfqd->queue->queue_lock);
new_cfqq = kmem_cache_alloc_node(cfq_pool,
- gfp_mask | __GFP_NOFAIL | __GFP_ZERO,
+ gfp_mask | __GFP_ZERO,
cfqd->queue->node);
spin_lock_irq(cfqd->queue->queue_lock);
- goto retry;
+ if (new_cfqq)
+ goto retry;
} else {
cfqq = kmem_cache_alloc_node(cfq_pool,
gfp_mask | __GFP_ZERO,
cfqd->queue->node);
- if (!cfqq)
- goto out;
}

- cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
- cfq_init_prio_data(cfqq, ioc);
- cfq_log_cfqq(cfqd, cfqq, "alloced");
+ if (cfqq) {
+ cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
+ cfq_init_prio_data(cfqq, ioc);
+ cfq_log_cfqq(cfqd, cfqq, "alloced");
+ } else
+ cfqq = &cfqd->oom_cfqq;
}

if (new_cfqq)
kmem_cache_free(cfq_pool, new_cfqq);

-out:
- WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
return cfqq;
}

@@ -1740,11 +1745,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
cfqq = *async_cfqq;
}

- if (!cfqq) {
+ if (!cfqq)
cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
- if (!cfqq)
- return NULL;
- }

/*
* pin the queue now that it's allocated, scheduler exit will prune it
@@ -2470,6 +2472,14 @@ static void *cfq_init_queue(struct request_queue *q)
for (i = 0; i < CFQ_PRIO_LISTS; i++)
cfqd->prio_trees[i] = RB_ROOT;

+ /*
+ * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
+ * Grab a permanent reference to it, so that the normal code flow
+ * will not attempt to free it.
+ */
+ cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
+ atomic_inc(&cfqd->oom_cfqq.ref);
+
INIT_LIST_HEAD(&cfqd->cic_list);

cfqd->queue = q;
--
1.6.3.2.306.g4f4fa

2009-06-26 09:05:55

by Jens Axboe

[permalink] [raw]
Subject: [PATCH 1/2] cfq-iosched: move cfqq initialization out of cfq_find_alloc_queue()

We're going to be needing that init code outside of that function
to get rid of the __GFP_NOFAIL in cfqq allocation.

Signed-off-by: Jens Axboe <[email protected]>
---
block/cfq-iosched.c | 37 +++++++++++++++++++++----------------
1 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 833ec18..c760ae7 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1641,6 +1641,26 @@ static void cfq_ioc_set_ioprio(struct io_context *ioc)
ioc->ioprio_changed = 0;
}

+static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+ pid_t pid, int is_sync)
+{
+ RB_CLEAR_NODE(&cfqq->rb_node);
+ RB_CLEAR_NODE(&cfqq->p_node);
+ INIT_LIST_HEAD(&cfqq->fifo);
+
+ atomic_set(&cfqq->ref, 0);
+ cfqq->cfqd = cfqd;
+
+ cfq_mark_cfqq_prio_changed(cfqq);
+
+ if (is_sync) {
+ if (!cfq_class_idle(cfqq))
+ cfq_mark_cfqq_idle_window(cfqq);
+ cfq_mark_cfqq_sync(cfqq);
+ }
+ cfqq->pid = pid;
+}
+
static struct cfq_queue *
cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync,
struct io_context *ioc, gfp_t gfp_mask)
@@ -1678,23 +1698,8 @@ retry:
goto out;
}

- RB_CLEAR_NODE(&cfqq->rb_node);
- RB_CLEAR_NODE(&cfqq->p_node);
- INIT_LIST_HEAD(&cfqq->fifo);
-
- atomic_set(&cfqq->ref, 0);
- cfqq->cfqd = cfqd;
-
- cfq_mark_cfqq_prio_changed(cfqq);
-
+ cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
cfq_init_prio_data(cfqq, ioc);
-
- if (is_sync) {
- if (!cfq_class_idle(cfqq))
- cfq_mark_cfqq_idle_window(cfqq);
- cfq_mark_cfqq_sync(cfqq);
- }
- cfqq->pid = current->pid;
cfq_log_cfqq(cfqd, cfqq, "alloced");
}

--
1.6.3.2.306.g4f4fa

2009-06-26 16:05:29

by Jeff Moyer

[permalink] [raw]
Subject: Re: [PATCH 0/2] cfq-iosched: get rid of __GFP_NOFAIL

Jens Axboe <[email protected]> writes:

> Hi,
>
> So these are the two patches that get rid of __GFP_NOFAIL in
> the CFQ IO scheduler. It's been tested with injecting errors
> into the cfqq allocation and verifying that it both triggers
> and works as expected.

You forgot to mention why you are doing this.

Cheers,
Jeff

2009-06-26 16:09:33

by Jeff Moyer

[permalink] [raw]
Subject: Re: [PATCH 1/2] cfq-iosched: move cfqq initialization out of cfq_find_alloc_queue()

Jens Axboe <[email protected]> writes:

> We're going to be needing that init code outside of that function
> to get rid of the __GFP_NOFAIL in cfqq allocation.
>
> Signed-off-by: Jens Axboe <[email protected]>

This looks pretty straight-forward.

Reviewed-by: Jeff Moyer <[email protected]>

2009-06-26 16:26:31

by Jeff Moyer

[permalink] [raw]
Subject: Re: [PATCH 2/2] cfq-iosched: get rid of the need for __GFP_FAIL in cfq_find_alloc_queue()

Jens Axboe <[email protected]> writes:

> Setup an emergency fallback cfqq that we allocate at IO scheduler init
> time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
> punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
> never fails without having to ensure free memory.
>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
> block/cfq-iosched.c | 124 +++++++++++++++++++++++++++-----------------------
> 1 files changed, 67 insertions(+), 57 deletions(-)
>
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index c760ae7..91e7e0b 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> + /*
> + * Fallback dummy cfqq for extreme OOM conditions
> + */
> + struct cfq_queue oom_cfqq;

OK, so you're embedding a cfqq into the cfqd. That's 136 bytes, so I
guess that's not too bad.

> + /*
> + * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
> + * Grab a permanent reference to it, so that the normal code flow
> + * will not attempt to free it.
> + */
> + cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
> + atomic_inc(&cfqd->oom_cfqq.ref);
> +

I guess this is so we never try to free it, good. ;)

One issue I have with this patch is that, if a task happens to run into
this condition, there is no way out. It will always have the oom_cfqq
as it's cfqq. Can't we fix that if we recover from the OOM condition?

Cheers,
Jeff

2009-06-27 18:26:22

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 2/2] cfq-iosched: get rid of the need for __GFP_FAIL in cfq_find_alloc_queue()

On Fri, Jun 26 2009, Jeff Moyer wrote:
> Jens Axboe <[email protected]> writes:
>
> > Setup an emergency fallback cfqq that we allocate at IO scheduler init
> > time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
> > punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
> > never fails without having to ensure free memory.
> >
> > Signed-off-by: Jens Axboe <[email protected]>
> > ---
> > block/cfq-iosched.c | 124 +++++++++++++++++++++++++++-----------------------
> > 1 files changed, 67 insertions(+), 57 deletions(-)
> >
> > diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> > index c760ae7..91e7e0b 100644
> > --- a/block/cfq-iosched.c
> > +++ b/block/cfq-iosched.c
> > + /*
> > + * Fallback dummy cfqq for extreme OOM conditions
> > + */
> > + struct cfq_queue oom_cfqq;
>
> OK, so you're embedding a cfqq into the cfqd. That's 136 bytes, so I
> guess that's not too bad.
>
> > + /*
> > + * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
> > + * Grab a permanent reference to it, so that the normal code flow
> > + * will not attempt to free it.
> > + */
> > + cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
> > + atomic_inc(&cfqd->oom_cfqq.ref);
> > +
>
> I guess this is so we never try to free it, good. ;)
>
> One issue I have with this patch is that, if a task happens to run into
> this condition, there is no way out. It will always have the oom_cfqq
> as it's cfqq. Can't we fix that if we recover from the OOM condition?

Yeah, I fixed that about an hour after posting the patches. See:

http://git.kernel.dk/?p=linux-2.6-block.git;a=commit;h=0370bc158cb1d5faa4b8a38c0de3211f0fd5bd64

I didn't post the 3/2 patch though.

--
Jens Axboe

2009-06-29 13:46:59

by Jeff Moyer

[permalink] [raw]
Subject: Re: [PATCH 2/2] cfq-iosched: get rid of the need for __GFP_FAIL in cfq_find_alloc_queue()

Jens Axboe <[email protected]> writes:

> On Fri, Jun 26 2009, Jeff Moyer wrote:
>> Jens Axboe <[email protected]> writes:
>>
>> > Setup an emergency fallback cfqq that we allocate at IO scheduler init
>> > time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
>> > punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
>> > never fails without having to ensure free memory.
>> >
>> > Signed-off-by: Jens Axboe <[email protected]>
>> > ---
>> > block/cfq-iosched.c | 124 +++++++++++++++++++++++++++-----------------------
>> > 1 files changed, 67 insertions(+), 57 deletions(-)
>> >
>> > diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>> > index c760ae7..91e7e0b 100644
>> > --- a/block/cfq-iosched.c
>> > +++ b/block/cfq-iosched.c
>> > + /*
>> > + * Fallback dummy cfqq for extreme OOM conditions
>> > + */
>> > + struct cfq_queue oom_cfqq;
>>
>> OK, so you're embedding a cfqq into the cfqd. That's 136 bytes, so I
>> guess that's not too bad.
>>
>> > + /*
>> > + * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
>> > + * Grab a permanent reference to it, so that the normal code flow
>> > + * will not attempt to free it.
>> > + */
>> > + cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
>> > + atomic_inc(&cfqd->oom_cfqq.ref);
>> > +
>>
>> I guess this is so we never try to free it, good. ;)
>>
>> One issue I have with this patch is that, if a task happens to run into
>> this condition, there is no way out. It will always have the oom_cfqq
>> as it's cfqq. Can't we fix that if we recover from the OOM condition?
>
> Yeah, I fixed that about an hour after posting the patches. See:
>
> http://git.kernel.dk/?p=linux-2.6-block.git;a=commit;h=0370bc158cb1d5faa4b8a38c0de3211f0fd5bd64
>
> I didn't post the 3/2 patch though.

OK, that looks better. Are you reposting the series, then?

Cheers,
Jeff

2009-06-29 17:34:27

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 2/2] cfq-iosched: get rid of the need for __GFP_FAIL in cfq_find_alloc_queue()

On Mon, Jun 29 2009, Jeff Moyer wrote:
> Jens Axboe <[email protected]> writes:
>
> > On Fri, Jun 26 2009, Jeff Moyer wrote:
> >> Jens Axboe <[email protected]> writes:
> >>
> >> > Setup an emergency fallback cfqq that we allocate at IO scheduler init
> >> > time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
> >> > punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
> >> > never fails without having to ensure free memory.
> >> >
> >> > Signed-off-by: Jens Axboe <[email protected]>
> >> > ---
> >> > block/cfq-iosched.c | 124 +++++++++++++++++++++++++++-----------------------
> >> > 1 files changed, 67 insertions(+), 57 deletions(-)
> >> >
> >> > diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> >> > index c760ae7..91e7e0b 100644
> >> > --- a/block/cfq-iosched.c
> >> > +++ b/block/cfq-iosched.c
> >> > + /*
> >> > + * Fallback dummy cfqq for extreme OOM conditions
> >> > + */
> >> > + struct cfq_queue oom_cfqq;
> >>
> >> OK, so you're embedding a cfqq into the cfqd. That's 136 bytes, so I
> >> guess that's not too bad.
> >>
> >> > + /*
> >> > + * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
> >> > + * Grab a permanent reference to it, so that the normal code flow
> >> > + * will not attempt to free it.
> >> > + */
> >> > + cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
> >> > + atomic_inc(&cfqd->oom_cfqq.ref);
> >> > +
> >>
> >> I guess this is so we never try to free it, good. ;)
> >>
> >> One issue I have with this patch is that, if a task happens to run into
> >> this condition, there is no way out. It will always have the oom_cfqq
> >> as it's cfqq. Can't we fix that if we recover from the OOM condition?
> >
> > Yeah, I fixed that about an hour after posting the patches. See:
> >
> > http://git.kernel.dk/?p=linux-2.6-block.git;a=commit;h=0370bc158cb1d5faa4b8a38c0de3211f0fd5bd64
> >
> > I didn't post the 3/2 patch though.
>
> OK, that looks better. Are you reposting the series, then?

Yeah, I'll fold the last two patches together and repost.

--
Jens Axboe

2009-06-29 17:44:49

by Jeff Moyer

[permalink] [raw]
Subject: Re: [PATCH 2/2] cfq-iosched: get rid of the need for __GFP_FAIL in cfq_find_alloc_queue()

Jens Axboe <[email protected]> writes:

> On Mon, Jun 29 2009, Jeff Moyer wrote:
>> Jens Axboe <[email protected]> writes:
>>
>> > On Fri, Jun 26 2009, Jeff Moyer wrote:
>> >> Jens Axboe <[email protected]> writes:
>> >>
>> >> > Setup an emergency fallback cfqq that we allocate at IO scheduler init
>> >> > time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
>> >> > punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
>> >> > never fails without having to ensure free memory.
>> >> >
>> >> > Signed-off-by: Jens Axboe <[email protected]>
>> >> > ---
>> >> > block/cfq-iosched.c | 124 +++++++++++++++++++++++++++-----------------------
>> >> > 1 files changed, 67 insertions(+), 57 deletions(-)
>> >> >
>> >> > diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
>> >> > index c760ae7..91e7e0b 100644
>> >> > --- a/block/cfq-iosched.c
>> >> > +++ b/block/cfq-iosched.c
>> >> > + /*
>> >> > + * Fallback dummy cfqq for extreme OOM conditions
>> >> > + */
>> >> > + struct cfq_queue oom_cfqq;
>> >>
>> >> OK, so you're embedding a cfqq into the cfqd. That's 136 bytes, so I
>> >> guess that's not too bad.
>> >>
>> >> > + /*
>> >> > + * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
>> >> > + * Grab a permanent reference to it, so that the normal code flow
>> >> > + * will not attempt to free it.
>> >> > + */
>> >> > + cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
>> >> > + atomic_inc(&cfqd->oom_cfqq.ref);
>> >> > +
>> >>
>> >> I guess this is so we never try to free it, good. ;)
>> >>
>> >> One issue I have with this patch is that, if a task happens to run into
>> >> this condition, there is no way out. It will always have the oom_cfqq
>> >> as it's cfqq. Can't we fix that if we recover from the OOM condition?
>> >
>> > Yeah, I fixed that about an hour after posting the patches. See:
>> >
>> > http://git.kernel.dk/?p=linux-2.6-block.git;a=commit;h=0370bc158cb1d5faa4b8a38c0de3211f0fd5bd64
>> >
>> > I didn't post the 3/2 patch though.
>>
>> OK, that looks better. Are you reposting the series, then?
>
> Yeah, I'll fold the last two patches together and repost.

When you do, you can add my:

Reviewed-by: Jeff Moyer <[email protected]>

Cheers,
Jeff

2009-06-29 17:48:18

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 2/2] cfq-iosched: get rid of the need for __GFP_FAIL in cfq_find_alloc_queue()

On Mon, Jun 29 2009, Jeff Moyer wrote:
> Jens Axboe <[email protected]> writes:
>
> > On Mon, Jun 29 2009, Jeff Moyer wrote:
> >> Jens Axboe <[email protected]> writes:
> >>
> >> > On Fri, Jun 26 2009, Jeff Moyer wrote:
> >> >> Jens Axboe <[email protected]> writes:
> >> >>
> >> >> > Setup an emergency fallback cfqq that we allocate at IO scheduler init
> >> >> > time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
> >> >> > punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
> >> >> > never fails without having to ensure free memory.
> >> >> >
> >> >> > Signed-off-by: Jens Axboe <[email protected]>
> >> >> > ---
> >> >> > block/cfq-iosched.c | 124 +++++++++++++++++++++++++++-----------------------
> >> >> > 1 files changed, 67 insertions(+), 57 deletions(-)
> >> >> >
> >> >> > diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> >> >> > index c760ae7..91e7e0b 100644
> >> >> > --- a/block/cfq-iosched.c
> >> >> > +++ b/block/cfq-iosched.c
> >> >> > + /*
> >> >> > + * Fallback dummy cfqq for extreme OOM conditions
> >> >> > + */
> >> >> > + struct cfq_queue oom_cfqq;
> >> >>
> >> >> OK, so you're embedding a cfqq into the cfqd. That's 136 bytes, so I
> >> >> guess that's not too bad.
> >> >>
> >> >> > + /*
> >> >> > + * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
> >> >> > + * Grab a permanent reference to it, so that the normal code flow
> >> >> > + * will not attempt to free it.
> >> >> > + */
> >> >> > + cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
> >> >> > + atomic_inc(&cfqd->oom_cfqq.ref);
> >> >> > +
> >> >>
> >> >> I guess this is so we never try to free it, good. ;)
> >> >>
> >> >> One issue I have with this patch is that, if a task happens to run into
> >> >> this condition, there is no way out. It will always have the oom_cfqq
> >> >> as it's cfqq. Can't we fix that if we recover from the OOM condition?
> >> >
> >> > Yeah, I fixed that about an hour after posting the patches. See:
> >> >
> >> > http://git.kernel.dk/?p=linux-2.6-block.git;a=commit;h=0370bc158cb1d5faa4b8a38c0de3211f0fd5bd64
> >> >
> >> > I didn't post the 3/2 patch though.
> >>
> >> OK, that looks better. Are you reposting the series, then?
> >
> > Yeah, I'll fold the last two patches together and repost.
>
> When you do, you can add my:
>
> Reviewed-by: Jeff Moyer <[email protected]>

Will do, thanks!

--
Jens Axboe

2009-07-01 09:29:27

by Shan Wei

[permalink] [raw]
Subject: Re: [PATCH 2/2] cfq-iosched: get rid of the need for __GFP_FAIL in cfq_find_alloc_queue()

Jens Axboe said:
> Setup an emergency fallback cfqq that we allocate at IO scheduler init
> time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
> punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
> never fails without having to ensure free memory.
>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
>
> @@ -1740,11 +1745,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
> cfqq = *async_cfqq;
> }
>
> - if (!cfqq) {
> + if (!cfqq)
> cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
> - if (!cfqq)
> - return NULL;
> - }

I jsut reviewed the code and found that the check of cfqq is also redundant
after doing cfq_get_queue() in cfq_set_request.

The patch is based on Linus's main tree.

Signed-off-by: Shan Wei <[email protected]>
---
block/cfq-iosched.c | 4 ----
1 files changed, 0 insertions(+), 4 deletions(-)

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 833ec18..c373237 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2307,10 +2307,6 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
cfqq = cic_to_cfqq(cic, is_sync);
if (!cfqq) {
cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
-
- if (!cfqq)
- goto queue_fail;
-
cic_set_cfqq(cic, cfqq, is_sync);
}

2009-07-01 09:32:42

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 2/2] cfq-iosched: get rid of the need for __GFP_FAIL in cfq_find_alloc_queue()

On Wed, Jul 01 2009, Shan Wei wrote:
> Jens Axboe said:
> > Setup an emergency fallback cfqq that we allocate at IO scheduler init
> > time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
> > punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
> > never fails without having to ensure free memory.
> >
> > Signed-off-by: Jens Axboe <[email protected]>
> > ---
> >
> > @@ -1740,11 +1745,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
> > cfqq = *async_cfqq;
> > }
> >
> > - if (!cfqq) {
> > + if (!cfqq)
> > cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
> > - if (!cfqq)
> > - return NULL;
> > - }
>
> I jsut reviewed the code and found that the check of cfqq is also redundant
> after doing cfq_get_queue() in cfq_set_request.
>
> The patch is based on Linus's main tree.

It's not redundant in Linus' tree, cfq_get_queue() can return NULL for
!= __GFP_WAIT.


>
> Signed-off-by: Shan Wei <[email protected]>
> ---
> block/cfq-iosched.c | 4 ----
> 1 files changed, 0 insertions(+), 4 deletions(-)
>
> diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> index 833ec18..c373237 100644
> --- a/block/cfq-iosched.c
> +++ b/block/cfq-iosched.c
> @@ -2307,10 +2307,6 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
> cfqq = cic_to_cfqq(cic, is_sync);
> if (!cfqq) {
> cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
> -
> - if (!cfqq)
> - goto queue_fail;
> -
> cic_set_cfqq(cic, cfqq, is_sync);
> }
>

--
Jens Axboe

2009-07-02 00:51:18

by Shan Wei

[permalink] [raw]
Subject: Re: [PATCH 2/2] cfq-iosched: get rid of the need for __GFP_FAIL in cfq_find_alloc_queue()

Jens Axboe said:
> On Wed, Jul 01 2009, Shan Wei wrote:
>> Jens Axboe said:
>>> Setup an emergency fallback cfqq that we allocate at IO scheduler init
>>> time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
>>> punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
>>> never fails without having to ensure free memory.
>>>
>>> Signed-off-by: Jens Axboe <[email protected]>
>>> ---
>>>
>>> @@ -1740,11 +1745,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
>>> cfqq = *async_cfqq;
>>> }
>>>
>>> - if (!cfqq) {
>>> + if (!cfqq)
>>> cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
>>> - if (!cfqq)
>>> - return NULL;
>>> - }
>> I jsut reviewed the code and found that the check of cfqq is also redundant
>> after doing cfq_get_queue() in cfq_set_request.
>>
>> The patch is based on Linus's main tree.
>
> It's not redundant in Linus' tree, cfq_get_queue() can return NULL for
> != __GFP_WAIT.
>

Yes. So, the patch is only for "for-linus" branch of your tree, not for Linus's tree.

I noticed the patch is in your tree now, thanks.

2009-07-02 06:33:32

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 2/2] cfq-iosched: get rid of the need for __GFP_FAIL in cfq_find_alloc_queue()

On Thu, Jul 02 2009, Shan Wei wrote:
> Jens Axboe said:
> > On Wed, Jul 01 2009, Shan Wei wrote:
> >> Jens Axboe said:
> >>> Setup an emergency fallback cfqq that we allocate at IO scheduler init
> >>> time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
> >>> punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
> >>> never fails without having to ensure free memory.
> >>>
> >>> Signed-off-by: Jens Axboe <[email protected]>
> >>> ---
> >>>
> >>> @@ -1740,11 +1745,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
> >>> cfqq = *async_cfqq;
> >>> }
> >>>
> >>> - if (!cfqq) {
> >>> + if (!cfqq)
> >>> cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
> >>> - if (!cfqq)
> >>> - return NULL;
> >>> - }
> >> I jsut reviewed the code and found that the check of cfqq is also redundant
> >> after doing cfq_get_queue() in cfq_set_request.
> >>
> >> The patch is based on Linus's main tree.
> >
> > It's not redundant in Linus' tree, cfq_get_queue() can return NULL for
> > != __GFP_WAIT.
> >
>
> Yes. So, the patch is only for "for-linus" branch of your tree, not for Linus's tree.
>
> I noticed the patch is in your tree now, thanks.

Sorry I should have been more clear as well, I did merge it for
for-linus. It's just your original wording said if was against Linus'
main tree, where it didn't apply (the patch would apply, but it would be
wrong :-)

It's in upstream now.

--
Jens Axboe

2009-07-09 15:45:41

by Vivek Goyal

[permalink] [raw]
Subject: Re: [PATCH 2/2] cfq-iosched: get rid of the need for __GFP_FAIL in cfq_find_alloc_queue()

On Sat, Jun 27, 2009 at 08:26:17PM +0200, Jens Axboe wrote:
> On Fri, Jun 26 2009, Jeff Moyer wrote:
> > Jens Axboe <[email protected]> writes:
> >
> > > Setup an emergency fallback cfqq that we allocate at IO scheduler init
> > > time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
> > > punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
> > > never fails without having to ensure free memory.
> > >
> > > Signed-off-by: Jens Axboe <[email protected]>
> > > ---
> > > block/cfq-iosched.c | 124 +++++++++++++++++++++++++++-----------------------
> > > 1 files changed, 67 insertions(+), 57 deletions(-)
> > >
> > > diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> > > index c760ae7..91e7e0b 100644
> > > --- a/block/cfq-iosched.c
> > > +++ b/block/cfq-iosched.c
> > > + /*
> > > + * Fallback dummy cfqq for extreme OOM conditions
> > > + */
> > > + struct cfq_queue oom_cfqq;
> >
> > OK, so you're embedding a cfqq into the cfqd. That's 136 bytes, so I
> > guess that's not too bad.
> >
> > > + /*
> > > + * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
> > > + * Grab a permanent reference to it, so that the normal code flow
> > > + * will not attempt to free it.
> > > + */
> > > + cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
> > > + atomic_inc(&cfqd->oom_cfqq.ref);
> > > +
> >
> > I guess this is so we never try to free it, good. ;)
> >
> > One issue I have with this patch is that, if a task happens to run into
> > this condition, there is no way out. It will always have the oom_cfqq
> > as it's cfqq. Can't we fix that if we recover from the OOM condition?
>
> Yeah, I fixed that about an hour after posting the patches. See:
>
> http://git.kernel.dk/?p=linux-2.6-block.git;a=commit;h=0370bc158cb1d5faa4b8a38c0de3211f0fd5bd64
>

Hi Jens,

I think above patch might not fix the issue of an oom_cfqq getting stuck
with an io context. The reason being that once we allocate the cfqq, it
will be cached in cic and once next request comes, we will retrieve it
from cic and never call cfq_get_queue()/cfq_find_alloc_queue().

I think we probably need to do cfqq == oom_cfqq check in cfq_set_request()
also.


---
block/cfq-iosched.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux5/block/cfq-iosched.c
===================================================================
--- linux5.orig/block/cfq-iosched.c 2009-07-04 13:58:48.000000000 -0400
+++ linux5/block/cfq-iosched.c 2009-07-09 11:33:45.000000000 -0400
@@ -2311,7 +2311,7 @@ cfq_set_request(struct request_queue *q,
goto queue_fail;

cfqq = cic_to_cfqq(cic, is_sync);
- if (!cfqq) {
+ if (!cfqq || cfqq == &cfqd->oom_cfqq) {
cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
cic_set_cfqq(cic, cfqq, is_sync);
}

2009-07-09 17:38:32

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 2/2] cfq-iosched: get rid of the need for __GFP_FAIL in cfq_find_alloc_queue()

On Thu, Jul 09 2009, Vivek Goyal wrote:
> On Sat, Jun 27, 2009 at 08:26:17PM +0200, Jens Axboe wrote:
> > On Fri, Jun 26 2009, Jeff Moyer wrote:
> > > Jens Axboe <[email protected]> writes:
> > >
> > > > Setup an emergency fallback cfqq that we allocate at IO scheduler init
> > > > time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
> > > > punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
> > > > never fails without having to ensure free memory.
> > > >
> > > > Signed-off-by: Jens Axboe <[email protected]>
> > > > ---
> > > > block/cfq-iosched.c | 124 +++++++++++++++++++++++++++-----------------------
> > > > 1 files changed, 67 insertions(+), 57 deletions(-)
> > > >
> > > > diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> > > > index c760ae7..91e7e0b 100644
> > > > --- a/block/cfq-iosched.c
> > > > +++ b/block/cfq-iosched.c
> > > > + /*
> > > > + * Fallback dummy cfqq for extreme OOM conditions
> > > > + */
> > > > + struct cfq_queue oom_cfqq;
> > >
> > > OK, so you're embedding a cfqq into the cfqd. That's 136 bytes, so I
> > > guess that's not too bad.
> > >
> > > > + /*
> > > > + * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
> > > > + * Grab a permanent reference to it, so that the normal code flow
> > > > + * will not attempt to free it.
> > > > + */
> > > > + cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
> > > > + atomic_inc(&cfqd->oom_cfqq.ref);
> > > > +
> > >
> > > I guess this is so we never try to free it, good. ;)
> > >
> > > One issue I have with this patch is that, if a task happens to run into
> > > this condition, there is no way out. It will always have the oom_cfqq
> > > as it's cfqq. Can't we fix that if we recover from the OOM condition?
> >
> > Yeah, I fixed that about an hour after posting the patches. See:
> >
> > http://git.kernel.dk/?p=linux-2.6-block.git;a=commit;h=0370bc158cb1d5faa4b8a38c0de3211f0fd5bd64
> >
>
> Hi Jens,
>
> I think above patch might not fix the issue of an oom_cfqq getting stuck
> with an io context. The reason being that once we allocate the cfqq, it
> will be cached in cic and once next request comes, we will retrieve it
> from cic and never call cfq_get_queue()/cfq_find_alloc_queue().
>
> I think we probably need to do cfqq == oom_cfqq check in cfq_set_request()
> also.

Yes good catch, this is needed too! Can you please send as a "real"
patch with signed-off-by added? Thanks!

--
Jens Axboe

2009-07-09 20:00:17

by Vivek Goyal

[permalink] [raw]
Subject: Re: [PATCH 2/2] cfq-iosched: get rid of the need for __GFP_FAIL in cfq_find_alloc_queue()

On Thu, Jul 09, 2009 at 07:38:23PM +0200, Jens Axboe wrote:
> On Thu, Jul 09 2009, Vivek Goyal wrote:
> > On Sat, Jun 27, 2009 at 08:26:17PM +0200, Jens Axboe wrote:
> > > On Fri, Jun 26 2009, Jeff Moyer wrote:
> > > > Jens Axboe <[email protected]> writes:
> > > >
> > > > > Setup an emergency fallback cfqq that we allocate at IO scheduler init
> > > > > time. If the slab allocation fails in cfq_find_alloc_queue(), we'll just
> > > > > punt IO to that cfqq instead. This ensures that cfq_find_alloc_queue()
> > > > > never fails without having to ensure free memory.
> > > > >
> > > > > Signed-off-by: Jens Axboe <[email protected]>
> > > > > ---
> > > > > block/cfq-iosched.c | 124 +++++++++++++++++++++++++++-----------------------
> > > > > 1 files changed, 67 insertions(+), 57 deletions(-)
> > > > >
> > > > > diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
> > > > > index c760ae7..91e7e0b 100644
> > > > > --- a/block/cfq-iosched.c
> > > > > +++ b/block/cfq-iosched.c
> > > > > + /*
> > > > > + * Fallback dummy cfqq for extreme OOM conditions
> > > > > + */
> > > > > + struct cfq_queue oom_cfqq;
> > > >
> > > > OK, so you're embedding a cfqq into the cfqd. That's 136 bytes, so I
> > > > guess that's not too bad.
> > > >
> > > > > + /*
> > > > > + * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
> > > > > + * Grab a permanent reference to it, so that the normal code flow
> > > > > + * will not attempt to free it.
> > > > > + */
> > > > > + cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
> > > > > + atomic_inc(&cfqd->oom_cfqq.ref);
> > > > > +
> > > >
> > > > I guess this is so we never try to free it, good. ;)
> > > >
> > > > One issue I have with this patch is that, if a task happens to run into
> > > > this condition, there is no way out. It will always have the oom_cfqq
> > > > as it's cfqq. Can't we fix that if we recover from the OOM condition?
> > >
> > > Yeah, I fixed that about an hour after posting the patches. See:
> > >
> > > http://git.kernel.dk/?p=linux-2.6-block.git;a=commit;h=0370bc158cb1d5faa4b8a38c0de3211f0fd5bd64
> > >
> >
> > Hi Jens,
> >
> > I think above patch might not fix the issue of an oom_cfqq getting stuck
> > with an io context. The reason being that once we allocate the cfqq, it
> > will be cached in cic and once next request comes, we will retrieve it
> > from cic and never call cfq_get_queue()/cfq_find_alloc_queue().
> >
> > I think we probably need to do cfqq == oom_cfqq check in cfq_set_request()
> > also.
>
> Yes good catch, this is needed too! Can you please send as a "real"
> patch with signed-off-by added? Thanks!

Sure. Here you go.

In case memory is scarce, we now default to oom_cfqq. Once memory is
available again, we should allocate a new cfqq and stop using oom_cfqq for
a particular io context.

Once a new request comes in, check if we are using oom_cfqq, and if yes,
try to allocate a new cfqq.

Tested the patch by forcing the use of oom_cfqq and upon next request thread
realized that it was using oom_cfqq and it allocated a new cfqq.

Signed-off-by: Vivek Goyal <[email protected]>
---
block/cfq-iosched.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

Index: linux5/block/cfq-iosched.c
===================================================================
--- linux5.orig/block/cfq-iosched.c 2009-07-04 13:58:48.000000000 -0400
+++ linux5/block/cfq-iosched.c 2009-07-09 15:56:59.000000000 -0400
@@ -2311,7 +2311,7 @@ cfq_set_request(struct request_queue *q,
goto queue_fail;

cfqq = cic_to_cfqq(cic, is_sync);
- if (!cfqq) {
+ if (!cfqq || cfqq == &cfqd->oom_cfqq) {
cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
cic_set_cfqq(cic, cfqq, is_sync);
}

2009-07-09 20:15:25

by Jens Axboe

[permalink] [raw]
Subject: Re: [PATCH 2/2] cfq-iosched: get rid of the need for __GFP_FAIL in cfq_find_alloc_queue()

On Thu, Jul 09 2009, Vivek Goyal wrote:
> Sure. Here you go.
>
> In case memory is scarce, we now default to oom_cfqq. Once memory is
> available again, we should allocate a new cfqq and stop using oom_cfqq for
> a particular io context.
>
> Once a new request comes in, check if we are using oom_cfqq, and if yes,
> try to allocate a new cfqq.
>
> Tested the patch by forcing the use of oom_cfqq and upon next request thread
> realized that it was using oom_cfqq and it allocated a new cfqq.

Thanks, applied!

--
Jens Axboe