2013-03-04 16:47:29

by Paul Taysom

[permalink] [raw]
Subject: [PATCH] md: dm-verity: Fix to avoid a deadlock in dm-bufio

Changed the dm-verity prefetching to use a worker thread to avoid
a deadlock in dm-bufio.

If generic_make_request is called recursively, it queues the I/O
request on the current->bio_list without making the I/O request
and returns. The routine making the recursive call cannot wait
for the I/O to complete.

The deadlock occurred when one thread grabbed the bufio_client
mutex and waited for an I/O to complete but the I/O was queued
on another thread’s current->bio_list and it was waiting to get
the mutex held by the first thread.

The fix allows only one I/O request from dm-verity to dm-bufio
per thread. To do this, the prefetch requests were queued on worker
threads.

In addition to avoiding the deadlock, this fix made a slight
improvement in performance.

seconds_kernel_to_login:
with prefetch: 8.43s
without prefetch: 9.2s
worker prefetch: 8.28s

Signed-off-by: Paul Taysom <[email protected]>
---
drivers/md/dm-verity.c | 29 +++++++++++++++++++++++++++--
1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 52cde98..7313498 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -93,6 +93,13 @@ struct dm_verity_io {
*/
};

+struct dm_verity_prefetch_work {
+ struct work_struct work;
+ struct dm_bufio_client *bufio;
+ sector_t block;
+ unsigned n_blocks;
+};
+
static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io)
{
return (struct shash_desc *)(io + 1);
@@ -419,6 +426,17 @@ static void verity_end_io(struct bio *bio, int error)
queue_work(io->v->verify_wq, &io->work);
}

+
+static void do_verity_prefetch_work(struct work_struct *work)
+{
+ struct dm_verity_prefetch_work *vw =
+ container_of(work, struct dm_verity_prefetch_work, work);
+
+ dm_bufio_prefetch(vw->bufio, vw->block, vw->n_blocks);
+
+ kfree(vw);
+}
+
/*
* Prefetch buffers for the specified io.
* The root buffer is not prefetched, it is assumed that it will be cached
@@ -427,6 +445,7 @@ static void verity_end_io(struct bio *bio, int error)
static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io)
{
int i;
+ struct dm_verity_prefetch_work *vw;

for (i = v->levels - 2; i >= 0; i--) {
sector_t hash_block_start;
@@ -449,8 +468,14 @@ static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io)
hash_block_end = v->hash_blocks - 1;
}
no_prefetch_cluster:
- dm_bufio_prefetch(v->bufio, hash_block_start,
- hash_block_end - hash_block_start + 1);
+ vw = kmalloc(sizeof(*vw), GFP_KERNEL);
+ if (!vw) /* Just prefetching, ignore errors */
+ return;
+ vw->bufio = v->bufio;
+ vw->block = hash_block_start;
+ vw->n_blocks = hash_block_end - hash_block_start + 1;
+ INIT_WORK(&vw->work, do_verity_prefetch_work);
+ queue_work(v->verify_wq, &vw->work);
}
}

--
1.8.1.3


2013-03-04 17:19:29

by Paul Menzel

[permalink] [raw]
Subject: Re: [PATCH] md: dm-verity: Fix to avoid a deadlock in dm-bufio

Am Montag, den 04.03.2013, 08:45 -0800 schrieb Paul Taysom:
> Changed the dm-verity prefetching to use a worker thread to avoid
> a deadlock in dm-bufio.

Do you know of any bug reports about this? Searching for »dm-verity dead
lock« I found [1], which seems to be the issue, right? If yes, please
reference it.

> If generic_make_request is called recursively, it queues the I/O
> request on the current->bio_list without making the I/O request
> and returns. The routine making the recursive call cannot wait
> for the I/O to complete.
>
> The deadlock occurred when one thread grabbed the bufio_client
> mutex and waited for an I/O to complete but the I/O was queued
> on another thread’s current->bio_list and it was waiting to get
> the mutex held by the first thread.
>
> The fix allows only one I/O request from dm-verity to dm-bufio
> per thread. To do this, the prefetch requests were queued on worker
> threads.
>
> In addition to avoiding the deadlock, this fix made a slight
> improvement in performance.
>
> seconds_kernel_to_login:
> with prefetch: 8.43s
> without prefetch: 9.2s
> worker prefetch: 8.28s
>
> Signed-off-by: Paul Taysom <[email protected]>
> ---
> drivers/md/dm-verity.c | 29 +++++++++++++++++++++++++++--
> 1 file changed, 27 insertions(+), 2 deletions(-)

Acked-by: Paul Menzel <[email protected]>


Thanks,

Paul


[1] https://code.google.com/p/chromium-os/issues/detail?id=39148


Attachments:
signature.asc (198.00 B)
This is a digitally signed message part

2013-03-04 17:43:41

by Alasdair G Kergon

[permalink] [raw]
Subject: Re: [PATCH] md: dm-verity: Fix to avoid a deadlock in dm-bufio

On Mon, Mar 04, 2013 at 08:45:48AM -0800, Paul Taysom wrote:
> @@ -449,8 +468,14 @@ static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io)
> hash_block_end = v->hash_blocks - 1;
> }
> no_prefetch_cluster:
> - dm_bufio_prefetch(v->bufio, hash_block_start,
> - hash_block_end - hash_block_start + 1);
> + vw = kmalloc(sizeof(*vw), GFP_KERNEL);

kmalloc? mempool? ...

Alasdair

2013-03-04 21:21:34

by Paul Taysom

[permalink] [raw]
Subject: Re: [PATCH] md: dm-verity: Fix to avoid a deadlock in dm-bufio

On Mon, Mar 4, 2013 at 9:19 AM, Paul Menzel <[email protected]> wrote:
>
> Am Montag, den 04.03.2013, 08:45 -0800 schrieb Paul Taysom:
> > Changed the dm-verity prefetching to use a worker thread to avoid
> > a deadlock in dm-bufio.
>
> Do you know of any bug reports about this? Searching for ?dm-verity dead
> lock? I found [1], which seems to be the issue, right? If yes, please
> reference it.
>
> > If generic_make_request is called recursively, it queues the I/O
> > request on the current->bio_list without making the I/O request
> > and returns. The routine making the recursive call cannot wait
> > for the I/O to complete.
> >
> > The deadlock occurred when one thread grabbed the bufio_client
> > mutex and waited for an I/O to complete but the I/O was queued
> > on another thread?s current->bio_list and it was waiting to get
> > the mutex held by the first thread.
> >
> > The fix allows only one I/O request from dm-verity to dm-bufio
> > per thread. To do this, the prefetch requests were queued on worker
> > threads.
> >
> > In addition to avoiding the deadlock, this fix made a slight
> > improvement in performance.
> >
> > seconds_kernel_to_login:
> > with prefetch: 8.43s
> > without prefetch: 9.2s
> > worker prefetch: 8.28s
> >
> > Signed-off-by: Paul Taysom <[email protected]>
> > ---
> > drivers/md/dm-verity.c | 29 +++++++++++++++++++++++++++--
> > 1 file changed, 27 insertions(+), 2 deletions(-)
>
> Acked-by: Paul Menzel <[email protected]>
>
>
> Thanks,
>
> Paul
>
>
> [1] https://code.google.com/p/chromium-os/issues/detail?id=39148

Yes, I should have included that this fix addresses http://crosbug.com/39148
Thanks,
Paul

2013-03-04 21:24:13

by Paul Taysom

[permalink] [raw]
Subject: Re: [PATCH] md: dm-verity: Fix to avoid a deadlock in dm-bufio

On Mon, Mar 4, 2013 at 9:42 AM, Alasdair G Kergon <[email protected]> wrote:
> On Mon, Mar 04, 2013 at 08:45:48AM -0800, Paul Taysom wrote:
>> @@ -449,8 +468,14 @@ static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io)
>> hash_block_end = v->hash_blocks - 1;
>> }
>> no_prefetch_cluster:
>> - dm_bufio_prefetch(v->bufio, hash_block_start,
>> - hash_block_end - hash_block_start + 1);
>> + vw = kmalloc(sizeof(*vw), GFP_KERNEL);
>
> kmalloc? mempool? ...
>
> Alasdair
>
The use of mempool would be a separate patch that would have to be
measured for performance impact.
-Paul

2013-03-04 22:16:33

by Mikulas Patocka

[permalink] [raw]
Subject: Re: [PATCH] md: dm-verity: Fix to avoid a deadlock in dm-bufio



On Mon, 4 Mar 2013, Paul Taysom wrote:

> On Mon, Mar 4, 2013 at 9:42 AM, Alasdair G Kergon <[email protected]> wrote:
> > On Mon, Mar 04, 2013 at 08:45:48AM -0800, Paul Taysom wrote:
> >> @@ -449,8 +468,14 @@ static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io)
> >> hash_block_end = v->hash_blocks - 1;
> >> }
> >> no_prefetch_cluster:
> >> - dm_bufio_prefetch(v->bufio, hash_block_start,
> >> - hash_block_end - hash_block_start + 1);
> >> + vw = kmalloc(sizeof(*vw), GFP_KERNEL);
> >
> > kmalloc? mempool? ...
> >
> > Alasdair
> >
> The use of mempool would be a separate patch that would have to be
> measured for performance impact.
> -Paul
>

You don't have to use mempool. Just avoid prefetching if there is not
enough memory for the prefetch structure.

I reworked the patch, is uses an allocation that can fail and it generates
just one workqueue entry for one request (the original patch generated one
workqueue entry for hash tree level).

Please test the patch and if it works and performs well, let's submit it.

Mikulas

---

Changed the dm-verity prefetching to use a worker thread to avoid
a deadlock in dm-bufio.

If generic_make_request is called recursively, it queues the I/O
request on the current->bio_list without making the I/O request
and returns. The routine making the recursive call cannot wait
for the I/O to complete.

The deadlock occurred when one thread grabbed the bufio_client
mutex and waited for an I/O to complete but the I/O was queued
on another thread's current->bio_list and it was waiting to get
the mutex held by the first thread.

The fix allows only one I/O request from dm-verity to dm-bufio
per thread. To do this, the prefetch requests were queued on worker
threads.

In addition to avoiding the deadlock, this fix made a slight
improvement in performance.

seconds_kernel_to_login:
with prefetch: 8.43s
without prefetch: 9.2s
worker prefetch: 8.28s

Signed-off-by: Paul Taysom <[email protected]>
Signed-off-by: Mikulas Patocka <[email protected]>
Cc: [email protected]
---
drivers/md/dm-bufio.c | 2 ++
drivers/md/dm-verity.c | 37 +++++++++++++++++++++++++++++++++----
2 files changed, 35 insertions(+), 4 deletions(-)

Index: linux-3.8-fast/drivers/md/dm-verity.c
===================================================================
--- linux-3.8-fast.orig/drivers/md/dm-verity.c 2013-03-04 22:49:20.000000000 +0100
+++ linux-3.8-fast/drivers/md/dm-verity.c 2013-03-04 23:10:45.000000000 +0100
@@ -93,6 +93,13 @@ struct dm_verity_io {
*/
};

+struct dm_verity_prefetch_work {
+ struct work_struct work;
+ struct dm_verity *v;
+ sector_t block;
+ unsigned n_blocks;
+};
+
static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io)
{
return (struct shash_desc *)(io + 1);
@@ -424,15 +431,18 @@ static void verity_end_io(struct bio *bi
* The root buffer is not prefetched, it is assumed that it will be cached
* all the time.
*/
-static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io)
+static void verity_prefetch_io(struct work_struct *work)
{
+ struct dm_verity_prefetch_work *pw =
+ container_of(work, struct dm_verity_prefetch_work, work);
+ struct dm_verity *v = pw->v;
int i;

for (i = v->levels - 2; i >= 0; i--) {
sector_t hash_block_start;
sector_t hash_block_end;
- verity_hash_at_level(v, io->block, i, &hash_block_start, NULL);
- verity_hash_at_level(v, io->block + io->n_blocks - 1, i, &hash_block_end, NULL);
+ verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL);
+ verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL);
if (!i) {
unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster);

@@ -452,6 +462,25 @@ no_prefetch_cluster:
dm_bufio_prefetch(v->bufio, hash_block_start,
hash_block_end - hash_block_start + 1);
}
+
+ kfree(pw);
+}
+
+static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io)
+{
+ struct dm_verity_prefetch_work *pw;
+
+ pw = kmalloc(sizeof(struct dm_verity_prefetch_work),
+ GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+
+ if (!pw)
+ return;
+
+ INIT_WORK(&pw->work, verity_prefetch_io);
+ pw->v = v;
+ pw->block = io->block;
+ pw->n_blocks = io->n_blocks;
+ queue_work(v->verify_wq, &pw->work);
}

/*
@@ -498,7 +527,7 @@ static int verity_map(struct dm_target *
memcpy(io->io_vec, bio_iovec(bio),
io->io_vec_size * sizeof(struct bio_vec));

- verity_prefetch_io(v, io);
+ verity_submit_prefetch(v, io);

generic_make_request(bio);

Index: linux-3.8-fast/drivers/md/dm-bufio.c
===================================================================
--- linux-3.8-fast.orig/drivers/md/dm-bufio.c 2013-03-04 23:03:14.000000000 +0100
+++ linux-3.8-fast/drivers/md/dm-bufio.c 2013-03-04 23:04:19.000000000 +0100
@@ -1026,6 +1026,8 @@ void dm_bufio_prefetch(struct dm_bufio_c
{
struct blk_plug plug;

+ BUG_ON(dm_bufio_in_request());
+
blk_start_plug(&plug);
dm_bufio_lock(c);

2013-03-05 23:30:33

by Paul Taysom

[permalink] [raw]
Subject: Re: [PATCH] md: dm-verity: Fix to avoid a deadlock in dm-bufio

seconds_kernel_to_login: 8.17s
Passed all my other tests

On Mon, Mar 4, 2013 at 2:15 PM, Mikulas Patocka <[email protected]> wrote:
>
>
> On Mon, 4 Mar 2013, Paul Taysom wrote:
>
>> On Mon, Mar 4, 2013 at 9:42 AM, Alasdair G Kergon <[email protected]> wrote:
>> > On Mon, Mar 04, 2013 at 08:45:48AM -0800, Paul Taysom wrote:
>> >> @@ -449,8 +468,14 @@ static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io)
>> >> hash_block_end = v->hash_blocks - 1;
>> >> }
>> >> no_prefetch_cluster:
>> >> - dm_bufio_prefetch(v->bufio, hash_block_start,
>> >> - hash_block_end - hash_block_start + 1);
>> >> + vw = kmalloc(sizeof(*vw), GFP_KERNEL);
>> >
>> > kmalloc? mempool? ...
>> >
>> > Alasdair
>> >
>> The use of mempool would be a separate patch that would have to be
>> measured for performance impact.
>> -Paul
>>
>
> You don't have to use mempool. Just avoid prefetching if there is not
> enough memory for the prefetch structure.
>
> I reworked the patch, is uses an allocation that can fail and it generates
> just one workqueue entry for one request (the original patch generated one
> workqueue entry for hash tree level).
>
> Please test the patch and if it works and performs well, let's submit it.
>
> Mikulas
>
> ---
>
> Changed the dm-verity prefetching to use a worker thread to avoid
> a deadlock in dm-bufio.
>
> If generic_make_request is called recursively, it queues the I/O
> request on the current->bio_list without making the I/O request
> and returns. The routine making the recursive call cannot wait
> for the I/O to complete.
>
> The deadlock occurred when one thread grabbed the bufio_client
> mutex and waited for an I/O to complete but the I/O was queued
> on another thread's current->bio_list and it was waiting to get
> the mutex held by the first thread.
>
> The fix allows only one I/O request from dm-verity to dm-bufio
> per thread. To do this, the prefetch requests were queued on worker
> threads.
>
> In addition to avoiding the deadlock, this fix made a slight
> improvement in performance.
>
> seconds_kernel_to_login:
> with prefetch: 8.43s
> without prefetch: 9.2s
> worker prefetch: 8.28s
>
> Signed-off-by: Paul Taysom <[email protected]>
> Signed-off-by: Mikulas Patocka <[email protected]>
> Cc: [email protected]
> ---
> drivers/md/dm-bufio.c | 2 ++
> drivers/md/dm-verity.c | 37 +++++++++++++++++++++++++++++++++----
> 2 files changed, 35 insertions(+), 4 deletions(-)
>
> Index: linux-3.8-fast/drivers/md/dm-verity.c
> ===================================================================
> --- linux-3.8-fast.orig/drivers/md/dm-verity.c 2013-03-04 22:49:20.000000000 +0100
> +++ linux-3.8-fast/drivers/md/dm-verity.c 2013-03-04 23:10:45.000000000 +0100
> @@ -93,6 +93,13 @@ struct dm_verity_io {
> */
> };
>
> +struct dm_verity_prefetch_work {
> + struct work_struct work;
> + struct dm_verity *v;
> + sector_t block;
> + unsigned n_blocks;
> +};
> +
> static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io)
> {
> return (struct shash_desc *)(io + 1);
> @@ -424,15 +431,18 @@ static void verity_end_io(struct bio *bi
> * The root buffer is not prefetched, it is assumed that it will be cached
> * all the time.
> */
> -static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io)
> +static void verity_prefetch_io(struct work_struct *work)
> {
> + struct dm_verity_prefetch_work *pw =
> + container_of(work, struct dm_verity_prefetch_work, work);
> + struct dm_verity *v = pw->v;
> int i;
>
> for (i = v->levels - 2; i >= 0; i--) {
> sector_t hash_block_start;
> sector_t hash_block_end;
> - verity_hash_at_level(v, io->block, i, &hash_block_start, NULL);
> - verity_hash_at_level(v, io->block + io->n_blocks - 1, i, &hash_block_end, NULL);
> + verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL);
> + verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL);
> if (!i) {
> unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster);
>
> @@ -452,6 +462,25 @@ no_prefetch_cluster:
> dm_bufio_prefetch(v->bufio, hash_block_start,
> hash_block_end - hash_block_start + 1);
> }
> +
> + kfree(pw);
> +}
> +
> +static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io)
> +{
> + struct dm_verity_prefetch_work *pw;
> +
> + pw = kmalloc(sizeof(struct dm_verity_prefetch_work),
> + GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
> +
> + if (!pw)
> + return;
> +
> + INIT_WORK(&pw->work, verity_prefetch_io);
> + pw->v = v;
> + pw->block = io->block;
> + pw->n_blocks = io->n_blocks;
> + queue_work(v->verify_wq, &pw->work);
> }
>
> /*
> @@ -498,7 +527,7 @@ static int verity_map(struct dm_target *
> memcpy(io->io_vec, bio_iovec(bio),
> io->io_vec_size * sizeof(struct bio_vec));
>
> - verity_prefetch_io(v, io);
> + verity_submit_prefetch(v, io);
>
> generic_make_request(bio);
>
> Index: linux-3.8-fast/drivers/md/dm-bufio.c
> ===================================================================
> --- linux-3.8-fast.orig/drivers/md/dm-bufio.c 2013-03-04 23:03:14.000000000 +0100
> +++ linux-3.8-fast/drivers/md/dm-bufio.c 2013-03-04 23:04:19.000000000 +0100
> @@ -1026,6 +1026,8 @@ void dm_bufio_prefetch(struct dm_bufio_c
> {
> struct blk_plug plug;
>
> + BUG_ON(dm_bufio_in_request());
> +
> blk_start_plug(&plug);
> dm_bufio_lock(c);
>

2013-03-12 14:46:42

by Paul Taysom

[permalink] [raw]
Subject: Re: [PATCH] md: dm-verity: Fix to avoid a deadlock in dm-bufio

Looks good to me.

On Tue, Mar 5, 2013 at 3:30 PM, Paul Taysom <[email protected]> wrote:
> seconds_kernel_to_login: 8.17s
> Passed all my other tests
>
> On Mon, Mar 4, 2013 at 2:15 PM, Mikulas Patocka <[email protected]> wrote:
>>
>>
>> On Mon, 4 Mar 2013, Paul Taysom wrote:
>>
>>> On Mon, Mar 4, 2013 at 9:42 AM, Alasdair G Kergon <[email protected]> wrote:
>>> > On Mon, Mar 04, 2013 at 08:45:48AM -0800, Paul Taysom wrote:
>>> >> @@ -449,8 +468,14 @@ static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io)
>>> >> hash_block_end = v->hash_blocks - 1;
>>> >> }
>>> >> no_prefetch_cluster:
>>> >> - dm_bufio_prefetch(v->bufio, hash_block_start,
>>> >> - hash_block_end - hash_block_start + 1);
>>> >> + vw = kmalloc(sizeof(*vw), GFP_KERNEL);
>>> >
>>> > kmalloc? mempool? ...
>>> >
>>> > Alasdair
>>> >
>>> The use of mempool would be a separate patch that would have to be
>>> measured for performance impact.
>>> -Paul
>>>
>>
>> You don't have to use mempool. Just avoid prefetching if there is not
>> enough memory for the prefetch structure.
>>
>> I reworked the patch, is uses an allocation that can fail and it generates
>> just one workqueue entry for one request (the original patch generated one
>> workqueue entry for hash tree level).
>>
>> Please test the patch and if it works and performs well, let's submit it.
>>
>> Mikulas
>>
>> ---
>>
>> Changed the dm-verity prefetching to use a worker thread to avoid
>> a deadlock in dm-bufio.
>>
>> If generic_make_request is called recursively, it queues the I/O
>> request on the current->bio_list without making the I/O request
>> and returns. The routine making the recursive call cannot wait
>> for the I/O to complete.
>>
>> The deadlock occurred when one thread grabbed the bufio_client
>> mutex and waited for an I/O to complete but the I/O was queued
>> on another thread's current->bio_list and it was waiting to get
>> the mutex held by the first thread.
>>
>> The fix allows only one I/O request from dm-verity to dm-bufio
>> per thread. To do this, the prefetch requests were queued on worker
>> threads.
>>
>> In addition to avoiding the deadlock, this fix made a slight
>> improvement in performance.
>>
>> seconds_kernel_to_login:
>> with prefetch: 8.43s
>> without prefetch: 9.2s
>> worker prefetch: 8.28s
>>
>> Signed-off-by: Paul Taysom <[email protected]>
>> Signed-off-by: Mikulas Patocka <[email protected]>
>> Cc: [email protected]
>> ---
>> drivers/md/dm-bufio.c | 2 ++
>> drivers/md/dm-verity.c | 37 +++++++++++++++++++++++++++++++++----
>> 2 files changed, 35 insertions(+), 4 deletions(-)
>>
>> Index: linux-3.8-fast/drivers/md/dm-verity.c
>> ===================================================================
>> --- linux-3.8-fast.orig/drivers/md/dm-verity.c 2013-03-04 22:49:20.000000000 +0100
>> +++ linux-3.8-fast/drivers/md/dm-verity.c 2013-03-04 23:10:45.000000000 +0100
>> @@ -93,6 +93,13 @@ struct dm_verity_io {
>> */
>> };
>>
>> +struct dm_verity_prefetch_work {
>> + struct work_struct work;
>> + struct dm_verity *v;
>> + sector_t block;
>> + unsigned n_blocks;
>> +};
>> +
>> static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io)
>> {
>> return (struct shash_desc *)(io + 1);
>> @@ -424,15 +431,18 @@ static void verity_end_io(struct bio *bi
>> * The root buffer is not prefetched, it is assumed that it will be cached
>> * all the time.
>> */
>> -static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io)
>> +static void verity_prefetch_io(struct work_struct *work)
>> {
>> + struct dm_verity_prefetch_work *pw =
>> + container_of(work, struct dm_verity_prefetch_work, work);
>> + struct dm_verity *v = pw->v;
>> int i;
>>
>> for (i = v->levels - 2; i >= 0; i--) {
>> sector_t hash_block_start;
>> sector_t hash_block_end;
>> - verity_hash_at_level(v, io->block, i, &hash_block_start, NULL);
>> - verity_hash_at_level(v, io->block + io->n_blocks - 1, i, &hash_block_end, NULL);
>> + verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL);
>> + verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL);
>> if (!i) {
>> unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster);
>>
>> @@ -452,6 +462,25 @@ no_prefetch_cluster:
>> dm_bufio_prefetch(v->bufio, hash_block_start,
>> hash_block_end - hash_block_start + 1);
>> }
>> +
>> + kfree(pw);
>> +}
>> +
>> +static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io)
>> +{
>> + struct dm_verity_prefetch_work *pw;
>> +
>> + pw = kmalloc(sizeof(struct dm_verity_prefetch_work),
>> + GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
>> +
>> + if (!pw)
>> + return;
>> +
>> + INIT_WORK(&pw->work, verity_prefetch_io);
>> + pw->v = v;
>> + pw->block = io->block;
>> + pw->n_blocks = io->n_blocks;
>> + queue_work(v->verify_wq, &pw->work);
>> }
>>
>> /*
>> @@ -498,7 +527,7 @@ static int verity_map(struct dm_target *
>> memcpy(io->io_vec, bio_iovec(bio),
>> io->io_vec_size * sizeof(struct bio_vec));
>>
>> - verity_prefetch_io(v, io);
>> + verity_submit_prefetch(v, io);
>>
>> generic_make_request(bio);
>>
>> Index: linux-3.8-fast/drivers/md/dm-bufio.c
>> ===================================================================
>> --- linux-3.8-fast.orig/drivers/md/dm-bufio.c 2013-03-04 23:03:14.000000000 +0100
>> +++ linux-3.8-fast/drivers/md/dm-bufio.c 2013-03-04 23:04:19.000000000 +0100
>> @@ -1026,6 +1026,8 @@ void dm_bufio_prefetch(struct dm_bufio_c
>> {
>> struct blk_plug plug;
>>
>> + BUG_ON(dm_bufio_in_request());
>> +
>> blk_start_plug(&plug);
>> dm_bufio_lock(c);
>>

2013-03-13 20:18:10

by Paul Taysom

[permalink] [raw]
Subject: Re: [PATCH] md: dm-verity: Fix to avoid a deadlock in dm-bufio

On Tue, Mar 12, 2013 at 7:46 AM, Paul Taysom <[email protected]> wrote:
> Looks good to me.
>
> On Tue, Mar 5, 2013 at 3:30 PM, Paul Taysom <[email protected]> wrote:
>> seconds_kernel_to_login: 8.17s
>> Passed all my other tests
>>
>> On Mon, Mar 4, 2013 at 2:15 PM, Mikulas Patocka <[email protected]> wrote:
>>>
>>>
>>> On Mon, 4 Mar 2013, Paul Taysom wrote:
>>>
>>>> On Mon, Mar 4, 2013 at 9:42 AM, Alasdair G Kergon <[email protected]> wrote:
>>>> > On Mon, Mar 04, 2013 at 08:45:48AM -0800, Paul Taysom wrote:
>>>> >> @@ -449,8 +468,14 @@ static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io)
>>>> >> hash_block_end = v->hash_blocks - 1;
>>>> >> }
>>>> >> no_prefetch_cluster:
>>>> >> - dm_bufio_prefetch(v->bufio, hash_block_start,
>>>> >> - hash_block_end - hash_block_start + 1);
>>>> >> + vw = kmalloc(sizeof(*vw), GFP_KERNEL);
>>>> >
>>>> > kmalloc? mempool? ...
>>>> >
>>>> > Alasdair
>>>> >
>>>> The use of mempool would be a separate patch that would have to be
>>>> measured for performance impact.
>>>> -Paul
>>>>
>>>
>>> You don't have to use mempool. Just avoid prefetching if there is not
>>> enough memory for the prefetch structure.
>>>
>>> I reworked the patch, is uses an allocation that can fail and it generates
>>> just one workqueue entry for one request (the original patch generated one
>>> workqueue entry for hash tree level).
>>>
>>> Please test the patch and if it works and performs well, let's submit it.
>>>
>>> Mikulas
>>>
>>> ---
>>>
>>> Changed the dm-verity prefetching to use a worker thread to avoid
>>> a deadlock in dm-bufio.
>>>
>>> If generic_make_request is called recursively, it queues the I/O
>>> request on the current->bio_list without making the I/O request
>>> and returns. The routine making the recursive call cannot wait
>>> for the I/O to complete.
>>>
>>> The deadlock occurred when one thread grabbed the bufio_client
>>> mutex and waited for an I/O to complete but the I/O was queued
>>> on another thread's current->bio_list and it was waiting to get
>>> the mutex held by the first thread.
>>>
>>> The fix allows only one I/O request from dm-verity to dm-bufio
>>> per thread. To do this, the prefetch requests were queued on worker
>>> threads.
>>>
>>> In addition to avoiding the deadlock, this fix made a slight
>>> improvement in performance.
>>>
>>> seconds_kernel_to_login:
>>> with prefetch: 8.43s
>>> without prefetch: 9.2s
>>> worker prefetch: 8.28s
>>>
>>> Signed-off-by: Paul Taysom <[email protected]>
>>> Signed-off-by: Mikulas Patocka <[email protected]>
>>> Cc: [email protected]
>>> ---
>>> drivers/md/dm-bufio.c | 2 ++
>>> drivers/md/dm-verity.c | 37 +++++++++++++++++++++++++++++++++----
>>> 2 files changed, 35 insertions(+), 4 deletions(-)
>>>
>>> Index: linux-3.8-fast/drivers/md/dm-verity.c
>>> ===================================================================
>>> --- linux-3.8-fast.orig/drivers/md/dm-verity.c 2013-03-04 22:49:20.000000000 +0100
>>> +++ linux-3.8-fast/drivers/md/dm-verity.c 2013-03-04 23:10:45.000000000 +0100
>>> @@ -93,6 +93,13 @@ struct dm_verity_io {
>>> */
>>> };
>>>
>>> +struct dm_verity_prefetch_work {
>>> + struct work_struct work;
>>> + struct dm_verity *v;
>>> + sector_t block;
>>> + unsigned n_blocks;
>>> +};
>>> +
>>> static struct shash_desc *io_hash_desc(struct dm_verity *v, struct dm_verity_io *io)
>>> {
>>> return (struct shash_desc *)(io + 1);
>>> @@ -424,15 +431,18 @@ static void verity_end_io(struct bio *bi
>>> * The root buffer is not prefetched, it is assumed that it will be cached
>>> * all the time.
>>> */
>>> -static void verity_prefetch_io(struct dm_verity *v, struct dm_verity_io *io)
>>> +static void verity_prefetch_io(struct work_struct *work)
>>> {
>>> + struct dm_verity_prefetch_work *pw =
>>> + container_of(work, struct dm_verity_prefetch_work, work);
>>> + struct dm_verity *v = pw->v;
>>> int i;
>>>
>>> for (i = v->levels - 2; i >= 0; i--) {
>>> sector_t hash_block_start;
>>> sector_t hash_block_end;
>>> - verity_hash_at_level(v, io->block, i, &hash_block_start, NULL);
>>> - verity_hash_at_level(v, io->block + io->n_blocks - 1, i, &hash_block_end, NULL);
>>> + verity_hash_at_level(v, pw->block, i, &hash_block_start, NULL);
>>> + verity_hash_at_level(v, pw->block + pw->n_blocks - 1, i, &hash_block_end, NULL);
>>> if (!i) {
>>> unsigned cluster = ACCESS_ONCE(dm_verity_prefetch_cluster);
>>>
>>> @@ -452,6 +462,25 @@ no_prefetch_cluster:
>>> dm_bufio_prefetch(v->bufio, hash_block_start,
>>> hash_block_end - hash_block_start + 1);
>>> }
>>> +
>>> + kfree(pw);
>>> +}
>>> +
>>> +static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io)
>>> +{
>>> + struct dm_verity_prefetch_work *pw;
>>> +
>>> + pw = kmalloc(sizeof(struct dm_verity_prefetch_work),
>>> + GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
>>> +
>>> + if (!pw)
>>> + return;
>>> +
>>> + INIT_WORK(&pw->work, verity_prefetch_io);
>>> + pw->v = v;
>>> + pw->block = io->block;
>>> + pw->n_blocks = io->n_blocks;
>>> + queue_work(v->verify_wq, &pw->work);
>>> }
>>>
>>> /*
>>> @@ -498,7 +527,7 @@ static int verity_map(struct dm_target *
>>> memcpy(io->io_vec, bio_iovec(bio),
>>> io->io_vec_size * sizeof(struct bio_vec));
>>>
>>> - verity_prefetch_io(v, io);
>>> + verity_submit_prefetch(v, io);
>>>
>>> generic_make_request(bio);
>>>
>>> Index: linux-3.8-fast/drivers/md/dm-bufio.c
>>> ===================================================================
>>> --- linux-3.8-fast.orig/drivers/md/dm-bufio.c 2013-03-04 23:03:14.000000000 +0100
>>> +++ linux-3.8-fast/drivers/md/dm-bufio.c 2013-03-04 23:04:19.000000000 +0100
>>> @@ -1026,6 +1026,8 @@ void dm_bufio_prefetch(struct dm_bufio_c
>>> {
>>> struct blk_plug plug;
>>>
>>> + BUG_ON(dm_bufio_in_request());
>>> +
>>> blk_start_plug(&plug);
>>> dm_bufio_lock(c);
>>>

Signed-off-by: Paul Taysom <[email protected]>