2010-07-01 10:51:38

by FUJITA Tomonori

[permalink] [raw]
Subject:

This patchset fixes page leak issue in discard commands with unprep
facility that James posted:

http://marc.info/?l=linux-scsi&m=127791727508214&w=2

The 1/3 patch adds unprep facility to the block layer (identical to
what James posted).

The 2/3 patch frees a page for discard commands by using the unprep
facility. James' original patch doesn't work since it accesses to
rq->bio in q->unprep_rq_fn. We hit oops since q->unprep_rq_fn is
called when all the data buffer (req->bio and scsi_data_buffer) in the
request is freed.

I use rq->buffer to keep track of an allocated page as the block layer
sets rq->buffer to the address of bio's page. scsi-ml (and llds) don't
use rq->buffer (rq->buffer is set to NULL). So I can't say that I like
it lots. Any other way to do that?

The 3/3 path just removes the dead code.

This is against Jens' for-2.6.36.

The git tree is also available:

git://git.kernel.org/pub/scm/linux/kernel/git/tomo/linux-2.6-misc.git unprep

I'll update the discard FS request conversion on the top of this soon. But this can be applied independently (and fixes the memory leak).

=
block/blk-core.c | 25 +++++++++++++++++++++++++
block/blk-settings.c | 17 +++++++++++++++++
drivers/scsi/scsi_lib.c | 2 +-
drivers/scsi/sd.c | 25 +++++++++++++++----------
include/linux/blkdev.h | 4 ++++
5 files changed, 62 insertions(+), 11 deletions(-)



2010-07-01 10:50:59

by FUJITA Tomonori

[permalink] [raw]
Subject: [PATCH 2/3] scsi: add sd_unprep_fn to free discard page

This fixes discard page leak by using q->unprep_rq_fn facility.

q->unprep_rq_fn is called when all the data buffer (req->bio and
scsi_data_buffer) in the request is freed.

sd_unprep() uses rq->buffer to free discard page allocated in
sd_prepare_discard().

Signed-off-by: FUJITA Tomonori <[email protected]>
---
drivers/scsi/sd.c | 12 +++++++++++-
1 files changed, 11 insertions(+), 1 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 86da819..2d4e3a8 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -425,6 +425,7 @@ static int scsi_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq)
sector_t sector = bio->bi_sector;
unsigned int nr_sectors = bio_sectors(bio);
unsigned int len;
+ int ret;
struct page *page;

if (sdkp->device->sector_size == 4096) {
@@ -465,7 +466,15 @@ static int scsi_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq)
}

blk_add_request_payload(rq, page, len);
- return scsi_setup_blk_pc_cmnd(sdp, rq);
+ ret = scsi_setup_blk_pc_cmnd(sdp, rq);
+ rq->buffer = page_address(page);
+ return ret;
+}
+
+static void sd_unprep_fn(struct request_queue *q, struct request *rq)
+{
+ if (rq->cmd_flags & REQ_DISCARD)
+ __free_page(virt_to_page(rq->buffer));
}

/**
@@ -2242,6 +2251,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie)
sd_revalidate_disk(gd);

blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
+ blk_queue_unprep_rq(sdp->request_queue, sd_unprep_fn);

gd->driverfs_dev = &sdp->sdev_gendev;
gd->flags = GENHD_FL_EXT_DEVT;
--
1.6.5

2010-07-01 10:51:22

by FUJITA Tomonori

[permalink] [raw]
Subject: [PATCH 1/3] block: implement an unprep function corresponding directly to prep

From: James Bottomley <[email protected]>

Reviewed-by: FUJITA Tomonori <[email protected]>
---
block/blk-core.c | 25 +++++++++++++++++++++++++
block/blk-settings.c | 17 +++++++++++++++++
drivers/scsi/scsi_lib.c | 2 +-
include/linux/blkdev.h | 4 ++++
4 files changed, 47 insertions(+), 1 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 3c37894..5ab3ac2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -608,6 +608,7 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,

q->request_fn = rfn;
q->prep_rq_fn = NULL;
+ q->unprep_rq_fn = NULL;
q->unplug_fn = generic_unplug_device;
q->queue_flags = QUEUE_FLAG_DEFAULT;
q->queue_lock = lock;
@@ -2133,6 +2134,26 @@ static bool blk_update_bidi_request(struct request *rq, int error,
return false;
}

+/**
+ * blk_unprep_request - unprepare a request
+ * @req: the request
+ *
+ * This function makes a request ready for complete resubmission (or
+ * completion). It happens only after all error handling is complete,
+ * so represents the appropriate moment to deallocate any resources
+ * that were allocated to the request in the prep_rq_fn. The queue
+ * lock is held when calling this.
+ */
+void blk_unprep_request(struct request *req)
+{
+ struct request_queue *q = req->q;
+
+ req->cmd_flags &= ~REQ_DONTPREP;
+ if (q->unprep_rq_fn)
+ q->unprep_rq_fn(q, req);
+}
+EXPORT_SYMBOL_GPL(blk_unprep_request);
+
/*
* queue lock must be held
*/
@@ -2148,6 +2169,10 @@ static void blk_finish_request(struct request *req, int error)

blk_delete_timer(req);

+ if (req->cmd_flags & REQ_DONTPREP)
+ blk_unprep_request(req);
+
+
blk_account_io_done(req);

if (req->end_io)
diff --git a/block/blk-settings.c b/block/blk-settings.c
index f5ed5a1..a234f4b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -37,6 +37,23 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
EXPORT_SYMBOL(blk_queue_prep_rq);

/**
+ * blk_queue_unprep_rq - set an unprepare_request function for queue
+ * @q: queue
+ * @ufn: unprepare_request function
+ *
+ * It's possible for a queue to register an unprepare_request callback
+ * which is invoked before the request is finally completed. The goal
+ * of the function is to deallocate any data that was allocated in the
+ * prepare_request callback.
+ *
+ */
+void blk_queue_unprep_rq(struct request_queue *q, unprep_rq_fn *ufn)
+{
+ q->unprep_rq_fn = ufn;
+}
+EXPORT_SYMBOL(blk_queue_unprep_rq);
+
+/**
* blk_queue_merge_bvec - set a merge_bvec function for queue
* @q: queue
* @mbfn: merge_bvec_fn
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 5f11608..ee83619 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -85,7 +85,7 @@ static void scsi_unprep_request(struct request *req)
{
struct scsi_cmnd *cmd = req->special;

- req->cmd_flags &= ~REQ_DONTPREP;
+ blk_unprep_request(req);
req->special = NULL;

scsi_put_command(cmd);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 204fbe2..6bba04c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -200,6 +200,7 @@ struct request_pm_state
typedef void (request_fn_proc) (struct request_queue *q);
typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
typedef int (prep_rq_fn) (struct request_queue *, struct request *);
+typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
typedef void (unplug_fn) (struct request_queue *);

struct bio_vec;
@@ -282,6 +283,7 @@ struct request_queue
request_fn_proc *request_fn;
make_request_fn *make_request_fn;
prep_rq_fn *prep_rq_fn;
+ unprep_rq_fn *unprep_rq_fn;
unplug_fn *unplug_fn;
merge_bvec_fn *merge_bvec_fn;
prepare_flush_fn *prepare_flush_fn;
@@ -841,6 +843,7 @@ extern void blk_complete_request(struct request *);
extern void __blk_complete_request(struct request *);
extern void blk_abort_request(struct request *);
extern void blk_abort_queue(struct request_queue *);
+extern void blk_unprep_request(struct request *);

/*
* Access functions for manipulating queue properties
@@ -885,6 +888,7 @@ extern int blk_queue_dma_drain(struct request_queue *q,
extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn);
extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
+extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn);
extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
extern void blk_queue_dma_alignment(struct request_queue *, int);
extern void blk_queue_update_dma_alignment(struct request_queue *, int);
--
1.6.5

2010-07-01 10:51:37

by FUJITA Tomonori

[permalink] [raw]
Subject: [PATCH 3/3] scsi: remove unused free discard page in sd_done

- sd_done isn't called for pc request so we never call the code.
- we use sd_unprep to free discard page now.

Signed-off-by: FUJITA Tomonori <[email protected]>
---
drivers/scsi/sd.c | 9 ---------
1 files changed, 0 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 2d4e3a8..aa6b48b 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1179,15 +1179,6 @@ static int sd_done(struct scsi_cmnd *SCpnt)
int sense_valid = 0;
int sense_deferred = 0;

- /*
- * If this is a discard request that originated from the kernel
- * we need to free our payload here. Note that we need to check
- * the request flag as the normal payload rules apply for
- * pass-through UNMAP / WRITE SAME requests.
- */
- if (SCpnt->request->cmd_flags & REQ_DISCARD)
- __free_page(bio_page(SCpnt->request->bio));
-
if (result) {
sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr);
if (sense_valid)
--
1.6.5

2010-07-01 12:29:51

by Jens Axboe

[permalink] [raw]
Subject: Re:

On 2010-07-01 12:49, FUJITA Tomonori wrote:
> This patchset fixes page leak issue in discard commands with unprep
> facility that James posted:
>
> http://marc.info/?l=linux-scsi&m=127791727508214&w=2
>
> The 1/3 patch adds unprep facility to the block layer (identical to
> what James posted).
>
> The 2/3 patch frees a page for discard commands by using the unprep
> facility. James' original patch doesn't work since it accesses to
> rq->bio in q->unprep_rq_fn. We hit oops since q->unprep_rq_fn is
> called when all the data buffer (req->bio and scsi_data_buffer) in the
> request is freed.
>
> I use rq->buffer to keep track of an allocated page as the block layer
> sets rq->buffer to the address of bio's page. scsi-ml (and llds) don't
> use rq->buffer (rq->buffer is set to NULL). So I can't say that I like
> it lots. Any other way to do that?
>
> The 3/3 path just removes the dead code.

I've queued up these three for 2.6.36.

--
Jens Axboe

2010-07-01 13:03:47

by Mike Snitzer

[permalink] [raw]
Subject: [PATCH] scsi: address leak in the error path of discard page allocation

On Thu, Jul 01 2010 at 6:49am -0400,
FUJITA Tomonori <[email protected]> wrote:

> This fixes discard page leak by using q->unprep_rq_fn facility.
>
> q->unprep_rq_fn is called when all the data buffer (req->bio and
> scsi_data_buffer) in the request is freed.
>
> sd_unprep() uses rq->buffer to free discard page allocated in
> sd_prepare_discard().
>
> Signed-off-by: FUJITA Tomonori <[email protected]>

Thanks for sorting this out Tomo, all 3 patches work great!

BTW, there is one remaining (rare) leak in the allocation path.

The following patch serves to fix it but I'm not sure if there is a more
elegant way to address this.

An alternative would be to check if the page is already allocated
(before allocating the page in scsi_setup_discard_cmnd)?

Please advise, thanks.
Mike



From: Mike Snitzer <[email protected]>
Subject: scsi: address leak in the error path of discard page allocation

Be sure to free the discard page if scsi_setup_blk_pc_cmnd fails.
E.g. Returning BLKPREP_DEFER from scsi_setup_blk_pc_cmnd will not cause
the request to be processed by sd_unprep_fn before the request is
retried (preparation included).

Signed-off-by: Mike Snitzer <[email protected]>

---
block/blk-core.c | 23 +++++++++++++++++++++++
drivers/scsi/sd.c | 6 +++++-
include/linux/blkdev.h | 1 +
3 files changed, 29 insertions(+), 1 deletion(-)

Index: linux-2.6/drivers/scsi/sd.c
===================================================================
--- linux-2.6.orig/drivers/scsi/sd.c
+++ linux-2.6/drivers/scsi/sd.c
@@ -466,7 +466,11 @@ static int scsi_setup_discard_cmnd(struc

blk_add_request_payload(rq, page, len);
ret = scsi_setup_blk_pc_cmnd(sdp, rq);
- rq->buffer = page_address(page);
+ if (ret != BLKPREP_OK) {
+ blk_clear_request_payload(rq);
+ __free_page(page);
+ } else
+ rq->buffer = page_address(page);
return ret;
}

Index: linux-2.6/block/blk-core.c
===================================================================
--- linux-2.6.orig/block/blk-core.c
+++ linux-2.6/block/blk-core.c
@@ -1164,6 +1164,29 @@ void blk_add_request_payload(struct requ
}
EXPORT_SYMBOL_GPL(blk_add_request_payload);

+/**
+ * blk_clear_request_payload - clear a request's payload
+ * @rq: request to update
+ *
+ * The driver needs to take care of freeing the payload itself.
+ */
+void blk_clear_request_payload(struct request *rq)
+{
+ struct bio *bio = rq->bio;
+
+ rq->__data_len = rq->resid_len = 0;
+ rq->nr_phys_segments = 0;
+ rq->buffer = NULL;
+
+ bio->bi_size = 0;
+ bio->bi_vcnt = 0;
+ bio->bi_phys_segments = 0;
+
+ bio->bi_io_vec->bv_page = NULL;
+ bio->bi_io_vec->bv_len = 0;
+}
+EXPORT_SYMBOL_GPL(blk_clear_request_payload);
+
void init_request_from_bio(struct request *req, struct bio *bio)
{
req->cpu = bio->bi_comp_cpu;
Index: linux-2.6/include/linux/blkdev.h
===================================================================
--- linux-2.6.orig/include/linux/blkdev.h
+++ linux-2.6/include/linux/blkdev.h
@@ -781,6 +781,7 @@ extern void blk_insert_request(struct re
extern void blk_requeue_request(struct request_queue *, struct request *);
extern void blk_add_request_payload(struct request *rq, struct page *page,
unsigned int len);
+extern void blk_clear_request_payload(struct request *rq);
extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
extern int blk_lld_busy(struct request_queue *q);
extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,

2010-07-01 13:30:27

by James Bottomley

[permalink] [raw]
Subject: Re: [PATCH 1/3] block: implement an unprep function corresponding directly to prep

On Thu, 2010-07-01 at 19:49 +0900, FUJITA Tomonori wrote:
> From: James Bottomley <[email protected]>
>
> Reviewed-by: FUJITA Tomonori <[email protected]>

Well, I forgot this, but otherwise looks fine:

Signed-off-by: James Bottomley <[email protected]>

> ---
> block/blk-core.c | 25 +++++++++++++++++++++++++
> block/blk-settings.c | 17 +++++++++++++++++
> drivers/scsi/scsi_lib.c | 2 +-
> include/linux/blkdev.h | 4 ++++
> 4 files changed, 47 insertions(+), 1 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 3c37894..5ab3ac2 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -608,6 +608,7 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn,
>
> q->request_fn = rfn;
> q->prep_rq_fn = NULL;
> + q->unprep_rq_fn = NULL;
> q->unplug_fn = generic_unplug_device;
> q->queue_flags = QUEUE_FLAG_DEFAULT;
> q->queue_lock = lock;
> @@ -2133,6 +2134,26 @@ static bool blk_update_bidi_request(struct request *rq, int error,
> return false;
> }
>
> +/**
> + * blk_unprep_request - unprepare a request
> + * @req: the request
> + *
> + * This function makes a request ready for complete resubmission (or
> + * completion). It happens only after all error handling is complete,
> + * so represents the appropriate moment to deallocate any resources
> + * that were allocated to the request in the prep_rq_fn. The queue
> + * lock is held when calling this.
> + */
> +void blk_unprep_request(struct request *req)
> +{
> + struct request_queue *q = req->q;
> +
> + req->cmd_flags &= ~REQ_DONTPREP;
> + if (q->unprep_rq_fn)
> + q->unprep_rq_fn(q, req);
> +}
> +EXPORT_SYMBOL_GPL(blk_unprep_request);
> +
> /*
> * queue lock must be held
> */
> @@ -2148,6 +2169,10 @@ static void blk_finish_request(struct request *req, int error)
>
> blk_delete_timer(req);
>
> + if (req->cmd_flags & REQ_DONTPREP)
> + blk_unprep_request(req);
> +
> +
> blk_account_io_done(req);
>
> if (req->end_io)
> diff --git a/block/blk-settings.c b/block/blk-settings.c
> index f5ed5a1..a234f4b 100644
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -37,6 +37,23 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
> EXPORT_SYMBOL(blk_queue_prep_rq);
>
> /**
> + * blk_queue_unprep_rq - set an unprepare_request function for queue
> + * @q: queue
> + * @ufn: unprepare_request function
> + *
> + * It's possible for a queue to register an unprepare_request callback
> + * which is invoked before the request is finally completed. The goal
> + * of the function is to deallocate any data that was allocated in the
> + * prepare_request callback.
> + *
> + */
> +void blk_queue_unprep_rq(struct request_queue *q, unprep_rq_fn *ufn)
> +{
> + q->unprep_rq_fn = ufn;
> +}
> +EXPORT_SYMBOL(blk_queue_unprep_rq);
> +
> +/**
> * blk_queue_merge_bvec - set a merge_bvec function for queue
> * @q: queue
> * @mbfn: merge_bvec_fn
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index 5f11608..ee83619 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -85,7 +85,7 @@ static void scsi_unprep_request(struct request *req)
> {
> struct scsi_cmnd *cmd = req->special;
>
> - req->cmd_flags &= ~REQ_DONTPREP;
> + blk_unprep_request(req);
> req->special = NULL;
>
> scsi_put_command(cmd);
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 204fbe2..6bba04c 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -200,6 +200,7 @@ struct request_pm_state
> typedef void (request_fn_proc) (struct request_queue *q);
> typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
> typedef int (prep_rq_fn) (struct request_queue *, struct request *);
> +typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
> typedef void (unplug_fn) (struct request_queue *);
>
> struct bio_vec;
> @@ -282,6 +283,7 @@ struct request_queue
> request_fn_proc *request_fn;
> make_request_fn *make_request_fn;
> prep_rq_fn *prep_rq_fn;
> + unprep_rq_fn *unprep_rq_fn;
> unplug_fn *unplug_fn;
> merge_bvec_fn *merge_bvec_fn;
> prepare_flush_fn *prepare_flush_fn;
> @@ -841,6 +843,7 @@ extern void blk_complete_request(struct request *);
> extern void __blk_complete_request(struct request *);
> extern void blk_abort_request(struct request *);
> extern void blk_abort_queue(struct request_queue *);
> +extern void blk_unprep_request(struct request *);
>
> /*
> * Access functions for manipulating queue properties
> @@ -885,6 +888,7 @@ extern int blk_queue_dma_drain(struct request_queue *q,
> extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn);
> extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
> extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
> +extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn);
> extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
> extern void blk_queue_dma_alignment(struct request_queue *, int);
> extern void blk_queue_update_dma_alignment(struct request_queue *, int);

2010-07-01 13:40:58

by Boaz Harrosh

[permalink] [raw]
Subject: Re: add sd_unprep_fn to free discard page

On 07/01/2010 01:49 PM, FUJITA Tomonori wrote:
> This patchset fixes page leak issue in discard commands with unprep
> facility that James posted:
>
> http://marc.info/?l=linux-scsi&m=127791727508214&w=2
>
> The 1/3 patch adds unprep facility to the block layer (identical to
> what James posted).
>

Alternatively to this patch you could also call scsi_driver->done()
on all commands. There are only two users (sd sr) it's not that bad to add
an if (blk_pc_req()) inside these ->done() function.

> The 2/3 patch frees a page for discard commands by using the unprep
> facility. James' original patch doesn't work since it accesses to
> rq->bio in q->unprep_rq_fn. We hit oops since q->unprep_rq_fn is
> called when all the data buffer (req->bio and scsi_data_buffer) in the
> request is freed.
>
> I use rq->buffer to keep track of an allocated page as the block layer
> sets rq->buffer to the address of bio's page. scsi-ml (and llds) don't
> use rq->buffer (rq->buffer is set to NULL). So I can't say that I like
> it lots. Any other way to do that?
>

rq->buffer is intended for block-driver use as well as req->special.
sd+scsi-ml is the block-driver here. req->special is used by scsi-ml
and rq->buffer is set to NULL inside the call to
scsi_setup_blk_pc_cmnd/scsi_setup_fs_cmnd. Since you set the ->buffer
after the call to scsi_setup_blk_pc_cmnd you should be in the clear.

I think scsi-ml should stop setting rq->buffer to NULL and leave it
be for ULD use. It is left from the time that LLDs where converted
to use BIOs, just to make sure out-of-tree drivers crash.

Boaz
> The 3/3 path just removes the dead code.
>
> This is against Jens' for-2.6.36.
>
> The git tree is also available:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/tomo/linux-2.6-misc.git unprep
>
> I'll update the discard FS request conversion on the top of this soon. But this can be applied independently (and fixes the memory leak).
>
> =
> block/blk-core.c | 25 +++++++++++++++++++++++++
> block/blk-settings.c | 17 +++++++++++++++++
> drivers/scsi/scsi_lib.c | 2 +-
> drivers/scsi/sd.c | 25 +++++++++++++++----------
> include/linux/blkdev.h | 4 ++++
> 5 files changed, 62 insertions(+), 11 deletions(-)
>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html

2010-07-01 13:57:48

by James Bottomley

[permalink] [raw]
Subject: Re: add sd_unprep_fn to free discard page

On Thu, 2010-07-01 at 16:40 +0300, Boaz Harrosh wrote:
> On 07/01/2010 01:49 PM, FUJITA Tomonori wrote:
> > This patchset fixes page leak issue in discard commands with unprep
> > facility that James posted:
> >
> > http://marc.info/?l=linux-scsi&m=127791727508214&w=2
> >
> > The 1/3 patch adds unprep facility to the block layer (identical to
> > what James posted).
> >
>
> Alternatively to this patch you could also call scsi_driver->done()
> on all commands. There are only two users (sd sr) it's not that bad to add
> an if (blk_pc_req()) inside these ->done() function.

We could, but it wouldn't fix the problem. The system issue is that
->done isn't symmetrical to ->prep_rq_fn() ... ->done() sits in the
middle of the SCSI completion path and we could decide to requeue the
command after calling it (that's why we can't free the discard page
either in ->done() or in the midlayer where ->done() is called).

The system solution is a symmetrical pair for ->prep_rq_fn() which is
only called either after all error handling is complete, or we decide to
completely strip the command down for a retry that goes through prep
again.

> > The 2/3 patch frees a page for discard commands by using the unprep
> > facility. James' original patch doesn't work since it accesses to
> > rq->bio in q->unprep_rq_fn. We hit oops since q->unprep_rq_fn is
> > called when all the data buffer (req->bio and scsi_data_buffer) in the
> > request is freed.
> >
> > I use rq->buffer to keep track of an allocated page as the block layer
> > sets rq->buffer to the address of bio's page. scsi-ml (and llds) don't
> > use rq->buffer (rq->buffer is set to NULL). So I can't say that I like
> > it lots. Any other way to do that?
> >
>
> rq->buffer is intended for block-driver use as well as req->special.
> sd+scsi-ml is the block-driver here. req->special is used by scsi-ml
> and rq->buffer is set to NULL inside the call to
> scsi_setup_blk_pc_cmnd/scsi_setup_fs_cmnd. Since you set the ->buffer
> after the call to scsi_setup_blk_pc_cmnd you should be in the clear.
>
> I think scsi-ml should stop setting rq->buffer to NULL and leave it
> be for ULD use. It is left from the time that LLDs where converted
> to use BIOs, just to make sure out-of-tree drivers crash.

So I buy this more. There is a slight assymmetry in that we have the
bio in prep, but not in unprep. Since requests can complete partially
(freeing the bios on the way), there's no real way to avoid this. I
think the use of ->buffer is a bit unsavoury but it looks acceptable.

James

2010-07-01 20:15:29

by Mike Snitzer

[permalink] [raw]
Subject: Re: scsi: address leak in the error path of discard page allocation

On Thu, Jul 01 2010 at 9:03am -0400,
Mike Snitzer <[email protected]> wrote:

> On Thu, Jul 01 2010 at 6:49am -0400,
> FUJITA Tomonori <[email protected]> wrote:
>
> > This fixes discard page leak by using q->unprep_rq_fn facility.
> >
> > q->unprep_rq_fn is called when all the data buffer (req->bio and
> > scsi_data_buffer) in the request is freed.
> >
> > sd_unprep() uses rq->buffer to free discard page allocated in
> > sd_prepare_discard().
> >
> > Signed-off-by: FUJITA Tomonori <[email protected]>
>
> Thanks for sorting this out Tomo, all 3 patches work great!
>
> BTW, there is one remaining (rare) leak in the allocation path.
>
> The following patch serves to fix it but I'm not sure if there is a more
> elegant way to address this.

I've continued to look at this to arrive at alternative implementation.
Here is a summary of the problem:

A 'scsi_setup_discard_cmnd' return other than BLKPREP_OK will not cause
a discard request to get completely stripped down ('blk_finish_request'
isn't calling 'blk_unprep_request' because REQ_DONTPREP is not set by
'scsi_prep_return' for none BLKPREP_OK return). Therefore the discard
request's page will _not_ get cleaned up.

Aside from code inspection, I confirmed this by adding some test code to
force a one-time initial BLKPREP_DEFER return from
'scsi_setup_discard_cmnd'.

> An alternative would be to check if the page is already allocated
> (before allocating the page in scsi_setup_discard_cmnd)?

Unfortunatey this "alternative" won't work because it completely ignores
the case where BLKPREP_KILL is returned from scsi_setup_discard_cmnd'.

> Please advise, thanks.

In short, I'm not too happy that the following patch doesn't allow for
centralized cleanup of the discard request's page (via sd_unprep_fn).
But in order to do that we'd likely have to:
1) relax blk_finish_request's REQ_DONTPREP constraint
2) add other weird conditionals within blk_unprep_request because
the discard request wasn't _really_ prepared?

So given this I'm inclined to stick with the following patch.

Jens and/or James, what do you think?

Mike

> From: Mike Snitzer <[email protected]>
> Subject: scsi: address leak in the error path of discard page allocation
>
> Be sure to free the discard page if scsi_setup_blk_pc_cmnd fails.
> E.g. Returning BLKPREP_DEFER from scsi_setup_blk_pc_cmnd will not cause
> the request to be processed by sd_unprep_fn before the request is
> retried (preparation included).
>
> Signed-off-by: Mike Snitzer <[email protected]>
>
> ---
> block/blk-core.c | 23 +++++++++++++++++++++++
> drivers/scsi/sd.c | 6 +++++-
> include/linux/blkdev.h | 1 +
> 3 files changed, 29 insertions(+), 1 deletion(-)
>
> Index: linux-2.6/drivers/scsi/sd.c
> ===================================================================
> --- linux-2.6.orig/drivers/scsi/sd.c
> +++ linux-2.6/drivers/scsi/sd.c
> @@ -466,7 +466,11 @@ static int scsi_setup_discard_cmnd(struc
>
> blk_add_request_payload(rq, page, len);
> ret = scsi_setup_blk_pc_cmnd(sdp, rq);
> - rq->buffer = page_address(page);
> + if (ret != BLKPREP_OK) {
> + blk_clear_request_payload(rq);
> + __free_page(page);
> + } else
> + rq->buffer = page_address(page);
> return ret;
> }
>
> Index: linux-2.6/block/blk-core.c
> ===================================================================
> --- linux-2.6.orig/block/blk-core.c
> +++ linux-2.6/block/blk-core.c
> @@ -1164,6 +1164,29 @@ void blk_add_request_payload(struct requ
> }
> EXPORT_SYMBOL_GPL(blk_add_request_payload);
>
> +/**
> + * blk_clear_request_payload - clear a request's payload
> + * @rq: request to update
> + *
> + * The driver needs to take care of freeing the payload itself.
> + */
> +void blk_clear_request_payload(struct request *rq)
> +{
> + struct bio *bio = rq->bio;
> +
> + rq->__data_len = rq->resid_len = 0;
> + rq->nr_phys_segments = 0;
> + rq->buffer = NULL;
> +
> + bio->bi_size = 0;
> + bio->bi_vcnt = 0;
> + bio->bi_phys_segments = 0;
> +
> + bio->bi_io_vec->bv_page = NULL;
> + bio->bi_io_vec->bv_len = 0;
> +}
> +EXPORT_SYMBOL_GPL(blk_clear_request_payload);
> +
> void init_request_from_bio(struct request *req, struct bio *bio)
> {
> req->cpu = bio->bi_comp_cpu;
> Index: linux-2.6/include/linux/blkdev.h
> ===================================================================
> --- linux-2.6.orig/include/linux/blkdev.h
> +++ linux-2.6/include/linux/blkdev.h
> @@ -781,6 +781,7 @@ extern void blk_insert_request(struct re
> extern void blk_requeue_request(struct request_queue *, struct request *);
> extern void blk_add_request_payload(struct request *rq, struct page *page,
> unsigned int len);
> +extern void blk_clear_request_payload(struct request *rq);
> extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
> extern int blk_lld_busy(struct request_queue *q);
> extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,

2010-07-01 20:19:19

by James Bottomley

[permalink] [raw]
Subject: Re: scsi: address leak in the error path of discard page allocation

On Thu, 2010-07-01 at 16:15 -0400, Mike Snitzer wrote:
> On Thu, Jul 01 2010 at 9:03am -0400,
> Mike Snitzer <[email protected]> wrote:
>
> > On Thu, Jul 01 2010 at 6:49am -0400,
> > FUJITA Tomonori <[email protected]> wrote:
> >
> > > This fixes discard page leak by using q->unprep_rq_fn facility.
> > >
> > > q->unprep_rq_fn is called when all the data buffer (req->bio and
> > > scsi_data_buffer) in the request is freed.
> > >
> > > sd_unprep() uses rq->buffer to free discard page allocated in
> > > sd_prepare_discard().
> > >
> > > Signed-off-by: FUJITA Tomonori <[email protected]>
> >
> > Thanks for sorting this out Tomo, all 3 patches work great!
> >
> > BTW, there is one remaining (rare) leak in the allocation path.
> >
> > The following patch serves to fix it but I'm not sure if there is a more
> > elegant way to address this.
>
> I've continued to look at this to arrive at alternative implementation.
> Here is a summary of the problem:
>
> A 'scsi_setup_discard_cmnd' return other than BLKPREP_OK will not cause
> a discard request to get completely stripped down ('blk_finish_request'
> isn't calling 'blk_unprep_request' because REQ_DONTPREP is not set by
> 'scsi_prep_return' for none BLKPREP_OK return). Therefore the discard
> request's page will _not_ get cleaned up.
>
> Aside from code inspection, I confirmed this by adding some test code to
> force a one-time initial BLKPREP_DEFER return from
> 'scsi_setup_discard_cmnd'.
>
> > An alternative would be to check if the page is already allocated
> > (before allocating the page in scsi_setup_discard_cmnd)?
>
> Unfortunatey this "alternative" won't work because it completely ignores
> the case where BLKPREP_KILL is returned from scsi_setup_discard_cmnd'.
>
> > Please advise, thanks.
>
> In short, I'm not too happy that the following patch doesn't allow for
> centralized cleanup of the discard request's page (via sd_unprep_fn).
> But in order to do that we'd likely have to:
> 1) relax blk_finish_request's REQ_DONTPREP constraint
> 2) add other weird conditionals within blk_unprep_request because
> the discard request wasn't _really_ prepared?
>
> So given this I'm inclined to stick with the following patch.
>
> Jens and/or James, what do you think?

The rules are pretty clear: Unprep is only called if the request gets
prepped ... that means you have to return BLKPREP_OK. Defer or kill
assume there's no teardown to do, so the allocation (if it took place)
must be reversed before returning them

James

2010-07-01 21:08:12

by Mike Snitzer

[permalink] [raw]
Subject: Re: scsi: address leak in the error path of discard page allocation

On Thu, Jul 01 2010 at 4:19pm -0400,
James Bottomley <[email protected]> wrote:

> On Thu, 2010-07-01 at 16:15 -0400, Mike Snitzer wrote:
> > On Thu, Jul 01 2010 at 9:03am -0400,
> > Mike Snitzer <[email protected]> wrote:
> >
> > > On Thu, Jul 01 2010 at 6:49am -0400,
> > > FUJITA Tomonori <[email protected]> wrote:
> > >
> > > > This fixes discard page leak by using q->unprep_rq_fn facility.
> > > >
> > > > q->unprep_rq_fn is called when all the data buffer (req->bio and
> > > > scsi_data_buffer) in the request is freed.
> > > >
> > > > sd_unprep() uses rq->buffer to free discard page allocated in
> > > > sd_prepare_discard().
> > > >
> > > > Signed-off-by: FUJITA Tomonori <[email protected]>
> > >
> > > Thanks for sorting this out Tomo, all 3 patches work great!
> > >
> > > BTW, there is one remaining (rare) leak in the allocation path.
> > >
> > > The following patch serves to fix it but I'm not sure if there is a more
> > > elegant way to address this.
> >
> > I've continued to look at this to arrive at alternative implementation.
> > Here is a summary of the problem:
> >
> > A 'scsi_setup_discard_cmnd' return other than BLKPREP_OK will not cause
> > a discard request to get completely stripped down ('blk_finish_request'
> > isn't calling 'blk_unprep_request' because REQ_DONTPREP is not set by
> > 'scsi_prep_return' for none BLKPREP_OK return). Therefore the discard
> > request's page will _not_ get cleaned up.
> >
> > Aside from code inspection, I confirmed this by adding some test code to
> > force a one-time initial BLKPREP_DEFER return from
> > 'scsi_setup_discard_cmnd'.
> >
> > > An alternative would be to check if the page is already allocated
> > > (before allocating the page in scsi_setup_discard_cmnd)?
> >
> > Unfortunatey this "alternative" won't work because it completely ignores
> > the case where BLKPREP_KILL is returned from scsi_setup_discard_cmnd'.
> >
> > > Please advise, thanks.
> >
> > In short, I'm not too happy that the following patch doesn't allow for
> > centralized cleanup of the discard request's page (via sd_unprep_fn).
> > But in order to do that we'd likely have to:
> > 1) relax blk_finish_request's REQ_DONTPREP constraint
> > 2) add other weird conditionals within blk_unprep_request because
> > the discard request wasn't _really_ prepared?
> >
> > So given this I'm inclined to stick with the following patch.
> >
> > Jens and/or James, what do you think?
>
> The rules are pretty clear: Unprep is only called if the request gets
> prepped ... that means you have to return BLKPREP_OK. Defer or kill
> assume there's no teardown to do, so the allocation (if it took place)
> must be reversed before returning them

OK, thanks for clarifying. This confirms that the general approach I
took in this patch is correct. It remains to be seen if Jens is
agreeable with blk_clear_request_payload.

I know Christoph thought my introduction and use of
blk_clear_request_payload was reasonable. Christoph, please feel free
to add your Ack to this patch if you approve.

I look forward to feedback from Tomo and Jens now too. Hopefully Jens
will pick this patch up.

regards,
Mike

2010-07-02 04:53:45

by FUJITA Tomonori

[permalink] [raw]
Subject: Re: scsi: address leak in the error path of discard page allocation

On Thu, 01 Jul 2010 15:19:08 -0500
James Bottomley <[email protected]> wrote:

> On Thu, 2010-07-01 at 16:15 -0400, Mike Snitzer wrote:
> > On Thu, Jul 01 2010 at 9:03am -0400,
> > Mike Snitzer <[email protected]> wrote:
> >
> > > On Thu, Jul 01 2010 at 6:49am -0400,
> > > FUJITA Tomonori <[email protected]> wrote:
> > >
> > > > This fixes discard page leak by using q->unprep_rq_fn facility.
> > > >
> > > > q->unprep_rq_fn is called when all the data buffer (req->bio and
> > > > scsi_data_buffer) in the request is freed.
> > > >
> > > > sd_unprep() uses rq->buffer to free discard page allocated in
> > > > sd_prepare_discard().
> > > >
> > > > Signed-off-by: FUJITA Tomonori <[email protected]>
> > >
> > > Thanks for sorting this out Tomo, all 3 patches work great!
> > >
> > > BTW, there is one remaining (rare) leak in the allocation path.
> > >
> > > The following patch serves to fix it but I'm not sure if there is a more
> > > elegant way to address this.
> >
> > I've continued to look at this to arrive at alternative implementation.
> > Here is a summary of the problem:
> >
> > A 'scsi_setup_discard_cmnd' return other than BLKPREP_OK will not cause
> > a discard request to get completely stripped down ('blk_finish_request'
> > isn't calling 'blk_unprep_request' because REQ_DONTPREP is not set by
> > 'scsi_prep_return' for none BLKPREP_OK return). Therefore the discard
> > request's page will _not_ get cleaned up.
> >
> > Aside from code inspection, I confirmed this by adding some test code to
> > force a one-time initial BLKPREP_DEFER return from
> > 'scsi_setup_discard_cmnd'.
> >
> > > An alternative would be to check if the page is already allocated
> > > (before allocating the page in scsi_setup_discard_cmnd)?
> >
> > Unfortunatey this "alternative" won't work because it completely ignores
> > the case where BLKPREP_KILL is returned from scsi_setup_discard_cmnd'.
> >
> > > Please advise, thanks.
> >
> > In short, I'm not too happy that the following patch doesn't allow for
> > centralized cleanup of the discard request's page (via sd_unprep_fn).
> > But in order to do that we'd likely have to:
> > 1) relax blk_finish_request's REQ_DONTPREP constraint
> > 2) add other weird conditionals within blk_unprep_request because
> > the discard request wasn't _really_ prepared?
> >
> > So given this I'm inclined to stick with the following patch.
> >
> > Jens and/or James, what do you think?
>
> The rules are pretty clear: Unprep is only called if the request gets
> prepped ... that means you have to return BLKPREP_OK. Defer or kill
> assume there's no teardown to do, so the allocation (if it took place)
> must be reversed before returning them

Seems that scsi-ml calls scsi_unprep_request() for not-prepped
requests in scsi_init_io error path. So we could move that
scsi_unprep_request() to the error path in scsi_prep_return(). Then we
can free discard page in the single place.

Applying the rule strictly is fine by me too; we remove
scsi_unprep_request() in scsi_init_io error path and clean up things
in each prep function's error path.


Btw, blk_clear_request_payload() is necessary?

Making sure that a request is clean is not a bad idea but if we hit
BLKPREP_KILL or BLKPREP_DEFER, we call
blk_end_request(). blk_end_request() can free a request properly even
if we don't do something like blk_clear_request_payload?

2010-07-02 10:48:24

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 2/3] scsi: add sd_unprep_fn to free discard page

> sd_unprep() uses rq->buffer to free discard page allocated in
> sd_prepare_discard().

Eeek. Accessing it using the bio in both haves seems a lot cleaner than
abusing this. Especially as we don't really need a mapped page anyway
at least for WRITE SAME implementation.

> - return scsi_setup_blk_pc_cmnd(sdp, rq);
> + ret = scsi_setup_blk_pc_cmnd(sdp, rq);
> + rq->buffer = page_address(page);
> + return ret;

In addition I don't think this is quite correct. You still need to undo
the payload addition manually if scsi_setup_blk_pc_cmnd fails, as we
haven't marked the request as REQ_DONTPREP at that point - it's only
done by scsi_prep_return for the BLKPREP_OK case.

2010-07-02 10:48:42

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH] scsi: address leak in the error path of discard page allocation

On Thu, Jul 01, 2010 at 09:03:28AM -0400, Mike Snitzer wrote:
> On Thu, Jul 01 2010 at 6:49am -0400,
> FUJITA Tomonori <[email protected]> wrote:
>
> > This fixes discard page leak by using q->unprep_rq_fn facility.
> >
> > q->unprep_rq_fn is called when all the data buffer (req->bio and
> > scsi_data_buffer) in the request is freed.
> >
> > sd_unprep() uses rq->buffer to free discard page allocated in
> > sd_prepare_discard().
> >
> > Signed-off-by: FUJITA Tomonori <[email protected]>
>
> Thanks for sorting this out Tomo, all 3 patches work great!
>
> BTW, there is one remaining (rare) leak in the allocation path.
>
> The following patch serves to fix it but I'm not sure if there is a more
> elegant way to address this.
>
> An alternative would be to check if the page is already allocated
> (before allocating the page in scsi_setup_discard_cmnd)?

Ah, should have read your mail first, sorry..

2010-07-02 10:49:58

by Christoph Hellwig

[permalink] [raw]
Subject: Re: scsi: address leak in the error path of discard page allocation

On Thu, Jul 01, 2010 at 05:07:49PM -0400, Mike Snitzer wrote:
> OK, thanks for clarifying. This confirms that the general approach I
> took in this patch is correct. It remains to be seen if Jens is
> agreeable with blk_clear_request_payload.
>
> I know Christoph thought my introduction and use of
> blk_clear_request_payload was reasonable. Christoph, please feel free
> to add your Ack to this patch if you approve.

Yes, I think that's the right way to do it.


Btw, guys - what happened to the idea of trimming useless context in
mail replies? This thread got almost unreadable because of the quoting
at this point.

2010-07-02 10:53:00

by Christoph Hellwig

[permalink] [raw]
Subject: Re: scsi: address leak in the error path of discard page allocation

On Fri, Jul 02, 2010 at 01:53:14PM +0900, FUJITA Tomonori wrote:
> Seems that scsi-ml calls scsi_unprep_request() for not-prepped
> requests in scsi_init_io error path. So we could move that
> scsi_unprep_request() to the error path in scsi_prep_return(). Then we
> can free discard page in the single place.
>
> Applying the rule strictly is fine by me too; we remove
> scsi_unprep_request() in scsi_init_io error path and clean up things
> in each prep function's error path.

That would be my preference. Making sure a function cleans up all
allocations / state changes on errors means code is a lot fragile and
easier to understand.

> Btw, blk_clear_request_payload() is necessary?
>
> Making sure that a request is clean is not a bad idea but if we hit
> BLKPREP_KILL or BLKPREP_DEFER, we call
> blk_end_request(). blk_end_request() can free a request properly even
> if we don't do something like blk_clear_request_payload?

For BLKPREP_KILL we do call __blk_end_request_all, but for
BLKPREP_DEFER we don't. In that case we just leave it on the queue for
a later retry. So we either have to clean it up, or leave the detect
the case of a partially constructed command in ->prep_fn. I think
cleaning up properly and having defined state when entering ->prep_fn is
the better variant.

2010-07-02 10:53:17

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH 3/3] scsi: remove unused free discard page in sd_done

On Thu, Jul 01, 2010 at 07:49:19PM +0900, FUJITA Tomonori wrote:
> - sd_done isn't called for pc request so we never call the code.
> - we use sd_unprep to free discard page now.
>
> Signed-off-by: FUJITA Tomonori <[email protected]>

Looks good,


Reviewed-by: Christoph Hellwig <[email protected]>

2010-07-02 13:09:13

by Mike Snitzer

[permalink] [raw]
Subject: Re: scsi: address leak in the error path of discard page allocation

On Fri, Jul 02 2010 at 6:52am -0400,
Christoph Hellwig <[email protected]> wrote:

> On Fri, Jul 02, 2010 at 01:53:14PM +0900, FUJITA Tomonori wrote:
> > Seems that scsi-ml calls scsi_unprep_request() for not-prepped
> > requests in scsi_init_io error path. So we could move that
> > scsi_unprep_request() to the error path in scsi_prep_return(). Then we
> > can free discard page in the single place.
> >
> > Applying the rule strictly is fine by me too; we remove
> > scsi_unprep_request() in scsi_init_io error path and clean up things
> > in each prep function's error path.
>
> That would be my preference. Making sure a function cleans up all
> allocations / state changes on errors means code is a lot fragile and
> easier to understand.
>
> > Btw, blk_clear_request_payload() is necessary?
> >
> > Making sure that a request is clean is not a bad idea but if we hit
> > BLKPREP_KILL or BLKPREP_DEFER, we call
> > blk_end_request(). blk_end_request() can free a request properly even
> > if we don't do something like blk_clear_request_payload?
>
> For BLKPREP_KILL we do call __blk_end_request_all, but for
> BLKPREP_DEFER we don't. In that case we just leave it on the queue for
> a later retry. So we either have to clean it up, or leave the detect
> the case of a partially constructed command in ->prep_fn. I think
> cleaning up properly and having defined state when entering ->prep_fn is
> the better variant.

Right, I shared the same opinion earlier in this thread, so please let
your ACKs fly now that we seem to all be in agreement.

I'd like Jens to pick this patch up now-ish ;)

Mike

2010-07-05 04:00:55

by FUJITA Tomonori

[permalink] [raw]
Subject: Re: scsi: address leak in the error path of discard page allocation

On Fri, 2 Jul 2010 09:08:56 -0400
Mike Snitzer <[email protected]> wrote:

> On Fri, Jul 02 2010 at 6:52am -0400,
> Christoph Hellwig <[email protected]> wrote:
>
> > On Fri, Jul 02, 2010 at 01:53:14PM +0900, FUJITA Tomonori wrote:
> > > Seems that scsi-ml calls scsi_unprep_request() for not-prepped
> > > requests in scsi_init_io error path. So we could move that
> > > scsi_unprep_request() to the error path in scsi_prep_return(). Then we
> > > can free discard page in the single place.
> > >
> > > Applying the rule strictly is fine by me too; we remove
> > > scsi_unprep_request() in scsi_init_io error path and clean up things
> > > in each prep function's error path.
> >
> > That would be my preference. Making sure a function cleans up all
> > allocations / state changes on errors means code is a lot fragile and
> > easier to understand.
> >
> > > Btw, blk_clear_request_payload() is necessary?
> > >
> > > Making sure that a request is clean is not a bad idea but if we hit
> > > BLKPREP_KILL or BLKPREP_DEFER, we call
> > > blk_end_request(). blk_end_request() can free a request properly even
> > > if we don't do something like blk_clear_request_payload?
> >
> > For BLKPREP_KILL we do call __blk_end_request_all, but for
> > BLKPREP_DEFER we don't. In that case we just leave it on the queue for
> > a later retry. So we either have to clean it up, or leave the detect
> > the case of a partially constructed command in ->prep_fn. I think
> > cleaning up properly and having defined state when entering ->prep_fn is
> > the better variant.

I think that as long as we free an allocated page for discard,
scsi_setup_discard_cmnd or the block layer don't need to detect the
case of a partially constructed command.


> Right, I shared the same opinion earlier in this thread, so please let
> your ACKs fly now that we seem to all be in agreement.

As I wrote earlier, I think that we need to clean up (and fix) the
error handling of the prep path. Currently, it's just messy. Some
errors are handled in the prep functions while some are in
scsi_prep_return().

- we call scsi_put_command in two places (scsi_init_io and scsi_prep_return).

- scsi_init_io calls scsi_put_command directly for BLKPREP_KILL but
calls it indirectly via scsi_unprep_request for BLKPREP_DEFER.

- If scsi_init_io() hits BLKPREP_KILL internally, we hit kernel oops
in scsi_prep_return since we call scsi_put_command twice. (luckily,
scsi_init_sgtable and scsi_alloc_sgtable in scsi_init_io return only
DEFER).

you could add more if you like.

I think that handling all the errors in scsi_prep_return() is
cleaner. I'll send a patch.

If we prefer to make sure that a request is completely reset (as
blk_clear_request_payload does), then we can add it on the top of the
patch.

2010-07-05 10:07:32

by Boaz Harrosh

[permalink] [raw]
Subject: Re: [PATCH 2/3] scsi: add sd_unprep_fn to free discard page

On 07/01/2010 01:49 PM, FUJITA Tomonori wrote:
> This fixes discard page leak by using q->unprep_rq_fn facility.
>
> q->unprep_rq_fn is called when all the data buffer (req->bio and
> scsi_data_buffer) in the request is freed.
>
> sd_unprep() uses rq->buffer to free discard page allocated in
> sd_prepare_discard().
>
> Signed-off-by: FUJITA Tomonori <[email protected]>

Sorry for thinking about this just now. It was on the tip of my
mind, but I was too busy with other things to notice.

There is a simple more STD way to solve all this without the need
for any new API.

*Use the bio destructor to also deallocate it's pages*

Now that is not such a novel idea, it's used all over the place.
Well it's the proper way to submit a bio and be notified on done.
Since there are no bio leaks, we know all error paths are just
as covered.

For this to work all these patches could be removed and the
blk_add_request_payload need to change to recieve a bio destructor
pointer.

If you ask me I hate what was done with discard at the block layer
since it's only in Jens tree for-next I think it is good time to change
it. What is this alloc the bio in one place and add it's pages in another
place. Rrrrr

to quote from the author himself:
<block-core.c>
/**
* blk_add_request_payload - add a payload to a request
<snip>
*
* Note that this is a quite horrible hack and nothing but handling of
* discard requests should ever use it.
*/
void blk_add_request_payload(struct request *rq, struct page *page,
unsigned int len)
</block-core.c>

The block-layer users think of a discard as zero-load, so should the
block-layer. The block driver like sd that needs to add a payload can
set in a bio just like any other place. when allocating the bio, set
it's destructor and be done with it. All allocations and cleanups are
done at one place, at same level.

As it stands now blk_add_request_payload will have to either chain the
bio destructors and/or if it knows where it's allocated then chain to
the proper bio_free, after calling the block-driver supplied callback.

BTW where is that zero-pages discard bio allocated. Why is it not allocated
inside blk_add_request_payload? What is the meaning of zero length bio?
I though everywhere we check for "if (req->bio)" not "if (bio_size(req_bio))"
isn't all this asking for trouble?

my $0.017
Boaz

> ---
> drivers/scsi/sd.c | 12 +++++++++++-
> 1 files changed, 11 insertions(+), 1 deletions(-)
>
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 86da819..2d4e3a8 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -425,6 +425,7 @@ static int scsi_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq)
> sector_t sector = bio->bi_sector;
> unsigned int nr_sectors = bio_sectors(bio);
> unsigned int len;
> + int ret;
> struct page *page;
>
> if (sdkp->device->sector_size == 4096) {
> @@ -465,7 +466,15 @@ static int scsi_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq)
> }
>
> blk_add_request_payload(rq, page, len);
> - return scsi_setup_blk_pc_cmnd(sdp, rq);
> + ret = scsi_setup_blk_pc_cmnd(sdp, rq);
> + rq->buffer = page_address(page);
> + return ret;
> +}
> +
> +static void sd_unprep_fn(struct request_queue *q, struct request *rq)
> +{
> + if (rq->cmd_flags & REQ_DISCARD)
> + __free_page(virt_to_page(rq->buffer));
> }
>
> /**
> @@ -2242,6 +2251,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie)
> sd_revalidate_disk(gd);
>
> blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
> + blk_queue_unprep_rq(sdp->request_queue, sd_unprep_fn);
>
> gd->driverfs_dev = &sdp->sdev_gendev;
> gd->flags = GENHD_FL_EXT_DEVT;