2014-02-26 05:23:03

by Joonsoo Kim

[permalink] [raw]
Subject: [PATCH v2] zram: support REQ_DISCARD

zram is a RAM-based block device and can be used as the backend of a
filesystem. When a filesystem deletes a file, it normally doesn't do anything
to the data blocks of that file; it just updates the file's metadata. This
behavior is no problem on a disk-based block device, but it is a problem on a
RAM-based block device, since we can't free the memory used for the data
blocks. To overcome this disadvantage, there is the REQ_DISCARD functionality.
If the block device supports REQ_DISCARD and the filesystem is mounted with
the discard option, the filesystem sends REQ_DISCARD to the block device
whenever some data blocks are discarded. All we have to do is handle this
request.

This patch sets QUEUE_FLAG_DISCARD and handles the REQ_DISCARD request.
With it, we can free memory used by zram when it is no longer in use.

v2: handle unaligned case commented by Jerome

Signed-off-by: Joonsoo Kim <[email protected]>

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 5ec61be..5364c1e 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -501,6 +501,36 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
return ret;
}

+static void zram_bio_discard(struct zram *zram, struct bio *bio)
+{
+ u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
+ size_t n = bio->bi_iter.bi_size;
+ size_t misalign;
+
+ /*
+ * On some arch, logical block (4096) aligned request couldn't be
+ * aligned to PAGE_SIZE, since their PAGE_SIZE aren't 4096.
+ * Therefore we should handle this misaligned case here.
+ */
+ misalign = (bio->bi_iter.bi_sector &
+ (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
+ if (misalign) {
+ if (n < misalign)
+ return;
+
+ n -= misalign;
+ index++;
+ }
+
+ while (n >= PAGE_SIZE) {
+ write_lock(&zram->meta->tb_lock);
+ zram_free_page(zram, index);
+ write_unlock(&zram->meta->tb_lock);
+ index++;
+ n -= PAGE_SIZE;
+ }
+}
+
static void zram_reset_device(struct zram *zram, bool reset_capacity)
{
size_t index;
@@ -618,6 +648,12 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
struct bio_vec bvec;
struct bvec_iter iter;

+ if (unlikely(bio->bi_rw & REQ_DISCARD)) {
+ zram_bio_discard(zram, bio);
+ bio_endio(bio, 0);
+ return;
+ }
+
index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
offset = (bio->bi_iter.bi_sector &
(SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
@@ -784,6 +820,10 @@ static int create_device(struct zram *zram, int device_id)
ZRAM_LOGICAL_BLOCK_SIZE);
blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
+ zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
+ zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
+ zram->disk->queue->limits.discard_zeroes_data = 1;
+ queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);

add_disk(zram->disk);

--
1.7.9.5


2014-02-26 08:07:03

by Minchan Kim

[permalink] [raw]
Subject: Re: [PATCH v2] zram: support REQ_DISCARD

Hi Joonsoo,

On Wed, Feb 26, 2014 at 02:23:15PM +0900, Joonsoo Kim wrote:
> zram is ram based block device and can be used by backend of filesystem.
> When filesystem deletes a file, it normally doesn't do anything on data
> block of that file. It just marks on metadata of that file. This behavior
> has no problem on disk based block device, but has problems on ram based
> block device, since we can't free memory used for data block. To overcome
> this disadvantage, there is REQ_DISCARD functionality. If block device
> support REQ_DISCARD and filesystem is mounted with discard option,
> filesystem sends REQ_DISCARD to block device whenever some data blocks are
> discarded. All we have to do is to handle this request.
>
> This patch implements to flag up QUEUE_FLAG_DISCARD and handle this
> REQ_DISCARD request. With it, we can free memory used by zram if it isn't
> used.
>
> v2: handle unaligned case commented by Jerome
>
> Signed-off-by: Joonsoo Kim <[email protected]>
>
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index 5ec61be..5364c1e 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -501,6 +501,36 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
> return ret;
> }
>
> +static void zram_bio_discard(struct zram *zram, struct bio *bio)
> +{
> + u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
> + size_t n = bio->bi_iter.bi_size;

Nitpick:
Please use a more meaningful name (e.g., len) rather than 'n'.

> + size_t misalign;
> +
> + /*
> + * On some arch, logical block (4096) aligned request couldn't be
> + * aligned to PAGE_SIZE, since their PAGE_SIZE aren't 4096.
> + * Therefore we should handle this misaligned case here.
> + */
> + misalign = (bio->bi_iter.bi_sector &
> + (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
> + if (misalign) {
> + if (n < misalign)
> + return;
> +
> + n -= misalign;
> + index++;
> + }
> +
> + while (n >= PAGE_SIZE) {
> + write_lock(&zram->meta->tb_lock);
> + zram_free_page(zram, index);
> + write_unlock(&zram->meta->tb_lock);
> + index++;
> + n -= PAGE_SIZE;
> + }
> +}
> +
> static void zram_reset_device(struct zram *zram, bool reset_capacity)
> {
> size_t index;
> @@ -618,6 +648,12 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
> struct bio_vec bvec;
> struct bvec_iter iter;
>
> + if (unlikely(bio->bi_rw & REQ_DISCARD)) {
> + zram_bio_discard(zram, bio);
> + bio_endio(bio, 0);
> + return;
> + }
> +
> index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
> offset = (bio->bi_iter.bi_sector &
> (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
> @@ -784,6 +820,10 @@ static int create_device(struct zram *zram, int device_id)
> ZRAM_LOGICAL_BLOCK_SIZE);
> blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
> blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
> + zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
> + zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
> + zram->disk->queue->limits.discard_zeroes_data = 1;

I don't know what discard_zeroes_data means. It seems we must make sure
that zram returns zero pages for a discarded block on the next read,
but a problem could happen if you bail out of the discard logic
due to misalignment while the caller still thinks it was successful.

What happens in this case?

> + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
>
> add_disk(zram->disk);
>
> --
> 1.7.9.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/

--
Kind regards,
Minchan Kim

2014-02-26 13:20:06

by Sergey Senozhatsky

[permalink] [raw]
Subject: Re: [PATCH v2] zram: support REQ_DISCARD

Hello,

On (02/26/14 14:23), Joonsoo Kim wrote:
> zram is ram based block device and can be used by backend of filesystem.
> When filesystem deletes a file, it normally doesn't do anything on data
> block of that file. It just marks on metadata of that file. This behavior
> has no problem on disk based block device, but has problems on ram based
> block device, since we can't free memory used for data block. To overcome
> this disadvantage, there is REQ_DISCARD functionality. If block device
> support REQ_DISCARD and filesystem is mounted with discard option,
> filesystem sends REQ_DISCARD to block device whenever some data blocks are
> discarded. All we have to do is to handle this request.
>
> This patch implements to flag up QUEUE_FLAG_DISCARD and handle this
> REQ_DISCARD request. With it, we can free memory used by zram if it isn't
> used.
>
> v2: handle unaligned case commented by Jerome
>
> Signed-off-by: Joonsoo Kim <[email protected]>
>
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index 5ec61be..5364c1e 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -501,6 +501,36 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
> return ret;
> }
>
> +static void zram_bio_discard(struct zram *zram, struct bio *bio)
> +{
> + u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
> + size_t n = bio->bi_iter.bi_size;
> + size_t misalign;
> +
> + /*
> + * On some arch, logical block (4096) aligned request couldn't be
> + * aligned to PAGE_SIZE, since their PAGE_SIZE aren't 4096.
> + * Therefore we should handle this misaligned case here.
> + */
> + misalign = (bio->bi_iter.bi_sector &
> + (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
> + if (misalign) {
> + if (n < misalign)
> + return;
> +
> + n -= misalign;
> + index++;
> + }
> +
> + while (n >= PAGE_SIZE) {
> + write_lock(&zram->meta->tb_lock);
> + zram_free_page(zram, index);
> + write_unlock(&zram->meta->tb_lock);
> + index++;
> + n -= PAGE_SIZE;
> + }
> +}
> +

As a side note, do we need the zram_bio_discard() function? I mean, can we
handle the discard request in zram_bvec_rw(), where we already know the index,
etc. (passed from __zram_make_request())?

for example:

@@ -510,6 +510,11 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
ret = zram_bvec_write(zram, bvec, index, offset);
}

+ if (unlikely(bio->bi_rw & REQ_DISCARD)) {
+ write_lock(&zram->meta->tb_lock);
+ zram_free_page(zram, index);
+ write_unlock(&zram->meta->tb_lock);
+ }
return ret;
}

-ss

> static void zram_reset_device(struct zram *zram, bool reset_capacity)
> {
> size_t index;
> @@ -618,6 +648,12 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
> struct bio_vec bvec;
> struct bvec_iter iter;
>
> + if (unlikely(bio->bi_rw & REQ_DISCARD)) {
> + zram_bio_discard(zram, bio);
> + bio_endio(bio, 0);
> + return;
> + }
> +
> index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
> offset = (bio->bi_iter.bi_sector &
> (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
> @@ -784,6 +820,10 @@ static int create_device(struct zram *zram, int device_id)
> ZRAM_LOGICAL_BLOCK_SIZE);
> blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
> blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
> + zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
> + zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
> + zram->disk->queue->limits.discard_zeroes_data = 1;
> + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
>
> add_disk(zram->disk);
>
> --
> 1.7.9.5
>

2014-02-26 13:44:19

by Jerome Marchand

[permalink] [raw]
Subject: Re: [PATCH v2] zram: support REQ_DISCARD

On 02/26/2014 02:16 PM, Sergey Senozhatsky wrote:
> Hello,
>
> On (02/26/14 14:23), Joonsoo Kim wrote:
>> zram is ram based block device and can be used by backend of filesystem.
>> When filesystem deletes a file, it normally doesn't do anything on data
>> block of that file. It just marks on metadata of that file. This behavior
>> has no problem on disk based block device, but has problems on ram based
>> block device, since we can't free memory used for data block. To overcome
>> this disadvantage, there is REQ_DISCARD functionality. If block device
>> support REQ_DISCARD and filesystem is mounted with discard option,
>> filesystem sends REQ_DISCARD to block device whenever some data blocks are
>> discarded. All we have to do is to handle this request.
>>
>> This patch implements to flag up QUEUE_FLAG_DISCARD and handle this
>> REQ_DISCARD request. With it, we can free memory used by zram if it isn't
>> used.
>>
>> v2: handle unaligned case commented by Jerome
>>
>> Signed-off-by: Joonsoo Kim <[email protected]>
>>
>> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
>> index 5ec61be..5364c1e 100644
>> --- a/drivers/block/zram/zram_drv.c
>> +++ b/drivers/block/zram/zram_drv.c
>> @@ -501,6 +501,36 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
>> return ret;
>> }
>>
>> +static void zram_bio_discard(struct zram *zram, struct bio *bio)
>> +{
>> + u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
>> + size_t n = bio->bi_iter.bi_size;
>> + size_t misalign;
>> +
>> + /*
>> + * On some arch, logical block (4096) aligned request couldn't be
>> + * aligned to PAGE_SIZE, since their PAGE_SIZE aren't 4096.
>> + * Therefore we should handle this misaligned case here.
>> + */
>> + misalign = (bio->bi_iter.bi_sector &
>> + (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
>> + if (misalign) {
>> + if (n < misalign)
>> + return;
>> +
>> + n -= misalign;
>> + index++;
>> + }
>> +
>> + while (n >= PAGE_SIZE) {
>> + write_lock(&zram->meta->tb_lock);
>> + zram_free_page(zram, index);
>> + write_unlock(&zram->meta->tb_lock);
>> + index++;
>> + n -= PAGE_SIZE;
>> + }
>> +}
>> +
>
> a side note, do we need zram_bio_discard() function? I mean, can we handle
> discard request in zram_bvec_rw(), where we already know index, etc. (passed
> from __zram_make_request())?
>

We'd still have to make sure not to discard pages that are still partially
used, but it might simplify the code: __zram_make_request() already takes
care of splitting the request.

> for example:
>
> @@ -510,6 +510,11 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
> ret = zram_bvec_write(zram, bvec, index, offset);
> }
>
> + if (unlikely(bio->bi_rw & REQ_DISCARD)) {

+ if (!is_partial_io(bvec)) {

> + write_lock(&zram->meta->tb_lock);
> + zram_free_page(zram, index);
> + write_unlock(&zram->meta->tb_lock);

+ }

Also this code might still call zram_bvec_read() and increase num_reads
for discard request: I guess bio_data_dir(bio) == READ == 0 in this case.

Btw, why does __zram_make_request() have that rw argument? All the information
it needs is already passed by the bio argument. I seem to recollect having
seen a cleanup patch that gets rid of it, or is it just my imagination?

Jerome

> + }
> return ret;
> }
>
> -ss
>
>> static void zram_reset_device(struct zram *zram, bool reset_capacity)
>> {
>> size_t index;
>> @@ -618,6 +648,12 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
>> struct bio_vec bvec;
>> struct bvec_iter iter;
>>
>> + if (unlikely(bio->bi_rw & REQ_DISCARD)) {
>> + zram_bio_discard(zram, bio);
>> + bio_endio(bio, 0);
>> + return;
>> + }
>> +
>> index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
>> offset = (bio->bi_iter.bi_sector &
>> (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
>> @@ -784,6 +820,10 @@ static int create_device(struct zram *zram, int device_id)
>> ZRAM_LOGICAL_BLOCK_SIZE);
>> blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
>> blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
>> + zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
>> + zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
>> + zram->disk->queue->limits.discard_zeroes_data = 1;
>> + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
>>
>> add_disk(zram->disk);
>>
>> --
>> 1.7.9.5
>>

2014-02-26 14:01:19

by Sergey Senozhatsky

[permalink] [raw]
Subject: Re: [PATCH v2] zram: support REQ_DISCARD

On (02/26/14 14:44), Jerome Marchand wrote:
> On 02/26/2014 02:16 PM, Sergey Senozhatsky wrote:
> > Hello,
> >
> > On (02/26/14 14:23), Joonsoo Kim wrote:
> >> zram is ram based block device and can be used by backend of filesystem.
> >> When filesystem deletes a file, it normally doesn't do anything on data
> >> block of that file. It just marks on metadata of that file. This behavior
> >> has no problem on disk based block device, but has problems on ram based
> >> block device, since we can't free memory used for data block. To overcome
> >> this disadvantage, there is REQ_DISCARD functionality. If block device
> >> support REQ_DISCARD and filesystem is mounted with discard option,
> >> filesystem sends REQ_DISCARD to block device whenever some data blocks are
> >> discarded. All we have to do is to handle this request.
> >>
> >> This patch implements to flag up QUEUE_FLAG_DISCARD and handle this
> >> REQ_DISCARD request. With it, we can free memory used by zram if it isn't
> >> used.
> >>
> >> v2: handle unaligned case commented by Jerome
> >>
> >> Signed-off-by: Joonsoo Kim <[email protected]>
> >>
> >> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> >> index 5ec61be..5364c1e 100644
> >> --- a/drivers/block/zram/zram_drv.c
> >> +++ b/drivers/block/zram/zram_drv.c
> >> @@ -501,6 +501,36 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
> >> return ret;
> >> }
> >>
> >> +static void zram_bio_discard(struct zram *zram, struct bio *bio)
> >> +{
> >> + u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
> >> + size_t n = bio->bi_iter.bi_size;
> >> + size_t misalign;
> >> +
> >> + /*
> >> + * On some arch, logical block (4096) aligned request couldn't be
> >> + * aligned to PAGE_SIZE, since their PAGE_SIZE aren't 4096.
> >> + * Therefore we should handle this misaligned case here.
> >> + */
> >> + misalign = (bio->bi_iter.bi_sector &
> >> + (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
> >> + if (misalign) {
> >> + if (n < misalign)
> >> + return;
> >> +
> >> + n -= misalign;
> >> + index++;
> >> + }
> >> +
> >> + while (n >= PAGE_SIZE) {
> >> + write_lock(&zram->meta->tb_lock);
> >> + zram_free_page(zram, index);
> >> + write_unlock(&zram->meta->tb_lock);
> >> + index++;
> >> + n -= PAGE_SIZE;
> >> + }
> >> +}
> >> +
> >
> > a side note, do we need zram_bio_discard() function? I mean, can we handle
> > discard request in zram_bvec_rw(), where we already know index, etc. (passed
> > from __zram_make_request())?
> >
>
> We'd still have to make sure not to discard pages that are still partially
> used, but it might simplify the code: __zram_make_request() already takes
> care of splitting the request.
>
> > for example:
> >
> > @@ -510,6 +510,11 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
> > ret = zram_bvec_write(zram, bvec, index, offset);
> > }
> >
> > + if (unlikely(bio->bi_rw & REQ_DISCARD)) {
>
> + if (!is_partial_io(bvec) {
>
> > + write_lock(&zram->meta->tb_lock);
> > + zram_free_page(zram, index);
> > + write_unlock(&zram->meta->tb_lock);
>
> + }
>
> Also this code might still call zram_bvec_read() and increase num_reads
> for discard request: I guess bio_data_dir(bio) == READ == 0 in this case.
>
> Btw, why __zram_make_request() has an that rw argument? All the information
> it needs is passed by the bio argument already. I kind of recollect to have
> seen a cleanup patch that get rid of it or is it just my imagination?
>

It doesn't. The cleanup patch 'do not pass rw argument to __zram_make_request()'
is in linux-next.

-ss

> Jerome
>
> > + }
> > return ret;
> > }
> >
> > -ss
> >
> >> static void zram_reset_device(struct zram *zram, bool reset_capacity)
> >> {
> >> size_t index;
> >> @@ -618,6 +648,12 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
> >> struct bio_vec bvec;
> >> struct bvec_iter iter;
> >>
> >> + if (unlikely(bio->bi_rw & REQ_DISCARD)) {
> >> + zram_bio_discard(zram, bio);
> >> + bio_endio(bio, 0);
> >> + return;
> >> + }
> >> +
> >> index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
> >> offset = (bio->bi_iter.bi_sector &
> >> (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
> >> @@ -784,6 +820,10 @@ static int create_device(struct zram *zram, int device_id)
> >> ZRAM_LOGICAL_BLOCK_SIZE);
> >> blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
> >> blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
> >> + zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
> >> + zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
> >> + zram->disk->queue->limits.discard_zeroes_data = 1;
> >> + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
> >>
> >> add_disk(zram->disk);
> >>
> >> --
> >> 1.7.9.5
> >>
>

2014-02-26 14:06:47

by Jerome Marchand

[permalink] [raw]
Subject: Re: [PATCH v2] zram: support REQ_DISCARD

On 02/26/2014 02:57 PM, Sergey Senozhatsky wrote:
> On (02/26/14 14:44), Jerome Marchand wrote:
>> On 02/26/2014 02:16 PM, Sergey Senozhatsky wrote:
>>> Hello,
>>>
>>> On (02/26/14 14:23), Joonsoo Kim wrote:
>>>> zram is ram based block device and can be used by backend of filesystem.
>>>> When filesystem deletes a file, it normally doesn't do anything on data
>>>> block of that file. It just marks on metadata of that file. This behavior
>>>> has no problem on disk based block device, but has problems on ram based
>>>> block device, since we can't free memory used for data block. To overcome
>>>> this disadvantage, there is REQ_DISCARD functionality. If block device
>>>> support REQ_DISCARD and filesystem is mounted with discard option,
>>>> filesystem sends REQ_DISCARD to block device whenever some data blocks are
>>>> discarded. All we have to do is to handle this request.
>>>>
>>>> This patch implements to flag up QUEUE_FLAG_DISCARD and handle this
>>>> REQ_DISCARD request. With it, we can free memory used by zram if it isn't
>>>> used.
>>>>
>>>> v2: handle unaligned case commented by Jerome
>>>>
>>>> Signed-off-by: Joonsoo Kim <[email protected]>
>>>>
>>>> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
>>>> index 5ec61be..5364c1e 100644
>>>> --- a/drivers/block/zram/zram_drv.c
>>>> +++ b/drivers/block/zram/zram_drv.c
>>>> @@ -501,6 +501,36 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
>>>> return ret;
>>>> }
>>>>
>>>> +static void zram_bio_discard(struct zram *zram, struct bio *bio)
>>>> +{
>>>> + u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
>>>> + size_t n = bio->bi_iter.bi_size;
>>>> + size_t misalign;
>>>> +
>>>> + /*
>>>> + * On some arch, logical block (4096) aligned request couldn't be
>>>> + * aligned to PAGE_SIZE, since their PAGE_SIZE aren't 4096.
>>>> + * Therefore we should handle this misaligned case here.
>>>> + */
>>>> + misalign = (bio->bi_iter.bi_sector &
>>>> + (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
>>>> + if (misalign) {
>>>> + if (n < misalign)
>>>> + return;
>>>> +
>>>> + n -= misalign;
>>>> + index++;
>>>> + }
>>>> +
>>>> + while (n >= PAGE_SIZE) {
>>>> + write_lock(&zram->meta->tb_lock);
>>>> + zram_free_page(zram, index);
>>>> + write_unlock(&zram->meta->tb_lock);
>>>> + index++;
>>>> + n -= PAGE_SIZE;
>>>> + }
>>>> +}
>>>> +
>>>
>>> a side note, do we need zram_bio_discard() function? I mean, can we handle
>>> discard request in zram_bvec_rw(), where we already know index, etc. (passed
>>> from __zram_make_request())?
>>>
>>
>> We'd still have to make sure not to discard pages that are still partially
>> used, but it might simplify the code: __zram_make_request() already takes
>> care of splitting the request.
>>
>>> for example:
>>>
>>> @@ -510,6 +510,11 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
>>> ret = zram_bvec_write(zram, bvec, index, offset);
>>> }
>>>
>>> + if (unlikely(bio->bi_rw & REQ_DISCARD)) {
>>
>> + if (!is_partial_io(bvec) {
>>
>>> + write_lock(&zram->meta->tb_lock);
>>> + zram_free_page(zram, index);
>>> + write_unlock(&zram->meta->tb_lock);
>>
>> + }
>>
>> Also this code might still call zram_bvec_read() and increase num_reads
>> for discard request: I guess bio_data_dir(bio) == READ == 0 in this case.
>>
>> Btw, why __zram_make_request() has an that rw argument? All the information
>> it needs is passed by the bio argument already. I kind of recollect to have
>> seen a cleanup patch that get rid of it or is it just my imagination?
>>
>
> it doesn't. cleanup patch 'do not pass rw argument to __zram_make_request()'
> is in linux-next.
>

You're right. I must be blind since there is an example of __zram_make_request()
without this argument just a few lines below.

> -ss
>
>> Jerome
>>
>>> + }
>>> return ret;
>>> }
>>>
>>> -ss
>>>
>>>> static void zram_reset_device(struct zram *zram, bool reset_capacity)
>>>> {
>>>> size_t index;
>>>> @@ -618,6 +648,12 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)

I need to open my eyes.

Jerome

>>>> struct bio_vec bvec;
>>>> struct bvec_iter iter;
>>>>
>>>> + if (unlikely(bio->bi_rw & REQ_DISCARD)) {
>>>> + zram_bio_discard(zram, bio);
>>>> + bio_endio(bio, 0);
>>>> + return;
>>>> + }
>>>> +
>>>> index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
>>>> offset = (bio->bi_iter.bi_sector &
>>>> (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
>>>> @@ -784,6 +820,10 @@ static int create_device(struct zram *zram, int device_id)
>>>> ZRAM_LOGICAL_BLOCK_SIZE);
>>>> blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
>>>> blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
>>>> + zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
>>>> + zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
>>>> + zram->disk->queue->limits.discard_zeroes_data = 1;
>>>> + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
>>>>
>>>> add_disk(zram->disk);
>>>>
>>>> --
>>>> 1.7.9.5
>>>>
>>

2014-02-28 15:20:51

by Joonsoo Kim

[permalink] [raw]
Subject: Re: [PATCH v2] zram: support REQ_DISCARD

2014-02-26 23:06 GMT+09:00 Jerome Marchand <[email protected]>:
> On 02/26/2014 02:57 PM, Sergey Senozhatsky wrote:
>> On (02/26/14 14:44), Jerome Marchand wrote:
>>> On 02/26/2014 02:16 PM, Sergey Senozhatsky wrote:
>>>> Hello,
>>>>
>>>> On (02/26/14 14:23), Joonsoo Kim wrote:
>>>>> zram is ram based block device and can be used by backend of filesystem.
>>>>> When filesystem deletes a file, it normally doesn't do anything on data
>>>>> block of that file. It just marks on metadata of that file. This behavior
>>>>> has no problem on disk based block device, but has problems on ram based
>>>>> block device, since we can't free memory used for data block. To overcome
>>>>> this disadvantage, there is REQ_DISCARD functionality. If block device
>>>>> support REQ_DISCARD and filesystem is mounted with discard option,
>>>>> filesystem sends REQ_DISCARD to block device whenever some data blocks are
>>>>> discarded. All we have to do is to handle this request.
>>>>>
>>>>> This patch implements to flag up QUEUE_FLAG_DISCARD and handle this
>>>>> REQ_DISCARD request. With it, we can free memory used by zram if it isn't
>>>>> used.
>>>>>
>>>>> v2: handle unaligned case commented by Jerome
>>>>>
>>>>> Signed-off-by: Joonsoo Kim <[email protected]>
>>>>>
>>>>> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
>>>>> index 5ec61be..5364c1e 100644
>>>>> --- a/drivers/block/zram/zram_drv.c
>>>>> +++ b/drivers/block/zram/zram_drv.c
>>>>> @@ -501,6 +501,36 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
>>>>> return ret;
>>>>> }
>>>>>
>>>>> +static void zram_bio_discard(struct zram *zram, struct bio *bio)
>>>>> +{
>>>>> + u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
>>>>> + size_t n = bio->bi_iter.bi_size;
>>>>> + size_t misalign;
>>>>> +
>>>>> + /*
>>>>> + * On some arch, logical block (4096) aligned request couldn't be
>>>>> + * aligned to PAGE_SIZE, since their PAGE_SIZE aren't 4096.
>>>>> + * Therefore we should handle this misaligned case here.
>>>>> + */
>>>>> + misalign = (bio->bi_iter.bi_sector &
>>>>> + (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
>>>>> + if (misalign) {
>>>>> + if (n < misalign)
>>>>> + return;
>>>>> +
>>>>> + n -= misalign;
>>>>> + index++;
>>>>> + }
>>>>> +
>>>>> + while (n >= PAGE_SIZE) {
>>>>> + write_lock(&zram->meta->tb_lock);
>>>>> + zram_free_page(zram, index);
>>>>> + write_unlock(&zram->meta->tb_lock);
>>>>> + index++;
>>>>> + n -= PAGE_SIZE;
>>>>> + }
>>>>> +}
>>>>> +
>>>>
>>>> a side note, do we need zram_bio_discard() function? I mean, can we handle
>>>> discard request in zram_bvec_rw(), where we already know index, etc. (passed
>>>> from __zram_make_request())?
>>>>

Hello, Sergey.
Sorry for late response.

I think that introducing a new function is the better idea, since a discard
request is significantly different from a rw request. First of all, it doesn't
use bvec, so the splitting code in __zram_make_request() would not work
properly for it. And zram_bvec_rw() is a bvec handler that deals with
PAGE_SIZE-unit requests, which is not appropriate for a discard request.

But it is good to use the common index and offset, so I will move the call to
zram_bio_discard() further down.

Thanks for comment!

2014-02-28 15:24:39

by Joonsoo Kim

[permalink] [raw]
Subject: Re: [PATCH v2] zram: support REQ_DISCARD

2014-02-26 17:07 GMT+09:00 Minchan Kim <[email protected]>:
> Hi Joonsoo,
>
> On Wed, Feb 26, 2014 at 02:23:15PM +0900, Joonsoo Kim wrote:
>> zram is ram based block device and can be used by backend of filesystem.
>> When filesystem deletes a file, it normally doesn't do anything on data
>> block of that file. It just marks on metadata of that file. This behavior
>> has no problem on disk based block device, but has problems on ram based
>> block device, since we can't free memory used for data block. To overcome
>> this disadvantage, there is REQ_DISCARD functionality. If block device
>> support REQ_DISCARD and filesystem is mounted with discard option,
>> filesystem sends REQ_DISCARD to block device whenever some data blocks are
>> discarded. All we have to do is to handle this request.
>>
>> This patch implements to flag up QUEUE_FLAG_DISCARD and handle this
>> REQ_DISCARD request. With it, we can free memory used by zram if it isn't
>> used.
>>
>> v2: handle unaligned case commented by Jerome
>>
>> Signed-off-by: Joonsoo Kim <[email protected]>
>>
>> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
>> index 5ec61be..5364c1e 100644
>> --- a/drivers/block/zram/zram_drv.c
>> +++ b/drivers/block/zram/zram_drv.c
>> @@ -501,6 +501,36 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
>> return ret;
>> }
>>
>> +static void zram_bio_discard(struct zram *zram, struct bio *bio)
>> +{
>> + u32 index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
>> + size_t n = bio->bi_iter.bi_size;
>
> Nitpick:
> Please use more meaningful name(ex, len) rather than 'n'.
>

Hello, Minchan.

Will do.

>> + size_t misalign;
>> +
>> + /*
>> + * On some arch, logical block (4096) aligned request couldn't be
>> + * aligned to PAGE_SIZE, since their PAGE_SIZE aren't 4096.
>> + * Therefore we should handle this misaligned case here.
>> + */
>> + misalign = (bio->bi_iter.bi_sector &
>> + (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
>> + if (misalign) {
>> + if (n < misalign)
>> + return;
>> +
>> + n -= misalign;
>> + index++;
>> + }
>> +
>> + while (n >= PAGE_SIZE) {
>> + write_lock(&zram->meta->tb_lock);
>> + zram_free_page(zram, index);
>> + write_unlock(&zram->meta->tb_lock);
>> + index++;
>> + n -= PAGE_SIZE;
>> + }
>> +}
>> +
>> static void zram_reset_device(struct zram *zram, bool reset_capacity)
>> {
>> size_t index;
>> @@ -618,6 +648,12 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
>> struct bio_vec bvec;
>> struct bvec_iter iter;
>>
>> + if (unlikely(bio->bi_rw & REQ_DISCARD)) {
>> + zram_bio_discard(zram, bio);
>> + bio_endio(bio, 0);
>> + return;
>> + }
>> +
>> index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
>> offset = (bio->bi_iter.bi_sector &
>> (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
>> @@ -784,6 +820,10 @@ static int create_device(struct zram *zram, int device_id)
>> ZRAM_LOGICAL_BLOCK_SIZE);
>> blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
>> blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
>> + zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
>> + zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
>> + zram->disk->queue->limits.discard_zeroes_data = 1;
>
> I don't know what discard_zeroes_data does mean. It seems we should
> make sure zram should return zero pages for discarded block on next
> time but prolblem could happen if you bail out in discard logic
> due to misalign but caller seem to know it was successful?
>
> What happens in this case?
>

This will result in the problem you are thinking of.
I will change it as follows.

if (PAGE_SIZE == ZRAM_LOGICAL_BLOCK_SIZE)
zram->disk->queue->limits.discard_zeroes_data = 1;
else
zram->disk->queue->limits.discard_zeroes_data = 0;

Does it work for you?

Thanks.