This patchset aims at zram clean-up.
[1] cleans up handling of bvecs that span multiple pages.
[2] cleans up partial IO handling.
[3-5] clean up zram by using accessors and removing a pointless structure.
With [2-5] applied, we save a few hundred bytes of text and get a big
improvement in readability.
This patchset is based on v4.11-rc4-mmotm-2017-03-30-16-31 and
*drops* zram-factor-out-partial-io-routine.patch.
x86: 708 bytes saved
add/remove: 1/1 grow/shrink: 0/11 up/down: 478/-1186 (-708)
function old new delta
zram_special_page_read - 478 +478
zram_reset_device 317 314 -3
mem_used_max_store 131 128 -3
compact_store 96 93 -3
mm_stat_show 203 197 -6
zram_add 719 712 -7
zram_slot_free_notify 229 214 -15
zram_make_request 819 803 -16
zram_meta_free 128 111 -17
zram_free_page 180 151 -29
disksize_store 432 361 -71
zram_decompress_page.isra 504 - -504
zram_bvec_rw 2592 2080 -512
Total: Before=25350773, After=25350065, chg -0.00%
ppc64: 231 bytes saved
add/remove: 2/0 grow/shrink: 1/9 up/down: 681/-912 (-231)
function old new delta
zram_special_page_read - 480 +480
zram_slot_lock - 200 +200
vermagic 39 40 +1
mm_stat_show 256 248 -8
zram_meta_free 200 184 -16
zram_add 944 912 -32
zram_free_page 348 308 -40
disksize_store 572 492 -80
zram_decompress_page 664 564 -100
zram_slot_free_notify 292 160 -132
zram_make_request 1132 1000 -132
zram_bvec_rw 2768 2396 -372
Total: Before=17565825, After=17565594, chg -0.00%
Minchan Kim (5):
[1] zram: handle multiple pages attached bio's bvec
[2] zram: partial IO refactoring
[3] zram: use zram_slot_lock instead of raw bit_spin_lock op
[4] zram: remove zram_meta structure
[5] zram: introduce zram data accessor
drivers/block/zram/zram_drv.c | 532 +++++++++++++++++++++---------------------
drivers/block/zram/zram_drv.h | 6 +-
2 files changed, 270 insertions(+), 268 deletions(-)
--
2.7.4
The zram_meta structure is redundant now. Remove it and use the zram
structure directly.
Signed-off-by: Minchan Kim <[email protected]>
---
drivers/block/zram/zram_drv.c | 163 +++++++++++++++++-------------------------
drivers/block/zram/zram_drv.h | 6 +-
2 files changed, 65 insertions(+), 104 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 71b0a584bc85..fdb73222841d 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -58,46 +58,46 @@ static inline struct zram *dev_to_zram(struct device *dev)
}
/* flag operations require table entry bit_spin_lock() being held */
-static int zram_test_flag(struct zram_meta *meta, u32 index,
+static int zram_test_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
{
- return meta->table[index].value & BIT(flag);
+ return zram->table[index].value & BIT(flag);
}
-static void zram_set_flag(struct zram_meta *meta, u32 index,
+static void zram_set_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
{
- meta->table[index].value |= BIT(flag);
+ zram->table[index].value |= BIT(flag);
}
-static void zram_clear_flag(struct zram_meta *meta, u32 index,
+static void zram_clear_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
{
- meta->table[index].value &= ~BIT(flag);
+ zram->table[index].value &= ~BIT(flag);
}
-static inline void zram_set_element(struct zram_meta *meta, u32 index,
+static inline void zram_set_element(struct zram *zram, u32 index,
unsigned long element)
{
- meta->table[index].element = element;
+ zram->table[index].element = element;
}
-static inline void zram_clear_element(struct zram_meta *meta, u32 index)
+static inline void zram_clear_element(struct zram *zram, u32 index)
{
- meta->table[index].element = 0;
+ zram->table[index].element = 0;
}
-static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
+static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
- return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
+ return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}
-static void zram_set_obj_size(struct zram_meta *meta,
+static void zram_set_obj_size(struct zram *zram,
u32 index, size_t size)
{
- unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
+ unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT;
- meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
+ zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}
#if PAGE_SIZE != 4096
@@ -252,9 +252,8 @@ static ssize_t mem_used_max_store(struct device *dev,
down_read(&zram->init_lock);
if (init_done(zram)) {
- struct zram_meta *meta = zram->meta;
atomic_long_set(&zram->stats.max_used_pages,
- zs_get_total_pages(meta->mem_pool));
+ zs_get_total_pages(zram->mem_pool));
}
up_read(&zram->init_lock);
@@ -327,7 +326,6 @@ static ssize_t compact_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct zram *zram = dev_to_zram(dev);
- struct zram_meta *meta;
down_read(&zram->init_lock);
if (!init_done(zram)) {
@@ -335,8 +333,7 @@ static ssize_t compact_store(struct device *dev,
return -EINVAL;
}
- meta = zram->meta;
- zs_compact(meta->mem_pool);
+ zs_compact(zram->mem_pool);
up_read(&zram->init_lock);
return len;
@@ -373,8 +370,8 @@ static ssize_t mm_stat_show(struct device *dev,
down_read(&zram->init_lock);
if (init_done(zram)) {
- mem_used = zs_get_total_pages(zram->meta->mem_pool);
- zs_pool_stats(zram->meta->mem_pool, &pool_stats);
+ mem_used = zs_get_total_pages(zram->mem_pool);
+ zs_pool_stats(zram->mem_pool, &pool_stats);
}
orig_size = atomic64_read(&zram->stats.pages_stored);
@@ -418,32 +415,26 @@ static DEVICE_ATTR_RO(debug_stat);
static void zram_slot_lock(struct zram *zram, u32 index)
{
- struct zram_meta *meta = zram->meta;
-
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value);
}
static void zram_slot_unlock(struct zram *zram, u32 index)
{
- struct zram_meta *meta = zram->meta;
-
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value);
}
static bool zram_special_page_read(struct zram *zram, u32 index,
struct page *page,
unsigned int offset, unsigned int len)
{
- struct zram_meta *meta = zram->meta;
-
zram_slot_lock(zram, index);
- if (unlikely(!meta->table[index].handle) ||
- zram_test_flag(meta, index, ZRAM_SAME)) {
+ if (unlikely(!zram->table[index].handle) ||
+ zram_test_flag(zram, index, ZRAM_SAME)) {
void *mem;
zram_slot_unlock(zram, index);
mem = kmap_atomic(page);
- zram_fill_page(mem + offset, len, meta->table[index].element);
+ zram_fill_page(mem + offset, len, zram->table[index].element);
kunmap_atomic(mem);
return true;
}
@@ -459,14 +450,12 @@ static bool zram_special_page_write(struct zram *zram, u32 index,
void *mem = kmap_atomic(page);
if (page_same_filled(mem, &element)) {
- struct zram_meta *meta = zram->meta;
-
kunmap_atomic(mem);
/* Free memory associated with this sector now. */
zram_slot_lock(zram, index);
zram_free_page(zram, index);
- zram_set_flag(meta, index, ZRAM_SAME);
- zram_set_element(meta, index, element);
+ zram_set_flag(zram, index, ZRAM_SAME);
+ zram_set_element(zram, index, element);
zram_slot_unlock(zram, index);
atomic64_inc(&zram->stats.same_pages);
@@ -477,56 +466,44 @@ static bool zram_special_page_write(struct zram *zram, u32 index,
return false;
}
-static void zram_meta_free(struct zram_meta *meta, u64 disksize)
+static void zram_meta_free(struct zram *zram, u64 disksize)
{
size_t num_pages = disksize >> PAGE_SHIFT;
size_t index;
/* Free all pages that are still in this zram device */
for (index = 0; index < num_pages; index++) {
- unsigned long handle = meta->table[index].handle;
+ unsigned long handle = zram->table[index].handle;
/*
* No memory is allocated for same element filled pages.
* Simply clear same page flag.
*/
- if (!handle || zram_test_flag(meta, index, ZRAM_SAME))
+ if (!handle || zram_test_flag(zram, index, ZRAM_SAME))
continue;
- zs_free(meta->mem_pool, handle);
+ zs_free(zram->mem_pool, handle);
}
- zs_destroy_pool(meta->mem_pool);
- vfree(meta->table);
- kfree(meta);
+ zs_destroy_pool(zram->mem_pool);
+ vfree(zram->table);
}
-static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
+static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
size_t num_pages;
- struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
-
- if (!meta)
- return NULL;
num_pages = disksize >> PAGE_SHIFT;
- meta->table = vzalloc(num_pages * sizeof(*meta->table));
- if (!meta->table) {
- pr_err("Error allocating zram address table\n");
- goto out_error;
- }
+ zram->table = vzalloc(num_pages * sizeof(*zram->table));
+ if (!zram->table)
+ return false;
- meta->mem_pool = zs_create_pool(pool_name);
- if (!meta->mem_pool) {
- pr_err("Error creating memory pool\n");
- goto out_error;
+ zram->mem_pool = zs_create_pool(zram->disk->disk_name);
+ if (!zram->mem_pool) {
+ vfree(zram->table);
+ return false;
}
- return meta;
-
-out_error:
- vfree(meta->table);
- kfree(meta);
- return NULL;
+ return true;
}
/*
@@ -536,16 +513,15 @@ static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
*/
static void zram_free_page(struct zram *zram, size_t index)
{
- struct zram_meta *meta = zram->meta;
- unsigned long handle = meta->table[index].handle;
+ unsigned long handle = zram->table[index].handle;
/*
* No memory is allocated for same element filled pages.
* Simply clear same page flag.
*/
- if (zram_test_flag(meta, index, ZRAM_SAME)) {
- zram_clear_flag(meta, index, ZRAM_SAME);
- zram_clear_element(meta, index);
+ if (zram_test_flag(zram, index, ZRAM_SAME)) {
+ zram_clear_flag(zram, index, ZRAM_SAME);
+ zram_clear_element(zram, index);
atomic64_dec(&zram->stats.same_pages);
return;
}
@@ -553,14 +529,14 @@ static void zram_free_page(struct zram *zram, size_t index)
if (!handle)
return;
- zs_free(meta->mem_pool, handle);
+ zs_free(zram->mem_pool, handle);
- atomic64_sub(zram_get_obj_size(meta, index),
+ atomic64_sub(zram_get_obj_size(zram, index),
&zram->stats.compr_data_size);
atomic64_dec(&zram->stats.pages_stored);
- meta->table[index].handle = 0;
- zram_set_obj_size(meta, index, 0);
+ zram->table[index].handle = 0;
+ zram_set_obj_size(zram, index, 0);
}
static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
@@ -569,16 +545,15 @@ static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
unsigned long handle;
unsigned int size;
void *src, *dst;
- struct zram_meta *meta = zram->meta;
if (zram_special_page_read(zram, index, page, 0, PAGE_SIZE))
return 0;
zram_slot_lock(zram, index);
- handle = meta->table[index].handle;
- size = zram_get_obj_size(meta, index);
+ handle = zram->table[index].handle;
+ size = zram_get_obj_size(zram, index);
- src = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
+ src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
if (size == PAGE_SIZE) {
dst = kmap_atomic(page);
copy_page(dst, src);
@@ -592,7 +567,7 @@ static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
kunmap_atomic(dst);
zcomp_stream_put(zram->comp);
}
- zs_unmap_object(meta->mem_pool, handle);
+ zs_unmap_object(zram->mem_pool, handle);
zram_slot_unlock(zram, index);
/* Should NEVER happen. Return bio error if it does. */
@@ -647,7 +622,6 @@ static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
unsigned int comp_len;
void *src;
unsigned long handle = 0;
- struct zram_meta *meta = zram->meta;
compress_again:
src = kmap_atomic(page);
@@ -676,7 +650,7 @@ static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
* from the slow path and handle has already been allocated.
*/
if (!handle)
- handle = zs_malloc(meta->mem_pool, comp_len,
+ handle = zs_malloc(zram->mem_pool, comp_len,
__GFP_KSWAPD_RECLAIM |
__GFP_NOWARN |
__GFP_HIGHMEM |
@@ -684,7 +658,7 @@ static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
if (!handle) {
zcomp_stream_put(zram->comp);
atomic64_inc(&zram->stats.writestall);
- handle = zs_malloc(meta->mem_pool, comp_len,
+ handle = zs_malloc(zram->mem_pool, comp_len,
GFP_NOIO | __GFP_HIGHMEM |
__GFP_MOVABLE);
*zstrm = zcomp_stream_get(zram->comp);
@@ -707,7 +681,6 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
void *src, *dst;
struct zcomp_strm *zstrm;
unsigned long alloced_pages;
- struct zram_meta *meta = zram->meta;
struct page *page = bvec->bv_page;
if (zram_special_page_write(zram, index, page))
@@ -720,16 +693,16 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
return ret;
}
- alloced_pages = zs_get_total_pages(meta->mem_pool);
+ alloced_pages = zs_get_total_pages(zram->mem_pool);
update_used_max(zram, alloced_pages);
if (zram->limit_pages && alloced_pages > zram->limit_pages) {
zcomp_stream_put(zram->comp);
- zs_free(meta->mem_pool, handle);
+ zs_free(zram->mem_pool, handle);
return -ENOMEM;
}
- dst = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
+ dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
if (comp_len == PAGE_SIZE) {
src = kmap_atomic(page);
@@ -740,7 +713,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
}
zcomp_stream_put(zram->comp);
- zs_unmap_object(meta->mem_pool, handle);
+ zs_unmap_object(zram->mem_pool, handle);
/*
* Free memory associated with this sector
@@ -748,8 +721,8 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
*/
zram_slot_lock(zram, index);
zram_free_page(zram, index);
- meta->table[index].handle = handle;
- zram_set_obj_size(meta, index, comp_len);
+ zram->table[index].handle = handle;
+ zram_set_obj_size(zram, index, comp_len);
zram_slot_unlock(zram, index);
/* Update stats */
@@ -934,10 +907,8 @@ static void zram_slot_free_notify(struct block_device *bdev,
unsigned long index)
{
struct zram *zram;
- struct zram_meta *meta;
zram = bdev->bd_disk->private_data;
- meta = zram->meta;
zram_slot_lock(zram, index);
zram_free_page(zram, index);
@@ -985,7 +956,6 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
static void zram_reset_device(struct zram *zram)
{
- struct zram_meta *meta;
struct zcomp *comp;
u64 disksize;
@@ -998,7 +968,6 @@ static void zram_reset_device(struct zram *zram)
return;
}
- meta = zram->meta;
comp = zram->comp;
disksize = zram->disksize;
@@ -1011,7 +980,7 @@ static void zram_reset_device(struct zram *zram)
up_write(&zram->init_lock);
/* I/O operation under all of CPU are done so let's free */
- zram_meta_free(meta, disksize);
+ zram_meta_free(zram, disksize);
zcomp_destroy(comp);
}
@@ -1020,7 +989,6 @@ static ssize_t disksize_store(struct device *dev,
{
u64 disksize;
struct zcomp *comp;
- struct zram_meta *meta;
struct zram *zram = dev_to_zram(dev);
int err;
@@ -1029,8 +997,7 @@ static ssize_t disksize_store(struct device *dev,
return -EINVAL;
disksize = PAGE_ALIGN(disksize);
- meta = zram_meta_alloc(zram->disk->disk_name, disksize);
- if (!meta)
+ if (!zram_meta_alloc(zram, disksize))
return -ENOMEM;
comp = zcomp_create(zram->compressor);
@@ -1048,7 +1015,6 @@ static ssize_t disksize_store(struct device *dev,
goto out_destroy_comp;
}
- zram->meta = meta;
zram->comp = comp;
zram->disksize = disksize;
set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
@@ -1061,7 +1027,7 @@ static ssize_t disksize_store(struct device *dev,
up_write(&zram->init_lock);
zcomp_destroy(comp);
out_free_meta:
- zram_meta_free(meta, disksize);
+ zram_meta_free(zram, disksize);
return err;
}
@@ -1248,7 +1214,6 @@ static int zram_add(void)
goto out_free_disk;
}
strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
- zram->meta = NULL;
pr_info("Added device: %s\n", zram->disk->disk_name);
return device_id;
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index caeff51f1571..e34e44d02e3e 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -92,13 +92,9 @@ struct zram_stats {
atomic64_t writestall; /* no. of write slow paths */
};
-struct zram_meta {
+struct zram {
struct zram_table_entry *table;
struct zs_pool *mem_pool;
-};
-
-struct zram {
- struct zram_meta *meta;
struct zcomp *comp;
struct gendisk *disk;
/* Prevent concurrent execution of device init */
--
2.7.4
Johannes Thumshirn reported that the system panics when using an NVMe
over Fabrics loopback target with zram.
The reason is that zram expects each bvec in a bio to contain a single
page, but nvme can attach a bvec spanning a huge number of pages, so
zram's index arithmetic goes wrong and the resulting out-of-bounds
access causes the panic.
It could be solved by limiting max_sectors to SECTORS_PER_PAGE as in
[1], but that makes zram slow because every bio has to be split into
per-page pieces. Instead, this patch makes zram aware of multi-page
bvecs, so the problem is solved without any regression.
[1] 0bc315381fe9, zram: set physical queue limits to avoid array out of
bounds accesses
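To make the index arithmetic concrete, here is a small standalone
userspace sketch (illustrative only, not driver code; the _old/_new
helper names are made up) contrasting the two update_position()
formulas for a segment spanning several pages. The slot index has to
advance by every page the segment crosses, not by at most one:

#include <stdio.h>

#define PAGE_SIZE 4096u

/* old formula: advances the index by at most one page */
static void update_position_old(unsigned int *index, unsigned int *offset,
				unsigned int bv_len)
{
	if (*offset + bv_len >= PAGE_SIZE)
		(*index)++;
	*offset = (*offset + bv_len) % PAGE_SIZE;
}

/* new formula: advances the index by every page crossed */
static void update_position_new(unsigned int *index, unsigned int *offset,
				unsigned int bv_len)
{
	*index += (*offset + bv_len) / PAGE_SIZE;
	*offset = (*offset + bv_len) % PAGE_SIZE;
}

int main(void)
{
	unsigned int idx_old = 10, off_old = 0;
	unsigned int idx_new = 10, off_new = 0;
	unsigned int bv_len = 3 * PAGE_SIZE;	/* one bvec, three pages */

	update_position_old(&idx_old, &off_old, bv_len);
	update_position_new(&idx_new, &off_new, bv_len);

	printf("old: index=%u offset=%u\n", idx_old, off_old);	/* 11 0 */
	printf("new: index=%u offset=%u\n", idx_new, off_new);	/* 13 0 */
	return 0;
}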
Cc: Jens Axboe <[email protected]>
Cc: Hannes Reinecke <[email protected]>
Reported-by: Johannes Thumshirn <[email protected]>
Tested-by: Johannes Thumshirn <[email protected]>
Reviewed-by: Johannes Thumshirn <[email protected]>
Signed-off-by: Johannes Thumshirn <[email protected]>
Signed-off-by: Minchan Kim <[email protected]>
---
drivers/block/zram/zram_drv.c | 39 ++++++++++-----------------------------
1 file changed, 10 insertions(+), 29 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 01944419b1f3..28c2836f8c96 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -137,8 +137,7 @@ static inline bool valid_io_request(struct zram *zram,
static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
- if (*offset + bvec->bv_len >= PAGE_SIZE)
- (*index)++;
+ *index += (*offset + bvec->bv_len) / PAGE_SIZE;
*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}
@@ -838,34 +837,20 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
}
bio_for_each_segment(bvec, bio, iter) {
- int max_transfer_size = PAGE_SIZE - offset;
-
- if (bvec.bv_len > max_transfer_size) {
- /*
- * zram_bvec_rw() can only make operation on a single
- * zram page. Split the bio vector.
- */
- struct bio_vec bv;
-
- bv.bv_page = bvec.bv_page;
- bv.bv_len = max_transfer_size;
- bv.bv_offset = bvec.bv_offset;
+ struct bio_vec bv = bvec;
+ unsigned int remained = bvec.bv_len;
+ do {
+ bv.bv_len = min_t(unsigned int, PAGE_SIZE, remained);
if (zram_bvec_rw(zram, &bv, index, offset,
- op_is_write(bio_op(bio))) < 0)
+ op_is_write(bio_op(bio))) < 0)
goto out;
- bv.bv_len = bvec.bv_len - max_transfer_size;
- bv.bv_offset += max_transfer_size;
- if (zram_bvec_rw(zram, &bv, index + 1, 0,
- op_is_write(bio_op(bio))) < 0)
- goto out;
- } else
- if (zram_bvec_rw(zram, &bvec, index, offset,
- op_is_write(bio_op(bio))) < 0)
- goto out;
+ bv.bv_offset += bv.bv_len;
+ remained -= bv.bv_len;
- update_position(&index, &offset, &bvec);
+ update_position(&index, &offset, &bv);
+ } while (remained);
}
bio_endio(bio);
@@ -882,8 +867,6 @@ static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
{
struct zram *zram = queue->queuedata;
- blk_queue_split(queue, &bio, queue->bio_split);
-
if (!valid_io_request(zram, bio->bi_iter.bi_sector,
bio->bi_iter.bi_size)) {
atomic64_inc(&zram->stats.invalid_io);
@@ -1191,8 +1174,6 @@ static int zram_add(void)
blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
- zram->disk->queue->limits.max_sectors = SECTORS_PER_PAGE;
- zram->disk->queue->limits.chunk_sectors = 0;
blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
/*
* zram_bio_discard() will clear all logical blocks if logical block
--
2.7.4
For architectures with PAGE_SIZE > 4K, zram has supported partial IO.
However, the code that mixes normal and partial IO handling is messy
and error-prone to modify when adding upcoming features, so this patch
cleans up zram's IO handling functions.
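As a rough illustration of the read-modify-write pattern the partial
IO path follows, here is a userspace sketch with made-up helper names
(in the real driver the "read" is a decompress into a temporary page
and the "write" is a recompress):

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE	8192u	/* e.g. an 8K-page architecture */
#define BLOCK_SIZE	4096u	/* zram logical block size */

static unsigned char backing_page[PAGE_SIZE];	/* stands in for the stored page */

static void read_full_page(unsigned char *dst)
{
	memcpy(dst, backing_page, PAGE_SIZE);	/* decompress in the driver */
}

static void write_full_page(const unsigned char *src)
{
	memcpy(backing_page, src, PAGE_SIZE);	/* compress in the driver */
}

static void partial_write(unsigned int offset, const unsigned char *buf,
			  unsigned int len)
{
	unsigned char tmp[PAGE_SIZE];

	read_full_page(tmp);			/* 1. fetch the whole page */
	memcpy(tmp + offset, buf, len);		/* 2. patch only the sub-range */
	write_full_page(tmp);			/* 3. store the whole page back */
}

int main(void)
{
	unsigned char block[BLOCK_SIZE];

	memset(backing_page, 0xAA, sizeof(backing_page));
	memset(block, 0x55, sizeof(block));

	partial_write(BLOCK_SIZE, block, BLOCK_SIZE);	/* rewrite the second block */

	printf("byte 0: %#x, byte %u: %#x\n",
	       backing_page[0], BLOCK_SIZE, backing_page[BLOCK_SIZE]);
	return 0;
}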
Signed-off-by: Minchan Kim <[email protected]>
---
drivers/block/zram/zram_drv.c | 333 +++++++++++++++++++++++-------------------
1 file changed, 184 insertions(+), 149 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 28c2836f8c96..7938f4b98b01 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -45,6 +45,8 @@ static const char *default_compressor = "lzo";
/* Module params (documentation at end) */
static unsigned int num_devices = 1;
+static void zram_free_page(struct zram *zram, size_t index);
+
static inline bool init_done(struct zram *zram)
{
return zram->disksize;
@@ -98,10 +100,17 @@ static void zram_set_obj_size(struct zram_meta *meta,
meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}
+#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
return bvec->bv_len != PAGE_SIZE;
}
+#else
+static inline bool is_partial_io(struct bio_vec *bvec)
+{
+ return false;
+}
+#endif
static void zram_revalidate_disk(struct zram *zram)
{
@@ -191,18 +200,6 @@ static bool page_same_filled(void *ptr, unsigned long *element)
return true;
}
-static void handle_same_page(struct bio_vec *bvec, unsigned long element)
-{
- struct page *page = bvec->bv_page;
- void *user_mem;
-
- user_mem = kmap_atomic(page);
- zram_fill_page(user_mem + bvec->bv_offset, bvec->bv_len, element);
- kunmap_atomic(user_mem);
-
- flush_dcache_page(page);
-}
-
static ssize_t initstate_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -418,6 +415,53 @@ static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);
+static bool zram_special_page_read(struct zram *zram, u32 index,
+ struct page *page,
+ unsigned int offset, unsigned int len)
+{
+ struct zram_meta *meta = zram->meta;
+
+ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ if (unlikely(!meta->table[index].handle) ||
+ zram_test_flag(meta, index, ZRAM_SAME)) {
+ void *mem;
+
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ mem = kmap_atomic(page);
+ zram_fill_page(mem + offset, len, meta->table[index].element);
+ kunmap_atomic(mem);
+ return true;
+ }
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+
+ return false;
+}
+
+static bool zram_special_page_write(struct zram *zram, u32 index,
+ struct page *page)
+{
+ unsigned long element;
+ void *mem = kmap_atomic(page);
+
+ if (page_same_filled(mem, &element)) {
+ struct zram_meta *meta = zram->meta;
+
+ kunmap_atomic(mem);
+ /* Free memory associated with this sector now. */
+ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_free_page(zram, index);
+ zram_set_flag(meta, index, ZRAM_SAME);
+ zram_set_element(meta, index, element);
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+
+ atomic64_inc(&zram->stats.same_pages);
+ return true;
+ }
+ kunmap_atomic(mem);
+
+ return false;
+}
+
static void zram_meta_free(struct zram_meta *meta, u64 disksize)
{
size_t num_pages = disksize >> PAGE_SHIFT;
@@ -504,169 +548,104 @@ static void zram_free_page(struct zram *zram, size_t index)
zram_set_obj_size(meta, index, 0);
}
-static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
+static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
{
- int ret = 0;
- unsigned char *cmem;
- struct zram_meta *meta = zram->meta;
+ int ret;
unsigned long handle;
unsigned int size;
+ void *src, *dst;
+ struct zram_meta *meta = zram->meta;
+
+ if (zram_special_page_read(zram, index, page, 0, PAGE_SIZE))
+ return 0;
bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
handle = meta->table[index].handle;
size = zram_get_obj_size(meta, index);
- if (!handle || zram_test_flag(meta, index, ZRAM_SAME)) {
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
- zram_fill_page(mem, PAGE_SIZE, meta->table[index].element);
- return 0;
- }
-
- cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
+ src = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
if (size == PAGE_SIZE) {
- copy_page(mem, cmem);
+ dst = kmap_atomic(page);
+ copy_page(dst, src);
+ kunmap_atomic(dst);
+ ret = 0;
} else {
struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
- ret = zcomp_decompress(zstrm, cmem, size, mem);
+ dst = kmap_atomic(page);
+ ret = zcomp_decompress(zstrm, src, size, dst);
+ kunmap_atomic(dst);
zcomp_stream_put(zram->comp);
}
zs_unmap_object(meta->mem_pool, handle);
bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
/* Should NEVER happen. Return bio error if it does. */
- if (unlikely(ret)) {
+ if (unlikely(ret))
pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
- return ret;
- }
- return 0;
+ return ret;
}
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
- u32 index, int offset)
+ u32 index, int offset)
{
int ret;
struct page *page;
- unsigned char *user_mem, *uncmem = NULL;
- struct zram_meta *meta = zram->meta;
- page = bvec->bv_page;
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
- if (unlikely(!meta->table[index].handle) ||
- zram_test_flag(meta, index, ZRAM_SAME)) {
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
- handle_same_page(bvec, meta->table[index].element);
+ page = bvec->bv_page;
+ if (zram_special_page_read(zram, index, page, bvec->bv_offset,
+ bvec->bv_len))
return 0;
- }
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
-
- if (is_partial_io(bvec))
- /* Use a temporary buffer to decompress the page */
- uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
-
- user_mem = kmap_atomic(page);
- if (!is_partial_io(bvec))
- uncmem = user_mem;
- if (!uncmem) {
- pr_err("Unable to allocate temp memory\n");
- ret = -ENOMEM;
- goto out_cleanup;
+ if (is_partial_io(bvec)) {
+ /* Use a temporary buffer to decompress the page */
+ page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
+ if (!page)
+ return -ENOMEM;
}
- ret = zram_decompress_page(zram, uncmem, index);
- /* Should NEVER happen. Return bio error if it does. */
+ ret = zram_decompress_page(zram, page, index);
if (unlikely(ret))
- goto out_cleanup;
+ goto out;
- if (is_partial_io(bvec))
- memcpy(user_mem + bvec->bv_offset, uncmem + offset,
- bvec->bv_len);
+ if (is_partial_io(bvec)) {
+ void *dst = kmap_atomic(bvec->bv_page);
+ void *src = kmap_atomic(page);
- flush_dcache_page(page);
- ret = 0;
-out_cleanup:
- kunmap_atomic(user_mem);
+ memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
+ kunmap_atomic(src);
+ kunmap_atomic(dst);
+ }
+out:
if (is_partial_io(bvec))
- kfree(uncmem);
+ __free_page(page);
+
return ret;
}
-static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
- int offset)
+static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
+ struct page *page,
+ unsigned long *out_handle, unsigned int *out_comp_len)
{
- int ret = 0;
- unsigned int clen;
+ int ret;
+ unsigned int comp_len;
+ void *src;
unsigned long handle = 0;
- struct page *page;
- unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
struct zram_meta *meta = zram->meta;
- struct zcomp_strm *zstrm = NULL;
- unsigned long alloced_pages;
- unsigned long element;
-
- page = bvec->bv_page;
- if (is_partial_io(bvec)) {
- /*
- * This is a partial IO. We need to read the full page
- * before to write the changes.
- */
- uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
- if (!uncmem) {
- ret = -ENOMEM;
- goto out;
- }
- ret = zram_decompress_page(zram, uncmem, index);
- if (ret)
- goto out;
- }
compress_again:
- user_mem = kmap_atomic(page);
- if (is_partial_io(bvec)) {
- memcpy(uncmem + offset, user_mem + bvec->bv_offset,
- bvec->bv_len);
- kunmap_atomic(user_mem);
- user_mem = NULL;
- } else {
- uncmem = user_mem;
- }
-
- if (page_same_filled(uncmem, &element)) {
- if (user_mem)
- kunmap_atomic(user_mem);
- /* Free memory associated with this sector now. */
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
- zram_free_page(zram, index);
- zram_set_flag(meta, index, ZRAM_SAME);
- zram_set_element(meta, index, element);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
-
- atomic64_inc(&zram->stats.same_pages);
- ret = 0;
- goto out;
- }
-
- zstrm = zcomp_stream_get(zram->comp);
- ret = zcomp_compress(zstrm, uncmem, &clen);
- if (!is_partial_io(bvec)) {
- kunmap_atomic(user_mem);
- user_mem = NULL;
- uncmem = NULL;
- }
+ src = kmap_atomic(page);
+ ret = zcomp_compress(*zstrm, src, &comp_len);
+ kunmap_atomic(src);
if (unlikely(ret)) {
pr_err("Compression failed! err=%d\n", ret);
- goto out;
+ return ret;
}
- src = zstrm->buffer;
- if (unlikely(clen > max_zpage_size)) {
- clen = PAGE_SIZE;
- if (is_partial_io(bvec))
- src = uncmem;
- }
+ if (unlikely(comp_len > max_zpage_size))
+ comp_len = PAGE_SIZE;
/*
* handle allocation has 2 paths:
@@ -682,50 +661,70 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
* from the slow path and handle has already been allocated.
*/
if (!handle)
- handle = zs_malloc(meta->mem_pool, clen,
+ handle = zs_malloc(meta->mem_pool, comp_len,
__GFP_KSWAPD_RECLAIM |
__GFP_NOWARN |
__GFP_HIGHMEM |
__GFP_MOVABLE);
if (!handle) {
zcomp_stream_put(zram->comp);
- zstrm = NULL;
-
atomic64_inc(&zram->stats.writestall);
-
- handle = zs_malloc(meta->mem_pool, clen,
+ handle = zs_malloc(meta->mem_pool, comp_len,
GFP_NOIO | __GFP_HIGHMEM |
__GFP_MOVABLE);
+ *zstrm = zcomp_stream_get(zram->comp);
if (handle)
goto compress_again;
+ return -ENOMEM;
+ }
- pr_err("Error allocating memory for compressed page: %u, size=%u\n",
- index, clen);
- ret = -ENOMEM;
- goto out;
+ *out_handle = handle;
+ *out_comp_len = comp_len;
+ return 0;
+}
+
+static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
+ u32 index, int offset)
+{
+ int ret;
+ unsigned long handle;
+ unsigned int comp_len;
+ void *src, *dst;
+ struct zcomp_strm *zstrm;
+ unsigned long alloced_pages;
+ struct zram_meta *meta = zram->meta;
+ struct page *page = bvec->bv_page;
+
+ if (zram_special_page_write(zram, index, page))
+ return 0;
+
+ zstrm = zcomp_stream_get(zram->comp);
+ ret = zram_compress(zram, &zstrm, page, &handle, &comp_len);
+ if (ret) {
+ zcomp_stream_put(zram->comp);
+ return ret;
}
alloced_pages = zs_get_total_pages(meta->mem_pool);
update_used_max(zram, alloced_pages);
if (zram->limit_pages && alloced_pages > zram->limit_pages) {
+ zcomp_stream_put(zram->comp);
zs_free(meta->mem_pool, handle);
- ret = -ENOMEM;
- goto out;
+ return -ENOMEM;
}
- cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
+ dst = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
- if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
+ if (comp_len == PAGE_SIZE) {
src = kmap_atomic(page);
- copy_page(cmem, src);
+ copy_page(dst, src);
kunmap_atomic(src);
} else {
- memcpy(cmem, src, clen);
+ memcpy(dst, zstrm->buffer, comp_len);
}
zcomp_stream_put(zram->comp);
- zstrm = NULL;
zs_unmap_object(meta->mem_pool, handle);
/*
@@ -734,19 +733,54 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
*/
bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
zram_free_page(zram, index);
-
meta->table[index].handle = handle;
- zram_set_obj_size(meta, index, clen);
+ zram_set_obj_size(meta, index, comp_len);
bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
/* Update stats */
- atomic64_add(clen, &zram->stats.compr_data_size);
+ atomic64_add(comp_len, &zram->stats.compr_data_size);
atomic64_inc(&zram->stats.pages_stored);
+ return 0;
+}
+
+static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
+ u32 index, int offset)
+{
+ int ret;
+ struct page *page = NULL;
+ void *src;
+ struct bio_vec vec;
+
+ vec = *bvec;
+ if (is_partial_io(bvec)) {
+ void *dst;
+ /*
+ * This is a partial IO. We need to read the full page
+ * before to write the changes.
+ */
+ page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
+ if (!page)
+ return -ENOMEM;
+
+ ret = zram_decompress_page(zram, page, index);
+ if (ret)
+ goto out;
+
+ src = kmap_atomic(bvec->bv_page);
+ dst = kmap_atomic(page);
+ memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
+ kunmap_atomic(dst);
+ kunmap_atomic(src);
+
+ vec.bv_page = page;
+ vec.bv_len = PAGE_SIZE;
+ vec.bv_offset = 0;
+ }
+
+ ret = __zram_bvec_write(zram, &vec, index, offset);
out:
- if (zstrm)
- zcomp_stream_put(zram->comp);
if (is_partial_io(bvec))
- kfree(uncmem);
+ __free_page(page);
return ret;
}
@@ -802,6 +836,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
if (!is_write) {
atomic64_inc(&zram->stats.num_reads);
ret = zram_bvec_read(zram, bvec, index, offset);
+ flush_dcache_page(bvec->bv_page);
} else {
atomic64_inc(&zram->stats.num_writes);
ret = zram_bvec_write(zram, bvec, index, offset);
--
2.7.4
With the element field, I have sometimes confused handle and element
accesses. It might just be me, but I think it's time to introduce
accessors to prevent such mistakes in the future.
This is just a clean-up patch, so it shouldn't change any behavior.
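For what it's worth, a tiny userspace sketch of why the accessors help
(simplified layout with made-up names, not the real struct): handle
and element share the same word of the table entry, so only the
wording at the call site tells a reader which meaning is intended:

#include <stdio.h>

struct slot {
	union {
		unsigned long handle;	/* zsmalloc handle for stored pages */
		unsigned long element;	/* fill pattern for ZRAM_SAME pages */
	};
	unsigned long value;		/* flags | object size */
};

static unsigned long slot_get_handle(struct slot *s)
{
	return s->handle;
}

static void slot_set_element(struct slot *s, unsigned long e)
{
	s->element = e;
}

int main(void)
{
	struct slot s = { .handle = 0xabcd };

	/* the named accessor makes it obvious this clobbers the handle word */
	slot_set_element(&s, 0);
	printf("handle now reads %#lx\n", slot_get_handle(&s));
	return 0;
}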
Signed-off-by: Minchan Kim <[email protected]>
---
drivers/block/zram/zram_drv.c | 33 ++++++++++++++++++++++-----------
1 file changed, 22 insertions(+), 11 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index fdb73222841d..c3171e5aa582 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -57,6 +57,16 @@ static inline struct zram *dev_to_zram(struct device *dev)
return (struct zram *)dev_to_disk(dev)->private_data;
}
+static unsigned long zram_get_handle(struct zram *zram, u32 index)
+{
+ return zram->table[index].handle;
+}
+
+static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
+{
+ zram->table[index].handle = handle;
+}
+
/* flag operations require table entry bit_spin_lock() being held */
static int zram_test_flag(struct zram *zram, u32 index,
enum zram_pageflags flag)
@@ -82,9 +92,9 @@ static inline void zram_set_element(struct zram *zram, u32 index,
zram->table[index].element = element;
}
-static inline void zram_clear_element(struct zram *zram, u32 index)
+static unsigned long zram_get_element(struct zram *zram, u32 index)
{
- zram->table[index].element = 0;
+ return zram->table[index].element;
}
static size_t zram_get_obj_size(struct zram *zram, u32 index)
@@ -428,13 +438,14 @@ static bool zram_special_page_read(struct zram *zram, u32 index,
unsigned int offset, unsigned int len)
{
zram_slot_lock(zram, index);
- if (unlikely(!zram->table[index].handle) ||
- zram_test_flag(zram, index, ZRAM_SAME)) {
+ if (unlikely(!zram_get_handle(zram, index) ||
+ zram_test_flag(zram, index, ZRAM_SAME))) {
void *mem;
zram_slot_unlock(zram, index);
mem = kmap_atomic(page);
- zram_fill_page(mem + offset, len, zram->table[index].element);
+ zram_fill_page(mem + offset, len,
+ zram_get_element(zram, index));
kunmap_atomic(mem);
return true;
}
@@ -473,7 +484,7 @@ static void zram_meta_free(struct zram *zram, u64 disksize)
/* Free all pages that are still in this zram device */
for (index = 0; index < num_pages; index++) {
- unsigned long handle = zram->table[index].handle;
+ unsigned long handle = zram_get_handle(zram, index);
/*
* No memory is allocated for same element filled pages.
* Simply clear same page flag.
@@ -513,7 +524,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
*/
static void zram_free_page(struct zram *zram, size_t index)
{
- unsigned long handle = zram->table[index].handle;
+ unsigned long handle = zram_get_handle(zram, index);
/*
* No memory is allocated for same element filled pages.
@@ -521,7 +532,7 @@ static void zram_free_page(struct zram *zram, size_t index)
*/
if (zram_test_flag(zram, index, ZRAM_SAME)) {
zram_clear_flag(zram, index, ZRAM_SAME);
- zram_clear_element(zram, index);
+ zram_set_element(zram, index, 0);
atomic64_dec(&zram->stats.same_pages);
return;
}
@@ -535,7 +546,7 @@ static void zram_free_page(struct zram *zram, size_t index)
&zram->stats.compr_data_size);
atomic64_dec(&zram->stats.pages_stored);
- zram->table[index].handle = 0;
+ zram_set_handle(zram, index, 0);
zram_set_obj_size(zram, index, 0);
}
@@ -550,7 +561,7 @@ static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
return 0;
zram_slot_lock(zram, index);
- handle = zram->table[index].handle;
+ handle = zram_get_handle(zram, index);
size = zram_get_obj_size(zram, index);
src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
@@ -721,7 +732,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
*/
zram_slot_lock(zram, index);
zram_free_page(zram, index);
- zram->table[index].handle = handle;
+ zram_set_handle(zram, index, handle);
zram_set_obj_size(zram, index, comp_len);
zram_slot_unlock(zram, index);
--
2.7.4
As part of this clean-up phase, I want to use a zram wrapper function
to lock table accesses, which is more consistent with zram's other
functions.
Signed-off-by: Minchan Kim <[email protected]>
---
drivers/block/zram/zram_drv.c | 42 ++++++++++++++++++++++++++++--------------
1 file changed, 28 insertions(+), 14 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 7938f4b98b01..71b0a584bc85 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -415,24 +415,39 @@ static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);
+
+static void zram_slot_lock(struct zram *zram, u32 index)
+{
+ struct zram_meta *meta = zram->meta;
+
+ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+}
+
+static void zram_slot_unlock(struct zram *zram, u32 index)
+{
+ struct zram_meta *meta = zram->meta;
+
+ bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+}
+
static bool zram_special_page_read(struct zram *zram, u32 index,
struct page *page,
unsigned int offset, unsigned int len)
{
struct zram_meta *meta = zram->meta;
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_lock(zram, index);
if (unlikely(!meta->table[index].handle) ||
zram_test_flag(meta, index, ZRAM_SAME)) {
void *mem;
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_unlock(zram, index);
mem = kmap_atomic(page);
zram_fill_page(mem + offset, len, meta->table[index].element);
kunmap_atomic(mem);
return true;
}
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_unlock(zram, index);
return false;
}
@@ -448,11 +463,11 @@ static bool zram_special_page_write(struct zram *zram, u32 index,
kunmap_atomic(mem);
/* Free memory associated with this sector now. */
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_lock(zram, index);
zram_free_page(zram, index);
zram_set_flag(meta, index, ZRAM_SAME);
zram_set_element(meta, index, element);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_unlock(zram, index);
atomic64_inc(&zram->stats.same_pages);
return true;
@@ -559,7 +574,7 @@ static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
if (zram_special_page_read(zram, index, page, 0, PAGE_SIZE))
return 0;
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_lock(zram, index);
handle = meta->table[index].handle;
size = zram_get_obj_size(meta, index);
@@ -578,7 +593,7 @@ static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
zcomp_stream_put(zram->comp);
}
zs_unmap_object(meta->mem_pool, handle);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_unlock(zram, index);
/* Should NEVER happen. Return bio error if it does. */
if (unlikely(ret))
@@ -731,11 +746,11 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
* Free memory associated with this sector
* before overwriting unused sectors.
*/
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_lock(zram, index);
zram_free_page(zram, index);
meta->table[index].handle = handle;
zram_set_obj_size(meta, index, comp_len);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_unlock(zram, index);
/* Update stats */
atomic64_add(comp_len, &zram->stats.compr_data_size);
@@ -793,7 +808,6 @@ static void zram_bio_discard(struct zram *zram, u32 index,
int offset, struct bio *bio)
{
size_t n = bio->bi_iter.bi_size;
- struct zram_meta *meta = zram->meta;
/*
* zram manages data in physical block size units. Because logical block
@@ -814,9 +828,9 @@ static void zram_bio_discard(struct zram *zram, u32 index,
}
while (n >= PAGE_SIZE) {
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_lock(zram, index);
zram_free_page(zram, index);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_unlock(zram, index);
atomic64_inc(&zram->stats.notify_free);
index++;
n -= PAGE_SIZE;
@@ -925,9 +939,9 @@ static void zram_slot_free_notify(struct block_device *bdev,
zram = bdev->bd_disk->private_data;
meta = zram->meta;
- bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_lock(zram, index);
zram_free_page(zram, index);
- bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+ zram_slot_unlock(zram, index);
atomic64_inc(&zram->stats.notify_free);
}
--
2.7.4
Hi!
On 04/03/2017 08:17 AM, Minchan Kim wrote:
> For architecture(PAGE_SIZE > 4K), zram have supported partial IO.
> However, the mixed code for handling normal/partial IO is too mess,
> error-prone to modify IO handler functions with upcoming feature
> so this patch aims for cleaning up zram's IO handling functions.
>
> Signed-off-by: Minchan Kim <[email protected]>
> ---
> drivers/block/zram/zram_drv.c | 333 +++++++++++++++++++++++-------------------
> 1 file changed, 184 insertions(+), 149 deletions(-)
>
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index 28c2836f8c96..7938f4b98b01 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -45,6 +45,8 @@ static const char *default_compressor = "lzo";
> /* Module params (documentation at end) */
> static unsigned int num_devices = 1;
>
> +static void zram_free_page(struct zram *zram, size_t index);
> +
> static inline bool init_done(struct zram *zram)
> {
> return zram->disksize;
> @@ -98,10 +100,17 @@ static void zram_set_obj_size(struct zram_meta *meta,
> meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
> }
>
> +#if PAGE_SIZE != 4096
> static inline bool is_partial_io(struct bio_vec *bvec)
> {
> return bvec->bv_len != PAGE_SIZE;
> }
> +#else
For a page size of 4096, bv_len can still be < 4096, and partial pages
should be supported (uncompress before write etc.)?
> +static inline bool is_partial_io(struct bio_vec *bvec)
> +{
> + return false;
> +}
> +#endif
>
> static void zram_revalidate_disk(struct zram *zram)
> {
> @@ -191,18 +200,6 @@ static bool page_same_filled(void *ptr, unsigned long *element)
> return true;
> }
>
> -static void handle_same_page(struct bio_vec *bvec, unsigned long element)
> -{
> - struct page *page = bvec->bv_page;
> - void *user_mem;
> -
> - user_mem = kmap_atomic(page);
> - zram_fill_page(user_mem + bvec->bv_offset, bvec->bv_len, element);
> - kunmap_atomic(user_mem);
> -
> - flush_dcache_page(page);
> -}
> -
> static ssize_t initstate_show(struct device *dev,
> struct device_attribute *attr, char *buf)
> {
> @@ -418,6 +415,53 @@ static DEVICE_ATTR_RO(io_stat);
> static DEVICE_ATTR_RO(mm_stat);
> static DEVICE_ATTR_RO(debug_stat);
>
> +static bool zram_special_page_read(struct zram *zram, u32 index,
> + struct page *page,
> + unsigned int offset, unsigned int len)
> +{
> + struct zram_meta *meta = zram->meta;
> +
> + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
> + if (unlikely(!meta->table[index].handle) ||
> + zram_test_flag(meta, index, ZRAM_SAME)) {
> + void *mem;
> +
> + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> + mem = kmap_atomic(page);
> + zram_fill_page(mem + offset, len, meta->table[index].element);
> + kunmap_atomic(mem);
> + return true;
> + }
> + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> +
> + return false;
> +}
> +
> +static bool zram_special_page_write(struct zram *zram, u32 index,
> + struct page *page)
> +{
> + unsigned long element;
> + void *mem = kmap_atomic(page);
> +
> + if (page_same_filled(mem, &element)) {
> + struct zram_meta *meta = zram->meta;
> +
> + kunmap_atomic(mem);
> + /* Free memory associated with this sector now. */
> + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
> + zram_free_page(zram, index);
> + zram_set_flag(meta, index, ZRAM_SAME);
> + zram_set_element(meta, index, element);
> + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> +
> + atomic64_inc(&zram->stats.same_pages);
> + return true;
> + }
> + kunmap_atomic(mem);
> +
> + return false;
> +}
> +
> static void zram_meta_free(struct zram_meta *meta, u64 disksize)
> {
> size_t num_pages = disksize >> PAGE_SHIFT;
> @@ -504,169 +548,104 @@ static void zram_free_page(struct zram *zram, size_t index)
> zram_set_obj_size(meta, index, 0);
> }
>
> -static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
> +static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
> {
> - int ret = 0;
> - unsigned char *cmem;
> - struct zram_meta *meta = zram->meta;
> + int ret;
> unsigned long handle;
> unsigned int size;
> + void *src, *dst;
> + struct zram_meta *meta = zram->meta;
> +
> + if (zram_special_page_read(zram, index, page, 0, PAGE_SIZE))
> + return 0;
>
> bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
> handle = meta->table[index].handle;
> size = zram_get_obj_size(meta, index);
>
> - if (!handle || zram_test_flag(meta, index, ZRAM_SAME)) {
> - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> - zram_fill_page(mem, PAGE_SIZE, meta->table[index].element);
> - return 0;
> - }
> -
> - cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
> + src = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
> if (size == PAGE_SIZE) {
> - copy_page(mem, cmem);
> + dst = kmap_atomic(page);
> + copy_page(dst, src);
> + kunmap_atomic(dst);
> + ret = 0;
> } else {
> struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
>
> - ret = zcomp_decompress(zstrm, cmem, size, mem);
> + dst = kmap_atomic(page);
> + ret = zcomp_decompress(zstrm, src, size, dst);
> + kunmap_atomic(dst);
> zcomp_stream_put(zram->comp);
> }
> zs_unmap_object(meta->mem_pool, handle);
> bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
>
> /* Should NEVER happen. Return bio error if it does. */
> - if (unlikely(ret)) {
> + if (unlikely(ret))
> pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
> - return ret;
> - }
>
> - return 0;
> + return ret;
> }
>
> static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
> - u32 index, int offset)
> + u32 index, int offset)
> {
> int ret;
> struct page *page;
> - unsigned char *user_mem, *uncmem = NULL;
> - struct zram_meta *meta = zram->meta;
> - page = bvec->bv_page;
>
> - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
> - if (unlikely(!meta->table[index].handle) ||
> - zram_test_flag(meta, index, ZRAM_SAME)) {
> - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> - handle_same_page(bvec, meta->table[index].element);
> + page = bvec->bv_page;
> + if (zram_special_page_read(zram, index, page, bvec->bv_offset,
> + bvec->bv_len))
> return 0;
> - }
> - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> -
> - if (is_partial_io(bvec))
> - /* Use a temporary buffer to decompress the page */
> - uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
> -
> - user_mem = kmap_atomic(page);
> - if (!is_partial_io(bvec))
> - uncmem = user_mem;
>
> - if (!uncmem) {
> - pr_err("Unable to allocate temp memory\n");
> - ret = -ENOMEM;
> - goto out_cleanup;
> + if (is_partial_io(bvec)) {
> + /* Use a temporary buffer to decompress the page */
> + page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
> + if (!page)
> + return -ENOMEM;
> }
>
> - ret = zram_decompress_page(zram, uncmem, index);
> - /* Should NEVER happen. Return bio error if it does. */
> + ret = zram_decompress_page(zram, page, index);
> if (unlikely(ret))
> - goto out_cleanup;
> + goto out;
>
> - if (is_partial_io(bvec))
> - memcpy(user_mem + bvec->bv_offset, uncmem + offset,
> - bvec->bv_len);
> + if (is_partial_io(bvec)) {
> + void *dst = kmap_atomic(bvec->bv_page);
> + void *src = kmap_atomic(page);
>
> - flush_dcache_page(page);
> - ret = 0;
> -out_cleanup:
> - kunmap_atomic(user_mem);
> + memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
> + kunmap_atomic(src);
> + kunmap_atomic(dst);
> + }
> +out:
> if (is_partial_io(bvec))
> - kfree(uncmem);
> + __free_page(page);
> +
> return ret;
> }
>
> -static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
> - int offset)
> +static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
> + struct page *page,
> + unsigned long *out_handle, unsigned int *out_comp_len)
> {
> - int ret = 0;
> - unsigned int clen;
> + int ret;
> + unsigned int comp_len;
> + void *src;
> unsigned long handle = 0;
> - struct page *page;
> - unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
> struct zram_meta *meta = zram->meta;
> - struct zcomp_strm *zstrm = NULL;
> - unsigned long alloced_pages;
> - unsigned long element;
> -
> - page = bvec->bv_page;
> - if (is_partial_io(bvec)) {
> - /*
> - * This is a partial IO. We need to read the full page
> - * before to write the changes.
> - */
> - uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
> - if (!uncmem) {
> - ret = -ENOMEM;
> - goto out;
> - }
> - ret = zram_decompress_page(zram, uncmem, index);
> - if (ret)
> - goto out;
> - }
>
> compress_again:
> - user_mem = kmap_atomic(page);
> - if (is_partial_io(bvec)) {
> - memcpy(uncmem + offset, user_mem + bvec->bv_offset,
> - bvec->bv_len);
> - kunmap_atomic(user_mem);
> - user_mem = NULL;
> - } else {
> - uncmem = user_mem;
> - }
> -
> - if (page_same_filled(uncmem, &element)) {
> - if (user_mem)
> - kunmap_atomic(user_mem);
> - /* Free memory associated with this sector now. */
> - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
> - zram_free_page(zram, index);
> - zram_set_flag(meta, index, ZRAM_SAME);
> - zram_set_element(meta, index, element);
> - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> -
> - atomic64_inc(&zram->stats.same_pages);
> - ret = 0;
> - goto out;
> - }
> -
> - zstrm = zcomp_stream_get(zram->comp);
> - ret = zcomp_compress(zstrm, uncmem, &clen);
> - if (!is_partial_io(bvec)) {
> - kunmap_atomic(user_mem);
> - user_mem = NULL;
> - uncmem = NULL;
> - }
> + src = kmap_atomic(page);
> + ret = zcomp_compress(*zstrm, src, &comp_len);
> + kunmap_atomic(src);
>
> if (unlikely(ret)) {
> pr_err("Compression failed! err=%d\n", ret);
> - goto out;
> + return ret;
> }
>
> - src = zstrm->buffer;
> - if (unlikely(clen > max_zpage_size)) {
> - clen = PAGE_SIZE;
> - if (is_partial_io(bvec))
> - src = uncmem;
> - }
> + if (unlikely(comp_len > max_zpage_size))
> + comp_len = PAGE_SIZE;
>
> /*
> * handle allocation has 2 paths:
> @@ -682,50 +661,70 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
> * from the slow path and handle has already been allocated.
> */
> if (!handle)
> - handle = zs_malloc(meta->mem_pool, clen,
> + handle = zs_malloc(meta->mem_pool, comp_len,
> __GFP_KSWAPD_RECLAIM |
> __GFP_NOWARN |
> __GFP_HIGHMEM |
> __GFP_MOVABLE);
> if (!handle) {
> zcomp_stream_put(zram->comp);
> - zstrm = NULL;
> -
> atomic64_inc(&zram->stats.writestall);
> -
> - handle = zs_malloc(meta->mem_pool, clen,
> + handle = zs_malloc(meta->mem_pool, comp_len,
> GFP_NOIO | __GFP_HIGHMEM |
> __GFP_MOVABLE);
> + *zstrm = zcomp_stream_get(zram->comp);
> if (handle)
> goto compress_again;
> + return -ENOMEM;
> + }
>
> - pr_err("Error allocating memory for compressed page: %u, size=%u\n",
> - index, clen);
> - ret = -ENOMEM;
> - goto out;
> + *out_handle = handle;
> + *out_comp_len = comp_len;
> + return 0;
> +}
> +
> +static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
> + u32 index, int offset)
> +{
> + int ret;
> + unsigned long handle;
> + unsigned int comp_len;
> + void *src, *dst;
> + struct zcomp_strm *zstrm;
> + unsigned long alloced_pages;
> + struct zram_meta *meta = zram->meta;
> + struct page *page = bvec->bv_page;
> +
> + if (zram_special_page_write(zram, index, page))
> + return 0;
> +
> + zstrm = zcomp_stream_get(zram->comp);
> + ret = zram_compress(zram, &zstrm, page, &handle, &comp_len);
> + if (ret) {
> + zcomp_stream_put(zram->comp);
> + return ret;
> }
>
> alloced_pages = zs_get_total_pages(meta->mem_pool);
> update_used_max(zram, alloced_pages);
>
> if (zram->limit_pages && alloced_pages > zram->limit_pages) {
> + zcomp_stream_put(zram->comp);
> zs_free(meta->mem_pool, handle);
> - ret = -ENOMEM;
> - goto out;
> + return -ENOMEM;
> }
>
> - cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
> + dst = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
>
> - if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
> + if (comp_len == PAGE_SIZE) {
> src = kmap_atomic(page);
> - copy_page(cmem, src);
> + copy_page(dst, src);
> kunmap_atomic(src);
> } else {
> - memcpy(cmem, src, clen);
> + memcpy(dst, zstrm->buffer, comp_len);
> }
>
> zcomp_stream_put(zram->comp);
> - zstrm = NULL;
> zs_unmap_object(meta->mem_pool, handle);
>
> /*
> @@ -734,19 +733,54 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
> */
> bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
> zram_free_page(zram, index);
> -
> meta->table[index].handle = handle;
> - zram_set_obj_size(meta, index, clen);
> + zram_set_obj_size(meta, index, comp_len);
> bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
>
> /* Update stats */
> - atomic64_add(clen, &zram->stats.compr_data_size);
> + atomic64_add(comp_len, &zram->stats.compr_data_size);
> atomic64_inc(&zram->stats.pages_stored);
> + return 0;
> +}
> +
> +static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
> + u32 index, int offset)
> +{
> + int ret;
> + struct page *page = NULL;
> + void *src;
> + struct bio_vec vec;
> +
> + vec = *bvec;
> + if (is_partial_io(bvec)) {
> + void *dst;
> + /*
> + * This is a partial IO. We need to read the full page
> + * before to write the changes.
> + */
> + page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
> + if (!page)
> + return -ENOMEM;
> +
> + ret = zram_decompress_page(zram, page, index);
> + if (ret)
> + goto out;
> +
> + src = kmap_atomic(bvec->bv_page);
> + dst = kmap_atomic(page);
> + memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
> + kunmap_atomic(dst);
> + kunmap_atomic(src);
> +
> + vec.bv_page = page;
> + vec.bv_len = PAGE_SIZE;
> + vec.bv_offset = 0;
> + }
> +
> + ret = __zram_bvec_write(zram, &vec, index, offset);
> out:
> - if (zstrm)
> - zcomp_stream_put(zram->comp);
> if (is_partial_io(bvec))
> - kfree(uncmem);
> + __free_page(page);
> return ret;
> }
>
> @@ -802,6 +836,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
> if (!is_write) {
> atomic64_inc(&zram->stats.num_reads);
> ret = zram_bvec_read(zram, bvec, index, offset);
> + flush_dcache_page(bvec->bv_page);
> } else {
> atomic64_inc(&zram->stats.num_writes);
> ret = zram_bvec_write(zram, bvec, index, offset);
>
Hello Minchan,
On (04/03/17 14:17), Minchan Kim wrote:
> With this clean-up phase, I want to use zram's wrapper function
> to lock table access which is more consistent with other zram's
> functions.
which reminds me of...
there was a discussion a long time ago: -rt people absolutely
hate bit spin_locks and suggested we replace them with
normal spin_locks (and I promised to take a look at it, but
got interrupted and never really returned to it).
for !lockdep builds the impact is somewhat small; for lockdep
builds we increase the memory usage, but
a) lockdep builds are debug builds by definition, no one runs lockdep
enabled kernels in production
b) we have lockdep in zram now, which is nice
c) spin_locks probably have better fairness guarantees
what do you think? can we, in this patch set, also replace bit
spin_locks with normal spin_locks?
-ss
Hi Mika,
On Mon, Apr 03, 2017 at 08:52:33AM +0300, Mika Penttilä wrote:
>
> Hi!
>
> On 04/03/2017 08:17 AM, Minchan Kim wrote:
> > For architecture(PAGE_SIZE > 4K), zram have supported partial IO.
> > However, the mixed code for handling normal/partial IO is too mess,
> > error-prone to modify IO handler functions with upcoming feature
> > so this patch aims for cleaning up zram's IO handling functions.
> >
> > Signed-off-by: Minchan Kim <[email protected]>
> > ---
> > drivers/block/zram/zram_drv.c | 333 +++++++++++++++++++++++-------------------
> > 1 file changed, 184 insertions(+), 149 deletions(-)
> >
> > diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> > index 28c2836f8c96..7938f4b98b01 100644
> > --- a/drivers/block/zram/zram_drv.c
> > +++ b/drivers/block/zram/zram_drv.c
> > @@ -45,6 +45,8 @@ static const char *default_compressor = "lzo";
> > /* Module params (documentation at end) */
> > static unsigned int num_devices = 1;
> >
> > +static void zram_free_page(struct zram *zram, size_t index);
> > +
> > static inline bool init_done(struct zram *zram)
> > {
> > return zram->disksize;
> > @@ -98,10 +100,17 @@ static void zram_set_obj_size(struct zram_meta *meta,
> > meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
> > }
> >
> > +#if PAGE_SIZE != 4096
> > static inline bool is_partial_io(struct bio_vec *bvec)
> > {
> > return bvec->bv_len != PAGE_SIZE;
> > }
> > +#else
>
> For page size of 4096 bv_len can still be < 4096 and partial pages should be supported
> (uncompress before write etc). ?
zram declares this.
#define ZRAM_LOGICAL_BLOCK_SIZE (1<<12)
blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
blk_queue_logical_block_size(zram->disk->queue,
ZRAM_LOGICAL_BLOCK_SIZE);
So, I thought there is no such partial IO on a 4096-byte page architecture.
Am I missing something? Could you describe a scenario where it happens?
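For reference, the #else branch that the quote cuts off above is expected to
reduce is_partial_io() to a constant; a minimal sketch of that stub:

#else
/*
 * With PAGE_SIZE == 4096 the logical block size equals the page size,
 * so every bvec we get should cover a whole page: no partial IO.
 */
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif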
Thanks!
Hi Sergey,
On Mon, Apr 03, 2017 at 03:08:58PM +0900, Sergey Senozhatsky wrote:
> Hello Minchan,
>
> On (04/03/17 14:17), Minchan Kim wrote:
> > With this clean-up phase, I want to use zram's wrapper function
> > to lock table access which is more consistent with other zram's
> > functions.
>
> which reminds me of...
>
> there was a discussion a long time ago, -rt people absolutely
> hate bit spin_locks and they suggested us to replace it with
> normal spin_locks (and I promised to take a look at it, but
> got interrupted and never really returned back to it).
>
> for !lockdep builds the impact is somewhat small; for lockdep
> builds we increase the memory usage, but
>
> a) lockdep builds are debug builds by definition, no one runs lockdep
> enabled kernels in production
>
> b) we have lockdep in zram now, which is nice
It's really something I want to have.
>
> c) spin_locks probably have better fairness guarantees
In fact, it wouldn't be that important because zram's slot lock contention
is not heavy.
>
>
> what do you think? can we, in this patch set, also replace bit
> spin_locks with a normal spin_lock?
Changing only the zram side from bit_spin_lock to spin_lock would leave it
crippled. I mean zsmalloc should be changed, too,
and that's really hard. :(
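For now, patch [3] only wraps the existing bit_spin_lock in zram-side
helpers, roughly like this (a sketch of the post-series form, after
zram_meta is folded into struct zram; no behavioural change intended):

static void zram_slot_lock(struct zram *zram, u32 index)
{
	bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value);
}

static void zram_slot_unlock(struct zram *zram, u32 index)
{
	bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value);
}

Moving zram itself to a real spinlock would then only touch these two
helpers; the hard part remains zsmalloc.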
On 04/03/2017 09:12 AM, Minchan Kim wrote:
> Hi Mika,
>
> On Mon, Apr 03, 2017 at 08:52:33AM +0300, Mika Penttilä wrote:
>>
>> Hi!
>>
>> On 04/03/2017 08:17 AM, Minchan Kim wrote:
>>> For architecture(PAGE_SIZE > 4K), zram have supported partial IO.
>>> However, the mixed code for handling normal/partial IO is too mess,
>>> error-prone to modify IO handler functions with upcoming feature
>>> so this patch aims for cleaning up zram's IO handling functions.
>>>
>>> Signed-off-by: Minchan Kim <[email protected]>
>>> ---
>>> drivers/block/zram/zram_drv.c | 333 +++++++++++++++++++++++-------------------
>>> 1 file changed, 184 insertions(+), 149 deletions(-)
>>>
>>> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
>>> index 28c2836f8c96..7938f4b98b01 100644
>>> --- a/drivers/block/zram/zram_drv.c
>>> +++ b/drivers/block/zram/zram_drv.c
>>> @@ -45,6 +45,8 @@ static const char *default_compressor = "lzo";
>>> /* Module params (documentation at end) */
>>> static unsigned int num_devices = 1;
>>>
>>> +static void zram_free_page(struct zram *zram, size_t index);
>>> +
>>> static inline bool init_done(struct zram *zram)
>>> {
>>> return zram->disksize;
>>> @@ -98,10 +100,17 @@ static void zram_set_obj_size(struct zram_meta *meta,
>>> meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
>>> }
>>>
>>> +#if PAGE_SIZE != 4096
>>> static inline bool is_partial_io(struct bio_vec *bvec)
>>> {
>>> return bvec->bv_len != PAGE_SIZE;
>>> }
>>> +#else
>>
>> For page size of 4096 bv_len can still be < 4096 and partial pages should be supported
>> (uncompress before write etc). ?
>
> zram declares this.
>
> #define ZRAM_LOGICAL_BLOCK_SIZE (1<<12)
>
> blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
> blk_queue_logical_block_size(zram->disk->queue,
> ZRAM_LOGICAL_BLOCK_SIZE);
>
> So, I thought there is no such partial IO in 4096 page architecture.
> Am I missing something? Could you tell the scenario if it happens?
I think you're right. At least swap operates with a minimum size of 4096.
>
> Thanks!
>
On (04/03/17 15:34), Minchan Kim wrote:
[..]
> > c) spin_locks probably have better fairness guarantees
>
> In fact, it wouldn't be that important because zram's slot lock contention
> is not heavy.
mostly agree. I think (and I may be mistaken) direct IO
causes contention; but direct IO is probably not a usual
zram workload.
> > what do you think? can we, in this patch set, also replace bit
> > spin_locks with a normal spin_lock?
>
> Changing only the zram side from bit_spin_lock to spin_lock would leave it
> crippled. I mean zsmalloc should be changed, too,
> and that's really hard. :(
hm, good point.
-ss
On Mon, 3 Apr 2017 14:17:29 +0900 Minchan Kim <[email protected]> wrote:
> Johannes Thumshirn reported that the system panics when using an NVMe over
> Fabrics loopback target with zram.
>
> The reason is that zram expects each bvec in a bio to contain a single page,
> but nvme can attach a huge bulk of pages to a bio's bvec, so zram's index
> arithmetic can go wrong and the resulting out-of-bounds access causes the
> panic.
>
> It could be solved by limiting max_sectors with SECTORS_PER_PAGE as in
> [1], but that makes zram slow because the bio would have to be split per
> page, so this patch makes zram aware of multiple pages in a bvec and solves
> the problem without any regression.
>
> [1] 0bc315381fe9, zram: set physical queue limits to avoid array out of
> bounds accesses
This isn't a cleanup - it fixes a panic (or is it a BUG or is it an
oops, or...)
How serious is this bug? Should the fix be backported into -stable
kernels? etc.
A better description of the bug's behaviour would be appropriate.
> Cc: Jens Axboe <[email protected]>
> Cc: Hannes Reinecke <[email protected]>
> Reported-by: Johannes Thumshirn <[email protected]>
> Tested-by: Johannes Thumshirn <[email protected]>
> Reviewed-by: Johannes Thumshirn <[email protected]>
> Signed-off-by: Johannes Thumshirn <[email protected]>
> Signed-off-by: Minchan Kim <[email protected]>
This signoff trail is confusing. It somewhat implies that Johannes
authored the patch which I don't think is the case?
Hi Andrew,
On Mon, Apr 03, 2017 at 03:45:28PM -0700, Andrew Morton wrote:
> On Mon, 3 Apr 2017 14:17:29 +0900 Minchan Kim <[email protected]> wrote:
>
> > Johannes Thumshirn reported system goes the panic when using NVMe over
> > Fabrics loopback target with zram.
> >
> > The reason is zram expects each bvec in bio contains a single page
> > but nvme can attach a huge bulk of pages attached to the bio's bvec
> > so that zram's index arithmetic could be wrong so that out-of-bound
> > access makes panic.
> >
> > It can be solved by limiting max_sectors with SECTORS_PER_PAGE like
> > [1] but it makes zram slow because bio should split with each pages
> > so this patch makes zram aware of multiple pages in a bvec so it
> > could solve without any regression.
> >
> > [1] 0bc315381fe9, zram: set physical queue limits to avoid array out of
> > bounds accesses
>
> This isn't a cleanup - it fixes a panic (or is it a BUG or is it an
> oops, or...)
I should have written more carefully.
Johannes reported the problem along with the fix [1], and Jens already sent
it to mainline. However, during the discussion we found a nicer way to solve
the problem, so this is a revert of [1] plus a fix for the problem done
another way, which doesn't need to split the bio.
Thanks.
>
> How serious is this bug? Should the fix be backported into -stable
> kernels? etc.
>
> A better description of the bug's behaviour would be appropriate.
>
> > Cc: Jens Axboe <[email protected]>
> > Cc: Hannes Reinecke <[email protected]>
> > Reported-by: Johannes Thumshirn <[email protected]>
> > Tested-by: Johannes Thumshirn <[email protected]>
> > Reviewed-by: Johannes Thumshirn <[email protected]>
> > Signed-off-by: Johannes Thumshirn <[email protected]>
> > Signed-off-by: Minchan Kim <[email protected]>
>
> This signoff trail is confusing. It somewhat implies that Johannes
> authored the patch which I don't think is the case?
>
>
Hello,
On (04/03/17 14:17), Minchan Kim wrote:
> +static bool zram_special_page_read(struct zram *zram, u32 index,
> + struct page *page,
> + unsigned int offset, unsigned int len)
> +{
> + struct zram_meta *meta = zram->meta;
> +
> + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
> + if (unlikely(!meta->table[index].handle) ||
> + zram_test_flag(meta, index, ZRAM_SAME)) {
> + void *mem;
> +
> + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> + mem = kmap_atomic(page);
> + zram_fill_page(mem + offset, len, meta->table[index].element);
> + kunmap_atomic(mem);
> + return true;
> + }
> + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> +
> + return false;
> +}
> +
> +static bool zram_special_page_write(struct zram *zram, u32 index,
> + struct page *page)
> +{
> + unsigned long element;
> + void *mem = kmap_atomic(page);
> +
> + if (page_same_filled(mem, &element)) {
> + struct zram_meta *meta = zram->meta;
> +
> + kunmap_atomic(mem);
> + /* Free memory associated with this sector now. */
> + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
> + zram_free_page(zram, index);
> + zram_set_flag(meta, index, ZRAM_SAME);
> + zram_set_element(meta, index, element);
> + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> +
> + atomic64_inc(&zram->stats.same_pages);
> + return true;
> + }
> + kunmap_atomic(mem);
> +
> + return false;
> +}
zram_special_page_read() and zram_special_page_write() have slightly
different locking semantics.
zram_special_page_read() copies out a ZRAM_SAME page with the slot unlocked
(can the slot get overwritten in the meantime?), while
zram_special_page_write() keeps the slot locked throughout the entire
operation.
> static void zram_meta_free(struct zram_meta *meta, u64 disksize)
> {
> size_t num_pages = disksize >> PAGE_SHIFT;
> @@ -504,169 +548,104 @@ static void zram_free_page(struct zram *zram, size_t index)
> zram_set_obj_size(meta, index, 0);
> }
>
> -static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
> +static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
> {
> - int ret = 0;
> - unsigned char *cmem;
> - struct zram_meta *meta = zram->meta;
> + int ret;
> unsigned long handle;
> unsigned int size;
> + void *src, *dst;
> + struct zram_meta *meta = zram->meta;
> +
> + if (zram_special_page_read(zram, index, page, 0, PAGE_SIZE))
> + return 0;
>
> bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
> handle = meta->table[index].handle;
> size = zram_get_obj_size(meta, index);
>
> - if (!handle || zram_test_flag(meta, index, ZRAM_SAME)) {
> - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> - zram_fill_page(mem, PAGE_SIZE, meta->table[index].element);
> - return 0;
> - }
> -
> - cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
> + src = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
> if (size == PAGE_SIZE) {
> - copy_page(mem, cmem);
> + dst = kmap_atomic(page);
> + copy_page(dst, src);
> + kunmap_atomic(dst);
> + ret = 0;
> } else {
> struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
>
> - ret = zcomp_decompress(zstrm, cmem, size, mem);
> + dst = kmap_atomic(page);
> + ret = zcomp_decompress(zstrm, src, size, dst);
> + kunmap_atomic(dst);
> zcomp_stream_put(zram->comp);
> }
> zs_unmap_object(meta->mem_pool, handle);
> bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
>
> /* Should NEVER happen. Return bio error if it does. */
> - if (unlikely(ret)) {
> + if (unlikely(ret))
> pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
> - return ret;
> - }
>
> - return 0;
> + return ret;
> }
>
> static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
> - u32 index, int offset)
> + u32 index, int offset)
> {
> int ret;
> struct page *page;
> - unsigned char *user_mem, *uncmem = NULL;
> - struct zram_meta *meta = zram->meta;
> - page = bvec->bv_page;
>
> - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
> - if (unlikely(!meta->table[index].handle) ||
> - zram_test_flag(meta, index, ZRAM_SAME)) {
> - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> - handle_same_page(bvec, meta->table[index].element);
> + page = bvec->bv_page;
> + if (zram_special_page_read(zram, index, page, bvec->bv_offset,
> + bvec->bv_len))
so, I think zram_bvec_read() path calls zram_special_page_read() twice:
a) direct zram_special_page_read() call
b) zram_decompress_page()->zram_special_page_read()
is it supposed to be like that?
> return 0;
> - }
> - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> -
> - if (is_partial_io(bvec))
> - /* Use a temporary buffer to decompress the page */
> - uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
> -
> - user_mem = kmap_atomic(page);
> - if (!is_partial_io(bvec))
> - uncmem = user_mem;
>
> - if (!uncmem) {
> - pr_err("Unable to allocate temp memory\n");
> - ret = -ENOMEM;
> - goto out_cleanup;
> + if (is_partial_io(bvec)) {
> + /* Use a temporary buffer to decompress the page */
> + page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
> + if (!page)
> + return -ENOMEM;
> }
>
> - ret = zram_decompress_page(zram, uncmem, index);
> - /* Should NEVER happen. Return bio error if it does. */
> + ret = zram_decompress_page(zram, page, index);
> if (unlikely(ret))
> - goto out_cleanup;
> + goto out;
>
> - if (is_partial_io(bvec))
> - memcpy(user_mem + bvec->bv_offset, uncmem + offset,
> - bvec->bv_len);
> + if (is_partial_io(bvec)) {
> + void *dst = kmap_atomic(bvec->bv_page);
> + void *src = kmap_atomic(page);
>
> - flush_dcache_page(page);
> - ret = 0;
> -out_cleanup:
> - kunmap_atomic(user_mem);
> + memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
> + kunmap_atomic(src);
> + kunmap_atomic(dst);
> + }
> +out:
> if (is_partial_io(bvec))
> - kfree(uncmem);
> + __free_page(page);
> +
> return ret;
> }
>
> -static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
> - int offset)
> +static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
> + struct page *page,
> + unsigned long *out_handle, unsigned int *out_comp_len)
ok, I see why... not super happy with the `zstrm' magic, but OK. can do.
-ss
On (04/03/17 14:17), Minchan Kim wrote:
> With this clean-up phase, I want to use zram's wrapper function
> to lock table access which is more consistent with other zram's
> functions.
>
> Signed-off-by: Minchan Kim <[email protected]>
Reviewed-by: Sergey Senozhatsky <[email protected]>
-ss
On (04/03/17 14:17), Minchan Kim wrote:
[..]
> -static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
> +static bool zram_meta_alloc(struct zram *zram, u64 disksize)
> {
> size_t num_pages;
> - struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
> -
> - if (!meta)
> - return NULL;
>
> num_pages = disksize >> PAGE_SHIFT;
> - meta->table = vzalloc(num_pages * sizeof(*meta->table));
> - if (!meta->table) {
> - pr_err("Error allocating zram address table\n");
> - goto out_error;
> - }
> + zram->table = vzalloc(num_pages * sizeof(*zram->table));
> + if (!zram->table)
> + return false;
>
> - meta->mem_pool = zs_create_pool(pool_name);
> - if (!meta->mem_pool) {
> - pr_err("Error creating memory pool\n");
> - goto out_error;
> + zram->mem_pool = zs_create_pool(zram->disk->disk_name);
> + if (!zram->mem_pool) {
> + vfree(zram->table);
> + return false;
> }
>
> - return meta;
> -
> -out_error:
> - vfree(meta->table);
> - kfree(meta);
> - return NULL;
> + return true;
> }
[..]
> @@ -1020,7 +989,6 @@ static ssize_t disksize_store(struct device *dev,
> {
> u64 disksize;
> struct zcomp *comp;
> - struct zram_meta *meta;
> struct zram *zram = dev_to_zram(dev);
> int err;
>
> @@ -1029,8 +997,7 @@ static ssize_t disksize_store(struct device *dev,
> return -EINVAL;
>
> disksize = PAGE_ALIGN(disksize);
> - meta = zram_meta_alloc(zram->disk->disk_name, disksize);
> - if (!meta)
> + if (!zram_meta_alloc(zram, disksize))
> return -ENOMEM;
>
> comp = zcomp_create(zram->compressor);
> @@ -1048,7 +1015,6 @@ static ssize_t disksize_store(struct device *dev,
> goto out_destroy_comp;
> }
>
> - zram->meta = meta;
> zram->comp = comp;
> zram->disksize = disksize;
> set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
> @@ -1061,7 +1027,7 @@ static ssize_t disksize_store(struct device *dev,
> up_write(&zram->init_lock);
> zcomp_destroy(comp);
> out_free_meta:
> - zram_meta_free(meta, disksize);
> + zram_meta_free(zram, disksize);
> return err;
> }
OK, I don't think it's the same.
we used to have
struct zram_meta *zram_meta_alloc()
{
meta->table = vzalloc()
meta->mem_pool = zs_create_pool();
return meta;
}
disksize_store()
{
meta = zram_meta_alloc();
if (init_done(zram)) {
pr_info("Cannot change disksize for initialized device\n");
goto out_destroy_comp;
}
zram->meta = meta;
^^^^^^^^^^^^^^^^^^
}
now we have
struct zram_meta *zram_meta_alloc()
{
zram->table = vzalloc()
zram->mem_pool = zs_create_pool();
return true;
}
disksize_store()
{
zram_meta_alloc();
^^^^^^^^^^^^^^^^^
if (init_done(zram)) {
pr_info("Cannot change disksize for initialized device\n");
goto out_destroy_comp;
}
}
by the time we call init_done(zram) on an already initialized device,
zram->table and zram->mem_pool have been overwritten and lost. right?
[..]
> -struct zram_meta {
> +struct zram {
> struct zram_table_entry *table;
> struct zs_pool *mem_pool;
> -};
> -
> -struct zram {
> - struct zram_meta *meta;
> struct zcomp *comp;
> struct gendisk *disk;
> /* Prevent concurrent execution of device init */
we still have several zram_meta_FOO() left overs in zram_drv.c
-ss
On (04/03/17 14:17), Minchan Kim wrote:
> With the element field, I sometimes get confused between handle and
> element access. It might be my fault, but I think it's time to introduce
> accessors to prevent a future idiot like me.
> This patch is just a clean-up, so it shouldn't change any
> behavior.
>
> Signed-off-by: Minchan Kim <[email protected]>
Reviewed-by: Sergey Senozhatsky <[email protected]>
-ss
Hi Sergey,
On Tue, Apr 04, 2017 at 11:17:06AM +0900, Sergey Senozhatsky wrote:
> Hello,
>
> On (04/03/17 14:17), Minchan Kim wrote:
> > +static bool zram_special_page_read(struct zram *zram, u32 index,
> > + struct page *page,
> > + unsigned int offset, unsigned int len)
> > +{
> > + struct zram_meta *meta = zram->meta;
> > +
> > + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
> > + if (unlikely(!meta->table[index].handle) ||
> > + zram_test_flag(meta, index, ZRAM_SAME)) {
> > + void *mem;
> > +
> > + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> > + mem = kmap_atomic(page);
> > + zram_fill_page(mem + offset, len, meta->table[index].element);
> > + kunmap_atomic(mem);
> > + return true;
> > + }
> > + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> > +
> > + return false;
> > +}
> > +
> > +static bool zram_special_page_write(struct zram *zram, u32 index,
> > + struct page *page)
> > +{
> > + unsigned long element;
> > + void *mem = kmap_atomic(page);
> > +
> > + if (page_same_filled(mem, &element)) {
> > + struct zram_meta *meta = zram->meta;
> > +
> > + kunmap_atomic(mem);
> > + /* Free memory associated with this sector now. */
> > + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
> > + zram_free_page(zram, index);
> > + zram_set_flag(meta, index, ZRAM_SAME);
> > + zram_set_element(meta, index, element);
> > + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> > +
> > + atomic64_inc(&zram->stats.same_pages);
> > + return true;
> > + }
> > + kunmap_atomic(mem);
> > +
> > + return false;
> > +}
>
> zram_special_page_read() and zram_special_page_write() have slightly
> different locking semantics.
>
> zram_special_page_read() copies out a ZRAM_SAME page with the slot unlocked
> (can the slot get overwritten in the meantime?), while
IMHO, yes, it can be overwritten, but it doesn't corrupt the kernel.
I mean, if such a race happens, it's the user's fault; the user should
protect against the race. zram is a dumb block device, so it just reads and
writes blocks on the user's request, but the one promise we must keep is
that it shouldn't corrupt the kernel.
From that point of view, zram_special_page_read returning stale data
wouldn't be a problem, I think.
> zram_special_page_write() keeps the slot locked throughout the entire
> operation.
zram_special_page_write is different because it updates the zram table slot
via zram_set_[flag|element], so it must be done under the slot lock.
>
> > static void zram_meta_free(struct zram_meta *meta, u64 disksize)
> > {
> > size_t num_pages = disksize >> PAGE_SHIFT;
> > @@ -504,169 +548,104 @@ static void zram_free_page(struct zram *zram, size_t index)
> > zram_set_obj_size(meta, index, 0);
> > }
> >
> > -static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
> > +static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
> > {
> > - int ret = 0;
> > - unsigned char *cmem;
> > - struct zram_meta *meta = zram->meta;
> > + int ret;
> > unsigned long handle;
> > unsigned int size;
> > + void *src, *dst;
> > + struct zram_meta *meta = zram->meta;
> > +
> > + if (zram_special_page_read(zram, index, page, 0, PAGE_SIZE))
> > + return 0;
> >
> > bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
> > handle = meta->table[index].handle;
> > size = zram_get_obj_size(meta, index);
> >
> > - if (!handle || zram_test_flag(meta, index, ZRAM_SAME)) {
> > - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> > - zram_fill_page(mem, PAGE_SIZE, meta->table[index].element);
> > - return 0;
> > - }
> > -
> > - cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
> > + src = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
> > if (size == PAGE_SIZE) {
> > - copy_page(mem, cmem);
> > + dst = kmap_atomic(page);
> > + copy_page(dst, src);
> > + kunmap_atomic(dst);
> > + ret = 0;
> > } else {
> > struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
> >
> > - ret = zcomp_decompress(zstrm, cmem, size, mem);
> > + dst = kmap_atomic(page);
> > + ret = zcomp_decompress(zstrm, src, size, dst);
> > + kunmap_atomic(dst);
> > zcomp_stream_put(zram->comp);
> > }
> > zs_unmap_object(meta->mem_pool, handle);
> > bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> >
> > /* Should NEVER happen. Return bio error if it does. */
> > - if (unlikely(ret)) {
> > + if (unlikely(ret))
> > pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
> > - return ret;
> > - }
> >
> > - return 0;
> > + return ret;
> > }
> >
> > static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
> > - u32 index, int offset)
> > + u32 index, int offset)
> > {
> > int ret;
> > struct page *page;
> > - unsigned char *user_mem, *uncmem = NULL;
> > - struct zram_meta *meta = zram->meta;
> > - page = bvec->bv_page;
> >
> > - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
> > - if (unlikely(!meta->table[index].handle) ||
> > - zram_test_flag(meta, index, ZRAM_SAME)) {
> > - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> > - handle_same_page(bvec, meta->table[index].element);
> > + page = bvec->bv_page;
> > + if (zram_special_page_read(zram, index, page, bvec->bv_offset,
> > + bvec->bv_len))
>
> so, I think zram_bvec_read() path calls zram_special_page_read() twice:
>
> a) direct zram_special_page_read() call
>
> b) zram_decompress_page()->zram_special_page_read()
>
> is it supposed to be so?
Yes, because zram_decompress_page is called by zram_bvec_write
in the partial IO case. Maybe we can make it simpler by removing the
zram_special_page_read call in zram_bvec_read. I will look.
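Something along these lines, maybe (a rough, untested sketch of what I have
in mind; zram_decompress_page() already handles the empty slot and
ZRAM_SAME cases through its own zram_special_page_read() call):

static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset)
{
	int ret;
	struct page *page = bvec->bv_page;

	if (is_partial_io(bvec)) {
		/* Use a temporary buffer to decompress the page */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;
	}

	ret = zram_decompress_page(zram, page, index);
	if (unlikely(ret))
		goto out;

	if (is_partial_io(bvec)) {
		void *dst = kmap_atomic(bvec->bv_page);
		void *src = kmap_atomic(page);

		memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
		kunmap_atomic(src);
		kunmap_atomic(dst);
	}
out:
	if (is_partial_io(bvec))
		__free_page(page);

	return ret;
}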
On Tue, Apr 04, 2017 at 11:18:51AM +0900, Sergey Senozhatsky wrote:
> On (04/03/17 14:17), Minchan Kim wrote:
> > With this clean-up phase, I want to use zram's wrapper function
> > to lock table access which is more consistent with other zram's
> > functions.
> >
> > Signed-off-by: Minchan Kim <[email protected]>
>
> Reviewed-by: Sergey Senozhatsky <[email protected]>
Thanks!
On Tue, Apr 04, 2017 at 11:31:15AM +0900, Sergey Senozhatsky wrote:
> On (04/03/17 14:17), Minchan Kim wrote:
> [..]
> > -static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize)
> > +static bool zram_meta_alloc(struct zram *zram, u64 disksize)
> > {
> > size_t num_pages;
> > - struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
> > -
> > - if (!meta)
> > - return NULL;
> >
> > num_pages = disksize >> PAGE_SHIFT;
> > - meta->table = vzalloc(num_pages * sizeof(*meta->table));
> > - if (!meta->table) {
> > - pr_err("Error allocating zram address table\n");
> > - goto out_error;
> > - }
> > + zram->table = vzalloc(num_pages * sizeof(*zram->table));
> > + if (!zram->table)
> > + return false;
> >
> > - meta->mem_pool = zs_create_pool(pool_name);
> > - if (!meta->mem_pool) {
> > - pr_err("Error creating memory pool\n");
> > - goto out_error;
> > + zram->mem_pool = zs_create_pool(zram->disk->disk_name);
> > + if (!zram->mem_pool) {
> > + vfree(zram->table);
> > + return false;
> > }
> >
> > - return meta;
> > -
> > -out_error:
> > - vfree(meta->table);
> > - kfree(meta);
> > - return NULL;
> > + return true;
> > }
>
> [..]
>
> > @@ -1020,7 +989,6 @@ static ssize_t disksize_store(struct device *dev,
> > {
> > u64 disksize;
> > struct zcomp *comp;
> > - struct zram_meta *meta;
> > struct zram *zram = dev_to_zram(dev);
> > int err;
> >
> > @@ -1029,8 +997,7 @@ static ssize_t disksize_store(struct device *dev,
> > return -EINVAL;
> >
> > disksize = PAGE_ALIGN(disksize);
> > - meta = zram_meta_alloc(zram->disk->disk_name, disksize);
> > - if (!meta)
> > + if (!zram_meta_alloc(zram, disksize))
> > return -ENOMEM;
> >
> > comp = zcomp_create(zram->compressor);
> > @@ -1048,7 +1015,6 @@ static ssize_t disksize_store(struct device *dev,
> > goto out_destroy_comp;
> > }
> >
> > - zram->meta = meta;
> > zram->comp = comp;
> > zram->disksize = disksize;
> > set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
> > @@ -1061,7 +1027,7 @@ static ssize_t disksize_store(struct device *dev,
> > up_write(&zram->init_lock);
> > zcomp_destroy(comp);
> > out_free_meta:
> > - zram_meta_free(meta, disksize);
> > + zram_meta_free(zram, disksize);
> > return err;
> > }
>
> OK, I don't think it's the same.
>
> we used to have
>
> struct zram_meta *zram_meta_alloc()
> {
> meta->table = vzalloc()
> meta->mem_pool = zs_create_pool();
> return meta;
> }
>
>
> disksize_store()
> {
> meta = zram_meta_alloc();
> if (init_done(zram)) {
> pr_info("Cannot change disksize for initialized device\n");
> goto out_destroy_comp;
> }
>
> zram->meta = meta;
> ^^^^^^^^^^^^^^^^^^
> }
>
> now we have
>
> struct zram_meta *zram_meta_alloc()
> {
> zram->table = vzalloc()
> zram->mem_pool = zs_create_pool();
> return true;
> }
>
>
> disksize_store()
> {
> zram_meta_alloc();
> ^^^^^^^^^^^^^^^^^
> if (init_done(zram)) {
> pr_info("Cannot change disksize for initialized device\n");
> goto out_destroy_comp;
> }
> }
>
>
> by the time we call init_done(zram) on an already initialized device,
> zram->table and zram->mem_pool have been overwritten and lost. right?
Right you are.
I will fix it!
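One way to do it (just a sketch, not the final patch; the out_unlock label
and -EBUSY here are assumptions) is to take init_lock and check init_done()
before allocating anything, so an already initialized device is never
touched:

	disksize = PAGE_ALIGN(disksize);

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_unlock;
	}

	if (!zram_meta_alloc(zram, disksize)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	/* zcomp_create() and the rest of disksize_store() stay as before */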
>
>
> [..]
> > -struct zram_meta {
> > +struct zram {
> > struct zram_table_entry *table;
> > struct zs_pool *mem_pool;
> > -};
> > -
> > -struct zram {
> > - struct zram_meta *meta;
> > struct zcomp *comp;
> > struct gendisk *disk;
> > /* Prevent concurrent execution of device init */
>
>
> we still have several zram_meta_FOO() left overs in zram_drv.c
>
> -ss
On (04/03/17 14:17), Minchan Kim wrote:
> Johannes Thumshirn reported system goes the panic when using NVMe over
> Fabrics loopback target with zram.
>
> The reason is zram expects each bvec in bio contains a single page
> but nvme can attach a huge bulk of pages attached to the bio's bvec
> so that zram's index arithmetic could be wrong so that out-of-bound
> access makes panic.
>
> It can be solved by limiting max_sectors with SECTORS_PER_PAGE like
> [1] but it makes zram slow because bio should split with each pages
> so this patch makes zram aware of multiple pages in a bvec so it
> could solve without any regression.
>
> [1] 0bc315381fe9, zram: set physical queue limits to avoid array out of
> bounds accesses
>
> Cc: Jens Axboe <[email protected]>
> Cc: Hannes Reinecke <[email protected]>
> Reported-by: Johannes Thumshirn <[email protected]>
> Tested-by: Johannes Thumshirn <[email protected]>
> Reviewed-by: Johannes Thumshirn <[email protected]>
> Signed-off-by: Johannes Thumshirn <[email protected]>
> Signed-off-by: Minchan Kim <[email protected]>
Reviewed-by: Sergey Senozhatsky <[email protected]>
> + unsigned int remained = bvec.bv_len;
...
> + } while (remained);
a tiny nitpick: the "-ed" suffix in the variable name looks a bit unusual.
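for context, the loop being discussed looks roughly like this (a
reconstructed sketch, not the exact patch; update_position() and the
zram_bvec_rw() arguments are my assumption of the surrounding code):

	struct bio_vec bv = bvec;
	unsigned int remained = bvec.bv_len;

	do {
		/* clamp each chunk to the page boundary */
		bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, remained);
		if (zram_bvec_rw(zram, &bv, index, offset, is_write) < 0)
			goto out;

		bv.bv_offset += bv.bv_len;
		remained -= bv.bv_len;

		update_position(&index, &offset, &bv);
	} while (remained);

something like "unwritten" or "remaining" would read a bit more naturally.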
-ss
On Tue, Apr 04, 2017 at 11:31:15AM +0900, Sergey Senozhatsky wrote:
< snip >
>
> [..]
> > -struct zram_meta {
> > +struct zram {
> > struct zram_table_entry *table;
> > struct zs_pool *mem_pool;
> > -};
> > -
> > -struct zram {
> > - struct zram_meta *meta;
> > struct zcomp *comp;
> > struct gendisk *disk;
> > /* Prevent concurrent execution of device init */
>
>
> we still have several zram_meta_FOO() left overs in zram_drv.c
I intentionally used that term because I can't think of a better name
to wrap the logic which allocates the table and mempool.
Feel free to suggest one if you have a better idea.
Thanks.
On (04/04/17 14:40), Minchan Kim wrote:
> > [..]
> > > -struct zram_meta {
> > > +struct zram {
> > > struct zram_table_entry *table;
> > > struct zs_pool *mem_pool;
> > > -};
> > > -
> > > -struct zram {
> > > - struct zram_meta *meta;
> > > struct zcomp *comp;
> > > struct gendisk *disk;
> > > /* Prevent concurrent execution of device init */
> >
> >
> > we still have several zram_meta_FOO() left overs in zram_drv.c
>
> I intentionally used that term because I can't think of a better name
> to wrap the logic which allocates the table and mempool.
ah, it was intentional. um, OK, let's keep zram_meta_foo().
-ss
Hi Andrew,
Could you drop this patchset?
I will send an updated version based on Sergey's review, with some bug fixes.
zram-handle-multiple-pages-attached-bios-bvec.patch
zram-partial-io-refactoring.patch
zram-use-zram_slot_lock-instead-of-raw-bit_spin_lock-op.patch
zram-remove-zram_meta-structure.patch
zram-introduce-zram-data-accessor.patch
Thanks!