2013-06-04 16:08:33

by Jiang Liu

Subject: [PATCH v2 01/10] zram: kill unused zram_get_num_devices()

Now there's no caller of zram_get_num_devices(), so kill it.
And change zram_devices to static because it's only used in zram_drv.c.

Signed-off-by: Jiang Liu <[email protected]>
---
drivers/staging/zram/zram_drv.c | 7 +------
drivers/staging/zram/zram_drv.h | 2 --
2 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index e34e3fe..0a07de4 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -37,7 +37,7 @@

/* Globals */
static int zram_major;
-struct zram *zram_devices;
+static struct zram *zram_devices;

/* Module params (documentation at end) */
static unsigned int num_devices = 1;
@@ -669,11 +669,6 @@ static void destroy_device(struct zram *zram)
blk_cleanup_queue(zram->queue);
}

-unsigned int zram_get_num_devices(void)
-{
- return num_devices;
-}
-
static int __init zram_init(void)
{
int ret, dev_id;
diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h
index 2d1a3f1..a18b66d 100644
--- a/drivers/staging/zram/zram_drv.h
+++ b/drivers/staging/zram/zram_drv.h
@@ -109,8 +109,6 @@ struct zram {
struct zram_stats stats;
};

-extern struct zram *zram_devices;
-unsigned int zram_get_num_devices(void);
#ifdef CONFIG_SYSFS
extern struct attribute_group zram_disk_attr_group;
#endif
--
1.8.1.2


2013-06-04 16:08:46

by Jiang Liu

Subject: [PATCH v2 04/10] zram: destroy all devices on error recovery path in zram_init()

On the error recovery path of zram_init(), the zram device object that
caused the failure is leaked. So change create_device() to free the
resources it has allocated on its error path.

Signed-off-by: Jiang Liu <[email protected]>
Cc: [email protected]
---
drivers/staging/zram/zram_drv.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 847d207..88a53f4 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -595,7 +595,7 @@ static const struct block_device_operations zram_devops = {

static int create_device(struct zram *zram, int device_id)
{
- int ret = 0;
+ int ret = -ENOMEM;

init_rwsem(&zram->lock);
init_rwsem(&zram->init_lock);
@@ -605,7 +605,6 @@ static int create_device(struct zram *zram, int device_id)
if (!zram->queue) {
pr_err("Error allocating disk queue for device %d\n",
device_id);
- ret = -ENOMEM;
goto out;
}

@@ -615,11 +614,9 @@ static int create_device(struct zram *zram, int device_id)
/* gendisk structure */
zram->disk = alloc_disk(1);
if (!zram->disk) {
- blk_cleanup_queue(zram->queue);
pr_warn("Error allocating disk structure for device %d\n",
device_id);
- ret = -ENOMEM;
- goto out;
+ goto out_free_queue;
}

zram->disk->major = zram_major;
@@ -648,11 +645,17 @@ static int create_device(struct zram *zram, int device_id)
&zram_disk_attr_group);
if (ret < 0) {
pr_warn("Error creating sysfs group");
- goto out;
+ goto out_free_disk;
}

zram->init_done = 0;
+ return 0;

+out_free_disk:
+ del_gendisk(zram->disk);
+ put_disk(zram->disk);
+out_free_queue:
+ blk_cleanup_queue(zram->queue);
out:
return ret;
}
--
1.8.1.2
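
For readers less familiar with the idiom: the fix converts create_device()
to the kernel's standard reverse-order goto-unwind pattern. A minimal
stand-alone C sketch of that shape (hypothetical resources, not the driver
code):

	#include <stdlib.h>

	struct dev { void *a, *b; };

	static int dev_create(struct dev *d)
	{
		int ret = -1;			/* assume failure until the end */

		d->a = malloc(16);		/* first resource */
		if (!d->a)
			goto out;

		d->b = malloc(16);		/* second resource */
		if (!d->b)
			goto out_free_a;	/* unwind only what exists so far */

		return 0;			/* success: skip the unwind ladder */

	out_free_a:
		free(d->a);			/* labels run in reverse alloc order */
	out:
		return ret;
	}

Each later failure falls through the cleanups of everything allocated
before it, which is exactly what the new out_free_disk/out_free_queue
labels do above.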

2013-06-04 16:08:58

by Jiang Liu

Subject: [PATCH v2 07/10] zram: optimize memory operations with clear_page()/copy_page()

Signed-off-by: Jiang Liu <[email protected]>
---
drivers/staging/zram/zram_drv.c | 27 ++++++++++++++++-----------
1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 1c3974f..a4595ca 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -128,23 +128,26 @@ static void zram_free_page(struct zram *zram, size_t index)
meta->table[index].size = 0;
}

+static inline int is_partial_io(struct bio_vec *bvec)
+{
+ return bvec->bv_len != PAGE_SIZE;
+}
+
static void handle_zero_page(struct bio_vec *bvec)
{
struct page *page = bvec->bv_page;
void *user_mem;

user_mem = kmap_atomic(page);
- memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
+ if (is_partial_io(bvec))
+ memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
+ else
+ clear_page(user_mem);
kunmap_atomic(user_mem);

flush_dcache_page(page);
}

-static inline int is_partial_io(struct bio_vec *bvec)
-{
- return bvec->bv_len != PAGE_SIZE;
-}
-
static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
{
int ret = LZO_E_OK;
@@ -154,13 +157,13 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
unsigned long handle = meta->table[index].handle;

if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
- memset(mem, 0, PAGE_SIZE);
+ clear_page(mem);
return 0;
}

cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
if (meta->table[index].size == PAGE_SIZE)
- memcpy(mem, cmem, PAGE_SIZE);
+ copy_page(mem, cmem);
else
ret = lzo1x_decompress_safe(cmem, meta->table[index].size,
mem, &clen);
@@ -309,11 +312,13 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
}
cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);

- if ((clen == PAGE_SIZE) && !is_partial_io(bvec))
+ if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
src = kmap_atomic(page);
- memcpy(cmem, src, clen);
- if ((clen == PAGE_SIZE) && !is_partial_io(bvec))
+ copy_page(cmem, src);
kunmap_atomic(src);
+ } else {
+ memcpy(cmem, src, clen);
+ }

zs_unmap_object(meta->mem_pool, handle);

--
1.8.1.2
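
The rewrite relies on a simple equivalence: for a page-aligned buffer of
exactly PAGE_SIZE bytes, memset()/memcpy() can be replaced by the
per-architecture optimized page helpers. A short sketch of the rule (the
constraint, not new driver code):

	/* Valid only for full, page-aligned pages:
	 *   memset(mem, 0, PAGE_SIZE)   ->  clear_page(mem)
	 *   memcpy(dst, src, PAGE_SIZE) ->  copy_page(dst, src)
	 * The page helpers take no offset/length parameters, so a partial
	 * bio_vec must keep the byte-granular memset()/memcpy() path,
	 * as the is_partial_io() checks above do. */
	if (is_partial_io(bvec))
		memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
	else
		clear_page(user_mem);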

2013-06-04 16:08:52

by Jiang Liu

Subject: [PATCH v2 05/10] zram: avoid double free in function zram_bvec_write()

When doing a partial write and the whole page is filled with zeros,
zram_bvec_write() will free uncmem twice.

Signed-off-by: Jiang Liu <[email protected]>
Cc: [email protected]
---
drivers/staging/zram/zram_drv.c | 2 --
1 file changed, 2 deletions(-)

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 88a53f4..6a54bb9 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -272,8 +272,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,

if (page_zero_filled(uncmem)) {
kunmap_atomic(user_mem);
- if (is_partial_io(bvec))
- kfree(uncmem);
zram->stats.pages_zero++;
zram_set_flag(meta, index, ZRAM_ZERO);
ret = 0;
--
1.8.1.2
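
The double free arises because zram_bvec_write() already frees the
partial-I/O buffer once on its common exit path; a simplified sketch of
the buggy flow (condensed from the function, not a verbatim copy):

	uncmem = is_partial_io(bvec) ? kmalloc(PAGE_SIZE, GFP_KERNEL) : NULL;
	...
	if (page_zero_filled(uncmem)) {
		kunmap_atomic(user_mem);
		if (is_partial_io(bvec))
			kfree(uncmem);	/* first free: removed by the patch */
		ret = 0;
		goto out;
	}
	...
	out:
		if (is_partial_io(bvec))
			kfree(uncmem);	/* second free: common exit path */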

2013-06-04 16:09:01

by Jiang Liu

Subject: [PATCH v2 08/10] zram: protect sysfs handler from invalid memory access

Use zram->init_lock to protect access to zram->meta, otherwise it
may cause invalid memory access if zram->meta has been freed by
zram_reset_device().

This issue may be triggered by:
Thread 1:
while true; do cat mem_used_total; done
Thread 2:
while true; do echo 8M > disksize; echo 1 > reset; done

Signed-off-by: Jiang Liu <[email protected]>
Cc: [email protected]
---
drivers/staging/zram/zram_sysfs.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c
index 8cb7822..e239d94 100644
--- a/drivers/staging/zram/zram_sysfs.c
+++ b/drivers/staging/zram/zram_sysfs.c
@@ -179,8 +179,10 @@ static ssize_t mem_used_total_show(struct device *dev,
struct zram *zram = dev_to_zram(dev);
struct zram_meta *meta = zram->meta;

+ down_read(&zram->init_lock);
if (zram->init_done)
val = zs_get_total_size_bytes(meta->mem_pool);
+ up_read(&zram->init_lock);

return sprintf(buf, "%llu\n", val);
}
--
1.8.1.2
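
For reference, the resulting handler, reconstructed from the hunk above
with the locking rationale spelled out as comments:

	static ssize_t mem_used_total_show(struct device *dev,
			struct device_attribute *attr, char *buf)
	{
		u64 val = 0;
		struct zram *zram = dev_to_zram(dev);
		struct zram_meta *meta = zram->meta;

		/* init_lock keeps meta alive against a concurrent
		 * zram_reset_device(), which frees it under the same lock */
		down_read(&zram->init_lock);
		if (zram->init_done)
			val = zs_get_total_size_bytes(meta->mem_pool);
		up_read(&zram->init_lock);

		return sprintf(buf, "%llu\n", val);
	}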

2013-06-04 16:09:08

by Jiang Liu

Subject: [PATCH v2 09/10] zram: minor code cleanup

Minor code cleanup for zram.

Signed-off-by: Jiang Liu <[email protected]>
---
drivers/staging/zram/zram_drv.c | 11 ++++-------
drivers/staging/zram/zram_sysfs.c | 3 +--
2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index a4595ca..088bd6a 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -481,13 +481,11 @@ static void __zram_reset_device(struct zram *zram)
/* Free all pages that are still in this zram device */
for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) {
unsigned long handle = meta->table[index].handle;
- if (!handle)
- continue;
-
- zs_free(meta->mem_pool, handle);
+ if (handle)
+ zs_free(meta->mem_pool, handle);
}

- zram_meta_free(zram->meta);
+ zram_meta_free(meta);
zram->meta = NULL;
/* Reset stats */
memset(&zram->stats, 0, sizeof(zram->stats));
@@ -686,8 +684,7 @@ static int __init zram_init(void)
int ret, dev_id;

if (num_devices > max_num_devices) {
- pr_warn("Invalid value for num_devices: %u\n",
- num_devices);
+ pr_warn("Invalid value for num_devices: %u\n", num_devices);
ret = -EINVAL;
goto out;
}
diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c
index e239d94..2ae6b50 100644
--- a/drivers/staging/zram/zram_sysfs.c
+++ b/drivers/staging/zram/zram_sysfs.c
@@ -187,8 +187,7 @@ static ssize_t mem_used_total_show(struct device *dev,
return sprintf(buf, "%llu\n", val);
}

-static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR,
- disksize_show, disksize_store);
+static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, disksize_show, disksize_store);
static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL);
--
1.8.1.2

2013-06-04 16:09:28

by Jiang Liu

Subject: [PATCH v2 10/10] zram: use atomic64_xxx() to replace zram_stat64_xxx()

Use atomic64_xxx() to replace the open-coded zram_stat64_xxx() helpers.
Some architectures natively support atomic64 operations, so there we
can get rid of the spin_lock() in zram_stat64_xxx(). On the other hand,
platforms that use the generic atomic64 implementation may pay an
extra save/restore of the interrupt flags. So it's a tradeoff.

Signed-off-by: Jiang Liu <[email protected]>
---
drivers/staging/zram/zram_drv.c | 36 ++++++++----------------------------
drivers/staging/zram/zram_drv.h | 14 +++++++-------
drivers/staging/zram/zram_sysfs.c | 21 +++++----------------
3 files changed, 20 insertions(+), 51 deletions(-)

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 088bd6a..d174f60 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -42,25 +42,6 @@ static struct zram *zram_devices;
/* Module params (documentation at end) */
static unsigned int num_devices = 1;

-static void zram_stat64_add(struct zram *zram, u64 *v, u64 inc)
-{
- spin_lock(&zram->stat64_lock);
- *v = *v + inc;
- spin_unlock(&zram->stat64_lock);
-}
-
-static void zram_stat64_sub(struct zram *zram, u64 *v, u64 dec)
-{
- spin_lock(&zram->stat64_lock);
- *v = *v - dec;
- spin_unlock(&zram->stat64_lock);
-}
-
-static void zram_stat64_inc(struct zram *zram, u64 *v)
-{
- zram_stat64_add(zram, v, 1);
-}
-
static int zram_test_flag(struct zram_meta *meta, u32 index,
enum zram_pageflags flag)
{
@@ -120,8 +101,7 @@ static void zram_free_page(struct zram *zram, size_t index)
if (size <= PAGE_SIZE / 2)
zram->stats.good_compress--;

- zram_stat64_sub(zram, &zram->stats.compr_size,
- meta->table[index].size);
+ atomic64_sub(meta->table[index].size, &zram->stats.compr_size);
zram->stats.pages_stored--;

meta->table[index].handle = 0;
@@ -172,7 +152,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
/* Should NEVER happen. Return bio error if it does. */
if (unlikely(ret != LZO_E_OK)) {
pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
- zram_stat64_inc(zram, &zram->stats.failed_reads);
+ atomic64_inc(&zram->stats.failed_reads);
return ret;
}

@@ -326,7 +306,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
meta->table[index].size = clen;

/* Update stats */
- zram_stat64_add(zram, &zram->stats.compr_size, clen);
+ atomic64_add(clen, &zram->stats.compr_size);
zram->stats.pages_stored++;
if (clen <= PAGE_SIZE / 2)
zram->stats.good_compress++;
@@ -336,7 +316,7 @@ out:
kfree(uncmem);

if (ret)
- zram_stat64_inc(zram, &zram->stats.failed_writes);
+ atomic64_inc(&zram->stats.failed_writes);
return ret;
}

@@ -373,10 +353,10 @@ static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)

switch (rw) {
case READ:
- zram_stat64_inc(zram, &zram->stats.num_reads);
+ atomic64_inc(&zram->stats.num_reads);
break;
case WRITE:
- zram_stat64_inc(zram, &zram->stats.num_writes);
+ atomic64_inc(&zram->stats.num_writes);
break;
}

@@ -453,7 +433,7 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)
goto error;

if (!valid_io_request(zram, bio)) {
- zram_stat64_inc(zram, &zram->stats.invalid_io);
+ atomic64_inc(&zram->stats.invalid_io);
goto error;
}

@@ -590,7 +570,7 @@ static void zram_slot_free_notify(struct block_device *bdev,
down_write(&zram->lock);
zram_free_page(zram, index);
up_write(&zram->lock);
- zram_stat64_inc(zram, &zram->stats.notify_free);
+ atomic64_inc(&zram->stats.notify_free);
}

static const struct block_device_operations zram_devops = {
diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h
index a18b66d..581caf0 100644
--- a/drivers/staging/zram/zram_drv.h
+++ b/drivers/staging/zram/zram_drv.h
@@ -70,13 +70,13 @@ struct table {
} __aligned(4);

struct zram_stats {
- u64 compr_size; /* compressed size of pages stored */
- u64 num_reads; /* failed + successful */
- u64 num_writes; /* --do-- */
- u64 failed_reads; /* should NEVER! happen */
- u64 failed_writes; /* can happen when memory is too low */
- u64 invalid_io; /* non-page-aligned I/O requests */
- u64 notify_free; /* no. of swap slot free notifications */
+ atomic64_t compr_size; /* compressed size of pages stored */
+ atomic64_t num_reads; /* failed + successful */
+ atomic64_t num_writes; /* --do-- */
+ atomic64_t failed_reads; /* should NEVER! happen */
+ atomic64_t failed_writes; /* can happen when memory is too low */
+ atomic64_t invalid_io; /* non-page-aligned I/O requests */
+ atomic64_t notify_free; /* no. of swap slot free notifications */
u32 pages_zero; /* no. of zero filled pages */
u32 pages_stored; /* no. of pages currently stored */
u32 good_compress; /* % of pages with compression ratio<=50% */
diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c
index 2ae6b50..ab02d35 100644
--- a/drivers/staging/zram/zram_sysfs.c
+++ b/drivers/staging/zram/zram_sysfs.c
@@ -19,17 +19,6 @@

#include "zram_drv.h"

-static u64 zram_stat64_read(struct zram *zram, u64 *v)
-{
- u64 val;
-
- spin_lock(&zram->stat64_lock);
- val = *v;
- spin_unlock(&zram->stat64_lock);
-
- return val;
-}
-
static inline struct zram *dev_to_zram(struct device *dev)
{
return (struct zram *)dev_to_disk(dev)->private_data;
@@ -116,7 +105,7 @@ static ssize_t num_reads_show(struct device *dev,
struct zram *zram = dev_to_zram(dev);

return sprintf(buf, "%llu\n",
- zram_stat64_read(zram, &zram->stats.num_reads));
+ (u64)atomic64_read(&zram->stats.num_reads));
}

static ssize_t num_writes_show(struct device *dev,
@@ -125,7 +114,7 @@ static ssize_t num_writes_show(struct device *dev,
struct zram *zram = dev_to_zram(dev);

return sprintf(buf, "%llu\n",
- zram_stat64_read(zram, &zram->stats.num_writes));
+ (u64)atomic64_read(&zram->stats.num_writes));
}

static ssize_t invalid_io_show(struct device *dev,
@@ -134,7 +123,7 @@ static ssize_t invalid_io_show(struct device *dev,
struct zram *zram = dev_to_zram(dev);

return sprintf(buf, "%llu\n",
- zram_stat64_read(zram, &zram->stats.invalid_io));
+ (u64)atomic64_read(&zram->stats.invalid_io));
}

static ssize_t notify_free_show(struct device *dev,
@@ -143,7 +132,7 @@ static ssize_t notify_free_show(struct device *dev,
struct zram *zram = dev_to_zram(dev);

return sprintf(buf, "%llu\n",
- zram_stat64_read(zram, &zram->stats.notify_free));
+ (u64)atomic64_read(&zram->stats.notify_free));
}

static ssize_t zero_pages_show(struct device *dev,
@@ -169,7 +158,7 @@ static ssize_t compr_data_size_show(struct device *dev,
struct zram *zram = dev_to_zram(dev);

return sprintf(buf, "%llu\n",
- zram_stat64_read(zram, &zram->stats.compr_size));
+ (u64)atomic64_read(&zram->stats.compr_size));
}

static ssize_t mem_used_total_show(struct device *dev,
--
1.8.1.2
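
For context on the tradeoff mentioned in the changelog: architectures
without native 64-bit atomics fall back to the kernel's generic
implementation, which is roughly the following (simplified from
lib/atomic64.c):

	/* Simplified shape of the generic fallback: every operation takes
	 * a per-counter hashed spinlock with interrupts disabled, which is
	 * the "extra save/restore of the interrupt flags" the changelog
	 * refers to. */
	void atomic64_add(long long a, atomic64_t *v)
	{
		unsigned long flags;
		raw_spinlock_t *lock = lock_addr(v);	/* hashed lock array */

		raw_spin_lock_irqsave(lock, flags);
		v->counter += a;
		raw_spin_unlock_irqrestore(lock, flags);
	}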

2013-06-04 16:09:59

by Jiang Liu

Subject: [PATCH v2 06/10] zram: avoid access beyond the zram device

Function valid_io_request() should verify that the entire request fits
within the zram device; otherwise out-of-range requests cause invalid
memory accesses.

Signed-off-by: Jiang Liu <[email protected]>
Cc: [email protected]
---
drivers/staging/zram/zram_drv.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 6a54bb9..1c3974f 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -428,6 +428,10 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio)
return 0;
}

+ if (unlikely((bio->bi_sector << SECTOR_SHIFT) + bio->bi_size >
+ zram->disksize))
+ return 0;
+
/* I/O request is valid */
return 1;
}
--
1.8.1.2
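
A worked example of the new bound check, with SECTOR_SHIFT == 9 and a
hypothetical 1 MiB (1048576-byte) disksize: a 4 KiB bio at sector 2040
ends exactly at the device boundary and is accepted, while the same bio
at sector 2041 overruns it and is rejected:

	/* hypothetical numbers, not from the driver */
	u64 start = (u64)bio->bi_sector << SECTOR_SHIFT; /* 2040 << 9 = 1044480 */
	u64 end   = start + bio->bi_size;                /* + 4096    = 1048576 */
	if (end > zram->disksize)	/* 1048576 > 1048576 is false: accept; */
		return 0;		/* sector 2041 gives 1049088: reject   */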

2013-06-04 16:08:44

by Jiang Liu

Subject: [PATCH v2 03/10] zram: use zram->lock to protect zram_free_page() in swap free notify path

zram_free_page() is protected by down_write(&zram->lock) when called by
zram_bvec_write(), but there's no such protection when called by
zram_slot_free_notify(), which may cause wrong states to zram object.

There are two possible consequences of this issue:
1) It may cause invalid memory access if we read from a zram device used
as swap device. (Not sure whether it's legal to make read/write
requests to a zram device used as swap device.)
2) It may leave some fields (bad_compress, good_compress, pages_stored)
in zram->stats wrong if the swap layer makes concurrent calls to
zram_slot_free_notify().

So enhance zram_slot_free_notify() to acquire the writer lock on zram->lock
before calling zram_free_page().

Signed-off-by: Jiang Liu <[email protected]>
Cc: [email protected]
---
drivers/staging/zram/zram_drv.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 5a2f20b..847d207 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -582,7 +582,9 @@ static void zram_slot_free_notify(struct block_device *bdev,
struct zram *zram;

zram = bdev->bd_disk->private_data;
+ down_write(&zram->lock);
zram_free_page(zram, index);
+ up_write(&zram->lock);
zram_stat64_inc(zram, &zram->stats.notify_free);
}

--
1.8.1.2
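
The resulting handler, reconstructed from the hunk above, with the
locking rationale as comments:

	static void zram_slot_free_notify(struct block_device *bdev,
					  unsigned long index)
	{
		struct zram *zram = bdev->bd_disk->private_data;

		/* zram_free_page() mutates meta->table[] and the 32-bit
		 * stats; take the same writer lock zram_bvec_write() holds
		 * so the two paths cannot interleave */
		down_write(&zram->lock);
		zram_free_page(zram, index);
		up_write(&zram->lock);

		/* the 64-bit stat has its own serialization */
		zram_stat64_inc(zram, &zram->stats.notify_free);
	}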

2013-06-04 16:08:42

by Jiang Liu

Subject: [PATCH v2 02/10] zram: avoid invalid memory access in zram_exit()

Memory for the zram->disk object may have already been freed by the time
destroy_device(zram) returns, so it is unsafe for zram_reset_device(zram)
to access zram->disk afterwards.

We can't solve this bug by flipping the order of destroy_device(zram)
and zram_reset_device(zram), because that would cause deadlocks with the
zram sysfs handlers.

So fix it by holding an extra reference to zram->disk before calling
destroy_device(zram).

Signed-off-by: Jiang Liu <[email protected]>
Cc: [email protected]
---
drivers/staging/zram/zram_drv.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 0a07de4..5a2f20b 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -722,8 +722,10 @@ static void __exit zram_exit(void)
for (i = 0; i < num_devices; i++) {
zram = &zram_devices[i];

+ get_disk(zram->disk);
destroy_device(zram);
zram_reset_device(zram);
+ put_disk(zram->disk);
}

unregister_blkdev(zram_major, "zram");
--
1.8.1.2
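
The shape of the fix is a plain reference-count bracket around the
teardown; a sketch with the reasoning as comments (same calls as the
hunk above):

	get_disk(zram->disk);		/* +1: pin the gendisk            */
	destroy_device(zram);		/* drops the driver's own ref;    */
					/* the disk survives because we   */
					/* still hold ours                */
	zram_reset_device(zram);	/* safe to touch zram->disk here  */
	put_disk(zram->disk);		/* -1: now it can really be freed */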

2013-06-05 05:52:45

by Minchan Kim

Subject: Re: [PATCH v2 01/10] zram: kill unused zram_get_num_devices()

Hello,

On Wed, Jun 05, 2013 at 12:05:59AM +0800, Jiang Liu wrote:
> Now there's no caller of zram_get_num_devices(), so kill it.
> And change zram_devices to static because it's only used in zram_drv.c.
>
> Signed-off-by: Jiang Liu <[email protected]>

I am looking at next-20130604 and still see that dev_to_zram() uses it.
Maybe Greg picked up your earlier patch yesterday, but I wasn't Cc'ed,
and your patchset doesn't say anything about that even though its title
says v2. So I'd like to make it clear.

Thanks.

> ---
> drivers/staging/zram/zram_drv.c | 7 +------
> drivers/staging/zram/zram_drv.h | 2 --
> 2 files changed, 1 insertion(+), 8 deletions(-)
>
> diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
> index e34e3fe..0a07de4 100644
> --- a/drivers/staging/zram/zram_drv.c
> +++ b/drivers/staging/zram/zram_drv.c
> @@ -37,7 +37,7 @@
>
> /* Globals */
> static int zram_major;
> -struct zram *zram_devices;
> +static struct zram *zram_devices;
>
> /* Module params (documentation at end) */
> static unsigned int num_devices = 1;
> @@ -669,11 +669,6 @@ static void destroy_device(struct zram *zram)
> blk_cleanup_queue(zram->queue);
> }
>
> -unsigned int zram_get_num_devices(void)
> -{
> - return num_devices;
> -}
> -
> static int __init zram_init(void)
> {
> int ret, dev_id;
> diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h
> index 2d1a3f1..a18b66d 100644
> --- a/drivers/staging/zram/zram_drv.h
> +++ b/drivers/staging/zram/zram_drv.h
> @@ -109,8 +109,6 @@ struct zram {
> struct zram_stats stats;
> };
>
> -extern struct zram *zram_devices;
> -unsigned int zram_get_num_devices(void);
> #ifdef CONFIG_SYSFS
> extern struct attribute_group zram_disk_attr_group;
> #endif
> --
> 1.8.1.2

--
Kind regards,
Minchan Kim

2013-06-05 06:04:48

by Minchan Kim

Subject: Re: [PATCH v2 02/10] zram: avoid invalid memory access in zram_exit()

On Wed, Jun 05, 2013 at 12:06:00AM +0800, Jiang Liu wrote:
> Memory for zram->disk object may have already been freed after returning
> from destroy_device(zram), then it's unsafe for zram_reset_device(zram)
> to access zram->disk again.
>
> We can't solve this bug by flipping the order of destroy_device(zram)
> and zram_reset_device(zram), that will cause deadlock issues to the
> zram sysfs handler.

What kinds of deadlock happen?
Could you elaborate it more?

--
Kind regards,
Minchan Kim

2013-06-05 06:29:39

by Minchan Kim

Subject: Re: [PATCH v2 03/10] zram: use zram->lock to protect zram_free_page() in swap free notify path

On Wed, Jun 05, 2013 at 12:06:01AM +0800, Jiang Liu wrote:
> zram_free_page() is protected by down_write(&zram->lock) when called by
> zram_bvec_write(), but there's no such protection when called by
> zram_slot_free_notify(), which may cause wrong states to zram object.
>
> There are two possible consequences of this issue:
> 1) It may cause invalid memory access if we read from a zram device used
> as swap device. (Not sure whether it's legal to make read/write
> requests to a zram device used as swap device.)

I think it's possible if the permissions allow it, so we should take
care of that. But I'd like to clarify your comment about "invalid
memory access".

As I read the code, one of the problems we can encounter through a race
between zram_bvec_read and zram_slot_free_notify is hitting BUG_ON(!handle)
in zs_map_object or pr_err("Decompression failed ...") in
zram_decompress_page. Otherwise, the read could access a different swap
block than the one the user requested.

Could you please expand on your vague "invalid memory access"?


> 2) It may cause some fields (bad_compress, good_compress, pages_stored)
> in zram->stats wrong if the swap layer makes concurrently call to
> zram_slot_free_notify().
>
> So enhance zram_slot_free_notify() to acquire writer lock on zram->lock
> before calling zram_free_page().

OK.

And please add a comment to the struct zram->lock field, too.

Thanks.

>
> Signed-off-by: Jiang Liu <[email protected]>
> Cc: [email protected]
> ---
> drivers/staging/zram/zram_drv.c | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
> index 5a2f20b..847d207 100644
> --- a/drivers/staging/zram/zram_drv.c
> +++ b/drivers/staging/zram/zram_drv.c
> @@ -582,7 +582,9 @@ static void zram_slot_free_notify(struct block_device *bdev,
> struct zram *zram;
>
> zram = bdev->bd_disk->private_data;
> + down_write(&zram->lock);
> zram_free_page(zram, index);
> + up_write(&zram->lock);
> zram_stat64_inc(zram, &zram->stats.notify_free);
> }
>
> --
> 1.8.1.2

--
Kind regards,
Minchan Kim

2013-06-05 06:40:17

by Minchan Kim

Subject: Re: [PATCH v2 04/10] zram: destroy all devices on error recovery path in zram_init()

On Wed, Jun 05, 2013 at 12:06:02AM +0800, Jiang Liu wrote:
> On error recovery path of zram_init(), it leaks the zram device object
> causing the failure. So change create_device() to free allocated
> resources on error path.
>
> Signed-off-by: Jiang Liu <[email protected]>
> Cc: [email protected]
Acked-by: Minchan Kim <[email protected]>

--
Kind regards,
Minchan Kim

2013-06-05 06:42:03

by Minchan Kim

Subject: Re: [PATCH v2 05/10] zram: avoid double free in function zram_bvec_write()

On Wed, Jun 05, 2013 at 12:06:03AM +0800, Jiang Liu wrote:
> When doing a partial write and the whole page is filled with zeros,
> zram_bvec_write() will free uncmem twice.
>
> Signed-off-by: Jiang Liu <[email protected]>
> Cc: [email protected]
Acked-by: Minchan Kim <[email protected]>

--
Kind regards,
Minchan Kim

2013-06-05 06:43:06

by Minchan Kim

Subject: Re: [PATCH v2 06/10] zram: avoid access beyond the zram device

On Wed, Jun 05, 2013 at 12:06:04AM +0800, Jiang Liu wrote:
> Function valid_io_request() should verify the entire request doesn't
> exceed the zram device, otherwise it will cause invalid memory access.

Right, but you need to explain what the invalid memory access is and
what its consequences are for the user.

>
> Signed-off-by: Jiang Liu <[email protected]>
> Cc: [email protected]
> ---
> drivers/staging/zram/zram_drv.c | 4 ++++
> 1 file changed, 4 insertions(+)
>
> diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
> index 6a54bb9..1c3974f 100644
> --- a/drivers/staging/zram/zram_drv.c
> +++ b/drivers/staging/zram/zram_drv.c
> @@ -428,6 +428,10 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio)
> return 0;
> }
>
> + if (unlikely((bio->bi_sector << SECTOR_SHIFT) + bio->bi_size >
> + zram->disksize))
> + return 0;
> +
> /* I/O request is valid */
> return 1;
> }
> --
> 1.8.1.2

--
Kind regards,
Minchan Kim

2013-06-05 06:57:51

by Minchan Kim

Subject: Re: [PATCH v2 07/10] zram: optimize memory operations with clear_page()/copy_page()

Looks good, but although the intention is easy to see from the subject
alone, it would be better to add a body to the description.

More questionable is whether Greg will accept this optimization patch
(NOT a bug fix): he said he will not accept any zram/zsmalloc patches
other than plain bug fixes unless we give him a plan for promotion out
of the staging tree, and we haven't yet. (Maybe I need to talk about
zsmalloc promotion with the z* family people and the mm guys.)

If it is acceptable, we have a few patches to optimize zram, too.

On Wed, Jun 05, 2013 at 12:06:05AM +0800, Jiang Liu wrote:
> Signed-off-by: Jiang Liu <[email protected]>
> ---
> drivers/staging/zram/zram_drv.c | 27 ++++++++++++++++-----------
> 1 file changed, 16 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
> index 1c3974f..a4595ca 100644
> --- a/drivers/staging/zram/zram_drv.c
> +++ b/drivers/staging/zram/zram_drv.c
> @@ -128,23 +128,26 @@ static void zram_free_page(struct zram *zram, size_t index)
> meta->table[index].size = 0;
> }
>
> +static inline int is_partial_io(struct bio_vec *bvec)
> +{
> + return bvec->bv_len != PAGE_SIZE;
> +}
> +
> static void handle_zero_page(struct bio_vec *bvec)
> {
> struct page *page = bvec->bv_page;
> void *user_mem;
>
> user_mem = kmap_atomic(page);
> - memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
> + if (is_partial_io(bvec))
> + memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
> + else
> + clear_page(user_mem);
> kunmap_atomic(user_mem);
>
> flush_dcache_page(page);
> }
>
> -static inline int is_partial_io(struct bio_vec *bvec)
> -{
> - return bvec->bv_len != PAGE_SIZE;
> -}
> -
> static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
> {
> int ret = LZO_E_OK;
> @@ -154,13 +157,13 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
> unsigned long handle = meta->table[index].handle;
>
> if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
> - memset(mem, 0, PAGE_SIZE);
> + clear_page(mem);
> return 0;
> }
>
> cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
> if (meta->table[index].size == PAGE_SIZE)
> - memcpy(mem, cmem, PAGE_SIZE);
> + copy_page(mem, cmem);
> else
> ret = lzo1x_decompress_safe(cmem, meta->table[index].size,
> mem, &clen);
> @@ -309,11 +312,13 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
> }
> cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
>
> - if ((clen == PAGE_SIZE) && !is_partial_io(bvec))
> + if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
> src = kmap_atomic(page);
> - memcpy(cmem, src, clen);
> - if ((clen == PAGE_SIZE) && !is_partial_io(bvec))
> + copy_page(cmem, src);
> kunmap_atomic(src);
> + } else {
> + memcpy(cmem, src, clen);
> + }
>
> zs_unmap_object(meta->mem_pool, handle);
>
> --
> 1.8.1.2

--
Kind regards,
Minchan Kim

2013-06-05 07:03:36

by Minchan Kim

Subject: Re: [PATCH v2 08/10] zram: protect sysfs handler from invalid memory access

On Wed, Jun 05, 2013 at 12:06:06AM +0800, Jiang Liu wrote:
> Use zram->init_lock to protect access to zram->meta, otherwise it
> may cause invalid memory access if zram->meta has been freed by
> zram_reset_device().
>
> This issue may be triggered by:
> Thread 1:
> while true; do cat mem_used_total; done
> Thread 2:
> while true; do echo 8M > disksize; echo 1 > reset; done
>
> Signed-off-by: Jiang Liu <[email protected]>
> Cc: [email protected]

Acked-by: Minchan Kim <[email protected]>


--
Kind regards,
Minchan Kim

2013-06-05 07:13:28

by Minchan Kim

Subject: Re: [PATCH v2 09/10] zram: minor code cleanup

On Wed, Jun 05, 2013 at 12:06:07AM +0800, Jiang Liu wrote:
> Minor code cleanup for zram.
>
> Signed-off-by: Jiang Liu <[email protected]>
> ---
> drivers/staging/zram/zram_drv.c | 11 ++++-------
> drivers/staging/zram/zram_sysfs.c | 3 +--
> 2 files changed, 5 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
> index a4595ca..088bd6a 100644
> --- a/drivers/staging/zram/zram_drv.c
> +++ b/drivers/staging/zram/zram_drv.c
> @@ -481,13 +481,11 @@ static void __zram_reset_device(struct zram *zram)
> /* Free all pages that are still in this zram device */
> for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) {
> unsigned long handle = meta->table[index].handle;
> - if (!handle)
> - continue;
> -
> - zs_free(meta->mem_pool, handle);
> + if (handle)
> + zs_free(meta->mem_pool, handle);
> }
>
> - zram_meta_free(zram->meta);
> + zram_meta_free(meta);
> zram->meta = NULL;
> /* Reset stats */
> memset(&zram->stats, 0, sizeof(zram->stats));
> @@ -686,8 +684,7 @@ static int __init zram_init(void)
> int ret, dev_id;
>
> if (num_devices > max_num_devices) {
> - pr_warn("Invalid value for num_devices: %u\n",
> - num_devices);
> + pr_warn("Invalid value for num_devices: %u\n", num_devices);
> ret = -EINVAL;
> goto out;
> }
> diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c
> index e239d94..2ae6b50 100644
> --- a/drivers/staging/zram/zram_sysfs.c
> +++ b/drivers/staging/zram/zram_sysfs.c
> @@ -187,8 +187,7 @@ static ssize_t mem_used_total_show(struct device *dev,
> return sprintf(buf, "%llu\n", val);
> }
>
> -static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR,
> - disksize_show, disksize_store);
> +static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, disksize_show, disksize_store);
> static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
> static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
> static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL);
> --
> 1.8.1.2

What's the effect of this patch?

Does it make the code cleaner than before?
Does it make the code faster than before?
Does it make the code size smaller than before?

Thanks for the cleanup, really, but let's leave it as it is unless you
have a strong reason. I'd like to keep the git blame/bisect steps needed
to find the real culprit to a minimum when some mess happens.

--
Kind regards,
Minchan Kim

2013-06-05 09:06:54

by Jerome Marchand

Subject: Re: [PATCH v2 01/10] zram: kill unused zram_get_num_devices()

Hi Jiang,

Could you please state the changes between v1 and v2? That would be
really helpful to the reviewers, and probably even more useful to the
maintainer.

Thanks,
Jerome

On 06/04/2013 06:05 PM, Jiang Liu wrote:
> Now there's no caller of zram_get_num_devices(), so kill it.
> And change zram_devices to static because it's only used in zram_drv.c.
>
> Signed-off-by: Jiang Liu <[email protected]>
> ---
> drivers/staging/zram/zram_drv.c | 7 +------
> drivers/staging/zram/zram_drv.h | 2 --
> 2 files changed, 1 insertion(+), 8 deletions(-)
>
> diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
> index e34e3fe..0a07de4 100644
> --- a/drivers/staging/zram/zram_drv.c
> +++ b/drivers/staging/zram/zram_drv.c
> @@ -37,7 +37,7 @@
>
> /* Globals */
> static int zram_major;
> -struct zram *zram_devices;
> +static struct zram *zram_devices;
>
> /* Module params (documentation at end) */
> static unsigned int num_devices = 1;
> @@ -669,11 +669,6 @@ static void destroy_device(struct zram *zram)
> blk_cleanup_queue(zram->queue);
> }
>
> -unsigned int zram_get_num_devices(void)
> -{
> - return num_devices;
> -}
> -
> static int __init zram_init(void)
> {
> int ret, dev_id;
> diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h
> index 2d1a3f1..a18b66d 100644
> --- a/drivers/staging/zram/zram_drv.h
> +++ b/drivers/staging/zram/zram_drv.h
> @@ -109,8 +109,6 @@ struct zram {
> struct zram_stats stats;
> };
>
> -extern struct zram *zram_devices;
> -unsigned int zram_get_num_devices(void);
> #ifdef CONFIG_SYSFS
> extern struct attribute_group zram_disk_attr_group;
> #endif
>

2013-06-05 10:26:26

by Jerome Marchand

Subject: Re: [PATCH v2 03/10] zram: use zram->lock to protect zram_free_page() in swap free notify path

On 06/04/2013 06:06 PM, Jiang Liu wrote:
> zram_free_page() is protected by down_write(&zram->lock) when called by
> zram_bvec_write(), but there's no such protection when called by
> zram_slot_free_notify(), which may cause wrong states to zram object.
>
> There are two possible consequences of this issue:
> 1) It may cause invalid memory access if we read from a zram device used
> as swap device. (Not sure whether it's legal to make read/write
> requests to a zram device used as swap device.)

It's never a good idea to mess with a swap device in use, but AFAICT
nothing prevents it (save for file permissions and admin sanity).

Jerome

> 2) It may cause some fields (bad_compress, good_compress, pages_stored)
> in zram->stats wrong if the swap layer makes concurrently call to
> zram_slot_free_notify().
>
> So enhance zram_slot_free_notify() to acquire writer lock on zram->lock
> before calling zram_free_page().
>
> Signed-off-by: Jiang Liu <[email protected]>
> Cc: [email protected]
> ---
> drivers/staging/zram/zram_drv.c | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
> index 5a2f20b..847d207 100644
> --- a/drivers/staging/zram/zram_drv.c
> +++ b/drivers/staging/zram/zram_drv.c
> @@ -582,7 +582,9 @@ static void zram_slot_free_notify(struct block_device *bdev,
> struct zram *zram;
>
> zram = bdev->bd_disk->private_data;
> + down_write(&zram->lock);
> zram_free_page(zram, index);
> + up_write(&zram->lock);
> zram_stat64_inc(zram, &zram->stats.notify_free);
> }
>
>

2013-06-05 10:40:32

by Jerome Marchand

Subject: Re: [PATCH v2 04/10] zram: destroy all devices on error recovery path in zram_init()

On 06/04/2013 06:06 PM, Jiang Liu wrote:
> On error recovery path of zram_init(), it leaks the zram device object
> causing the failure. So change create_device() to free allocated
> resources on error path.
>
> Signed-off-by: Jiang Liu <[email protected]>
> Cc: [email protected]

Acked-by: Jerome Marchand <[email protected]>

2013-06-05 12:02:30

by Jerome Marchand

Subject: Re: [PATCH v2 10/10] zram: use atomic64_xxx() to replace zram_stat64_xxx()

On 06/04/2013 06:06 PM, Jiang Liu wrote:
> Use atomic64_xxx() to replace open-coded zram_stat64_xxx().
> Some architectures have native support of atomic64 operations,
> so we can get rid of the spin_lock() in zram_stat64_xxx().
> On the other hand, for platforms use generic version of atomic64
> implement, it may cause an extra save/restore of the interrupt
> flag. So it's a tradeoff.
>
> Signed-off-by: Jiang Liu <[email protected]>

Before optimizing the stats, I'd like to make sure that they're correct.
What makes the 64-bit fields so different that they need atomicity while
the 32-bit ones don't? Actually, all of them except compr_size only
increase, which makes a race less critical than for the 32-bit fields,
which can all go up and down (if a decrement overwrites an increment,
the counter can wrap around zero).

Jerome

2013-06-05 15:09:43

by Jiang Liu

Subject: Re: [PATCH v2 01/10] zram: kill unused zram_get_num_devices()

On Wed 05 Jun 2013 01:52:42 PM CST, Minchan Kim wrote:
> Hello,
>
> On Wed, Jun 05, 2013 at 12:05:59AM +0800, Jiang Liu wrote:
>> Now there's no caller of zram_get_num_devices(), so kill it.
>> And change zram_devices to static because it's only used in zram_drv.c.
>>
>> Signed-off-by: Jiang Liu <[email protected]>
>
> I am looking at next-20130604 and am still seeing that dev_to_zram is using it.
> Maybe I expect Greg picked your eariler patch yesterday but I didn't Cced and
> your patchset doesn't say anything about that although your patchset's title
> says it's v2. So I'd like to make it clear.
Hi Minchan,
The first patch of the patchset should be "zram: simplify and optimize
dev_to_zram()", but I missed that one in v2. Sorry for the inconvenience.
Will resend v3 with the first patch.
Regards!
Gerry

>
> Thanks.
>
>> ---
>> drivers/staging/zram/zram_drv.c | 7 +------
>> drivers/staging/zram/zram_drv.h | 2 --
>> 2 files changed, 1 insertion(+), 8 deletions(-)
>>
>> diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
>> index e34e3fe..0a07de4 100644
>> --- a/drivers/staging/zram/zram_drv.c
>> +++ b/drivers/staging/zram/zram_drv.c
>> @@ -37,7 +37,7 @@
>>
>> /* Globals */
>> static int zram_major;
>> -struct zram *zram_devices;
>> +static struct zram *zram_devices;
>>
>> /* Module params (documentation at end) */
>> static unsigned int num_devices = 1;
>> @@ -669,11 +669,6 @@ static void destroy_device(struct zram *zram)
>> blk_cleanup_queue(zram->queue);
>> }
>>
>> -unsigned int zram_get_num_devices(void)
>> -{
>> - return num_devices;
>> -}
>> -
>> static int __init zram_init(void)
>> {
>> int ret, dev_id;
>> diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h
>> index 2d1a3f1..a18b66d 100644
>> --- a/drivers/staging/zram/zram_drv.h
>> +++ b/drivers/staging/zram/zram_drv.h
>> @@ -109,8 +109,6 @@ struct zram {
>> struct zram_stats stats;
>> };
>>
>> -extern struct zram *zram_devices;
>> -unsigned int zram_get_num_devices(void);
>> #ifdef CONFIG_SYSFS
>> extern struct attribute_group zram_disk_attr_group;
>> #endif
>> --
>> 1.8.1.2
>

2013-06-05 15:24:29

by Jiang Liu

Subject: Re: [PATCH v2 02/10] zram: avoid invalid memory access in zram_exit()

On Wed 05 Jun 2013 02:04:42 PM CST, Minchan Kim wrote:
> On Wed, Jun 05, 2013 at 12:06:00AM +0800, Jiang Liu wrote:
>> Memory for zram->disk object may have already been freed after returning
>> from destroy_device(zram), then it's unsafe for zram_reset_device(zram)
>> to access zram->disk again.
>>
>> We can't solve this bug by flipping the order of destroy_device(zram)
>> and zram_reset_device(zram), that will cause deadlock issues to the
>> zram sysfs handler.
>
> What kinds of deadlock happen?
> Could you elaborate it more?
>
Hi Minchan,
I will try my best to explain the situation.
1) If we change the order to:
zram_reset_device(zram);
destroy_device(zram);
then zram->meta could be rebuilt by disksize_store() in between
zram_reset_device(zram) and destroy_device(zram), because all sysfs
entries are still available, which then causes a memory leak.

2) If we change the code to:
down_write(&zram->init_lock);
__zram_reset_device(zram);
destroy_device(zram);
up_write(&zram->init_lock);
then it causes a typical deadlock:
Thread 1:
1) acquire init_lock
2) destroy_device(zram);
2.a) sysfs_remove_group()
2.b) wait for all sysfs files to be closed and released

Thread 2:
1) echo xxm > disksize
2) open the sysfs file and call disksize_store()
3) disksize_store() tries to acquire zram->init_lock

Then: deadlock.

Regards!
Gerry
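
The second scenario above as a side-by-side timeline (a sketch of the
ordering, not code from the driver):

	/*
	 *   Thread 1 (module exit)          Thread 2 (sysfs write)
	 *   ----------------------          ----------------------
	 *   down_write(&zram->init_lock);
	 *                                   disksize_store()
	 *                                     down_write(&zram->init_lock);
	 *                                     ... blocks on Thread 1
	 *   destroy_device(zram);
	 *     sysfs_remove_group();
	 *       ... waits for all active
	 *       sysfs callers to return,
	 *       i.e. waits on Thread 2
	 *
	 * Thread 1 waits on Thread 2 (active sysfs reference) while
	 * Thread 2 waits on Thread 1 (init_lock): a classic two-way wait.
	 */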

2013-06-05 16:00:29

by Jiang Liu

Subject: Re: [PATCH v2 03/10] zram: use zram->lock to protect zram_free_page() in swap free notify path

On Wed 05 Jun 2013 02:29:35 PM CST, Minchan Kim wrote:
> On Wed, Jun 05, 2013 at 12:06:01AM +0800, Jiang Liu wrote:
>> zram_free_page() is protected by down_write(&zram->lock) when called by
>> zram_bvec_write(), but there's no such protection when called by
>> zram_slot_free_notify(), which may cause wrong states to zram object.
>>
>> There are two possible consequences of this issue:
>> 1) It may cause invalid memory access if we read from a zram device used
>> as swap device. (Not sure whether it's legal to make read/write
>> requests to a zram device used as swap device.)
>
> I think it's possible if the permission is allowed so we should take care
> of that. But I'd like to clear your comment about "invalid memory access".
>
> As I read the code, one of the problem we can encounter by race between
> zram_bvec_read and zram_slot_free_notify is BUG_ON(!handle) on zs_map_object
> or pr_err("Decompression failed xxxx) on zram_decompress_page.
> Otherwise, it would be able to access different swap block with
> user request's one.
>
> Could you please expand your vague "invalid memory access"?
Hi Minchan,
I haven't had enough time to read all of the zsmalloc code yet, but
I'm thinking of this scenario: a user does a "dd" from a zram device
used as a swap device.

A possible sequence may be:
Thread 1: zram_bvec_rw() -> zram_bvec_read() -> zram_decompress_page(),
stopped just before zram_decompress_page() calls zs_map_object().
Thread 2: zram_slot_free_notify() -> zram_free_page() -> zs_free() ->
free_zspage() -> reset_page()/free_page()

Then:
Thread 1: zs_map_object()
-> get_zspage_mapping(get_first_page(page), &class_idx, &fg)
-> get_first_page()
-> return page->first_page /* may be an invalid address */

Actually, I guess it may cause different kinds of invalid memory
access, depending on the execution order of thread 1 and thread 2.

Regards!
Gerry
>
>
>> 2) It may cause some fields (bad_compress, good_compress, pages_stored)
>> in zram->stats wrong if the swap layer makes concurrently call to
>> zram_slot_free_notify().
>>
>> So enhance zram_slot_free_notify() to acquire writer lock on zram->lock
>> before calling zram_free_page().
>
> OK.
>
> And please add the comment struct zram->lock feild, too.
>
> Thanks.
>
>>
>> Signed-off-by: Jiang Liu <[email protected]>
>> Cc: [email protected]
>> ---
>> drivers/staging/zram/zram_drv.c | 2 ++
>> 1 file changed, 2 insertions(+)
>>
>> diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
>> index 5a2f20b..847d207 100644
>> --- a/drivers/staging/zram/zram_drv.c
>> +++ b/drivers/staging/zram/zram_drv.c
>> @@ -582,7 +582,9 @@ static void zram_slot_free_notify(struct block_device *bdev,
>> struct zram *zram;
>>
>> zram = bdev->bd_disk->private_data;
>> + down_write(&zram->lock);
>> zram_free_page(zram, index);
>> + up_write(&zram->lock);
>> zram_stat64_inc(zram, &zram->stats.notify_free);
>> }
>>
>> --
>> 1.8.1.2
>

2013-06-05 16:21:58

by Jiang Liu

Subject: Re: [PATCH v2 10/10] zram: use atomic64_xxx() to replace zram_stat64_xxx()

On Wed 05 Jun 2013 08:02:12 PM CST, Jerome Marchand wrote:
> On 06/04/2013 06:06 PM, Jiang Liu wrote:
>> Use atomic64_xxx() to replace open-coded zram_stat64_xxx().
>> Some architectures have native support of atomic64 operations,
>> so we can get rid of the spin_lock() in zram_stat64_xxx().
>> On the other hand, for platforms use generic version of atomic64
>> implement, it may cause an extra save/restore of the interrupt
>> flag. So it's a tradeoff.
>>
>> Signed-off-by: Jiang Liu <[email protected]>
>
> Before optimizing stats, I'd like to make sure that they're correct.
> What makes 64 bits fields so different that they need atomicity while
> 32 bits wouldn't? Actually all of them save compr_size only increase,
> which would make a race less critical than for 32 bits fields that all
> can go up and down (if a decrement overwrites a increment, the counter
> can wrap around zero).
>
> Jerome
>
Hi Jerome,
I'm not sure about the design decision, but I can give a guess here.
1) All 32-bit counters are only modified by
zram_bvec_write()/zram_free_page() and are/should be protected by
down_write(&zram->lock).
2) __zram_make_request() modifies some 64-bit counters without
protection.
3) zram_bvec_write() modifies some 64-bit counters and is protected
with down_read(&zram->lock).
4) It's always safe for a sysfs handler to read the 32-bit counters.
5) It's unsafe for a sysfs handler to read the 64-bit counters on
32-bit platforms.

So the current design does work, but it is very hard to understand.
I suggest using atomic_t for the 32-bit counters too, for
maintainability, though it may be a little slower.
Any suggestions?
Regards!
Gerry
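
The hazard behind point 5) is a torn read: on a 32-bit platform a plain
u64 load compiles to two 32-bit loads, so a concurrent update can be
observed half old, half new. A user-space illustration with C11 atomics
standing in for the kernel's atomic64 API (a model of the problem, not
the kernel code itself):

	#include <stdint.h>
	#include <stdatomic.h>

	/* A plain uint64_t read on a 32-bit target is two loads; a writer
	 * that updates between them makes the reader see a mixed value.
	 * Declaring the counter atomic makes the access single-copy
	 * atomic, which is what atomic64_read()/atomic64_add() guarantee. */
	static _Atomic uint64_t compr_size;

	static void writer(uint64_t inc)
	{
		atomic_fetch_add_explicit(&compr_size, inc,
					  memory_order_relaxed);
	}

	static uint64_t reader(void)
	{
		return atomic_load_explicit(&compr_size,
					    memory_order_relaxed);
	}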

2013-06-06 09:37:35

by Jerome Marchand

Subject: Re: [PATCH v2 10/10] zram: use atomic64_xxx() to replace zram_stat64_xxx()

On 06/05/2013 06:21 PM, Jiang Liu wrote:
> On Wed 05 Jun 2013 08:02:12 PM CST, Jerome Marchand wrote:
>> On 06/04/2013 06:06 PM, Jiang Liu wrote:
>>> Use atomic64_xxx() to replace open-coded zram_stat64_xxx().
>>> Some architectures have native support of atomic64 operations,
>>> so we can get rid of the spin_lock() in zram_stat64_xxx().
>>> On the other hand, for platforms use generic version of atomic64
>>> implement, it may cause an extra save/restore of the interrupt
>>> flag. So it's a tradeoff.
>>>
>>> Signed-off-by: Jiang Liu <[email protected]>
>>
>> Before optimizing stats, I'd like to make sure that they're correct.
>> What makes 64 bits fields so different that they need atomicity while
>> 32 bits wouldn't? Actually all of them save compr_size only increase,
>> which would make a race less critical than for 32 bits fields that all
>> can go up and down (if a decrement overwrites a increment, the counter
>> can wrap around zero).
>>
>> Jerome
>>
> Hi Jerome,
> I'm not sure about the design decision, but I could give a
> guess here.
> 1) All 32-bit counters are only modified by
> zram_bvec_write()/zram_page_free()
> and is/should be protected by down_write(&zram->lock).

Good point!

> 2) __zram_make_request() modifies some 64-bit counters without
> protection.
> 3) zram_bvec_write() modifies some 64-bit counters and it's protected
> with
> down_read(&zram->lock).

I assume you mean down_write().

> 4) It's always safe for sysfs handler to read 32bit counters.
> 5) It's unsafe for sysfs handler to read 64bit counters on 32bit
> platforms.

I was unaware of that.

>
> So it does work with current design, but very hard to understand.
> Suggest to use atomic_t for 32bit counters too for maintainability,
> though may be a little slower.
> Any suggestion?

If atomic counters aren't necessary, there's no need to use them, but a
comment in the zram_stats definition would be nice. Could you add one in
your next version of this patch?

Thanks
Jerome

> Regards!
> Gerry
>

2013-06-06 14:37:00

by Jiang Liu

Subject: Re: [PATCH v2 10/10] zram: use atomic64_xxx() to replace zram_stat64_xxx()

On Thu 06 Jun 2013 05:37:19 PM CST, Jerome Marchand wrote:
> On 06/05/2013 06:21 PM, Jiang Liu wrote:
>> On Wed 05 Jun 2013 08:02:12 PM CST, Jerome Marchand wrote:
>>> On 06/04/2013 06:06 PM, Jiang Liu wrote:
>>>> Use atomic64_xxx() to replace open-coded zram_stat64_xxx().
>>>> Some architectures have native support of atomic64 operations,
>>>> so we can get rid of the spin_lock() in zram_stat64_xxx().
>>>> On the other hand, for platforms use generic version of atomic64
>>>> implement, it may cause an extra save/restore of the interrupt
>>>> flag. So it's a tradeoff.
>>>>
>>>> Signed-off-by: Jiang Liu <[email protected]>
>>>
>>> Before optimizing stats, I'd like to make sure that they're correct.
>>> What makes 64 bits fields so different that they need atomicity while
>>> 32 bits wouldn't? Actually all of them save compr_size only increase,
>>> which would make a race less critical than for 32 bits fields that all
>>> can go up and down (if a decrement overwrites a increment, the counter
>>> can wrap around zero).
>>>
>>> Jerome
>>>
>> Hi Jerome,
>> I'm not sure about the design decision, but I could give a
>> guess here.
>> 1) All 32-bit counters are only modified by
>> zram_bvec_write()/zram_page_free()
>> and is/should be protected by down_write(&zram->lock).
>
> Good point!
>
>> 2) __zram_make_request() modifies some 64-bit counters without
>> protection.
>> 3) zram_bvec_write() modifies some 64-bit counters and it's protected
>> with
>> down_read(&zram->lock).
>
> I assume you mean down_write().
Actually I meant "zram_bvec_read()" instead of "zram_bvec_write()".
The read side is protected by down_read(&zram->lock).
Regards!
Gerry

>
>> 4) It's always safe for sysfs handler to read 32bit counters.
>> 5) It's unsafe for sysfs handler to read 64bit counters on 32bit
>> platforms.
>
> I was unaware of that.
>
>>
>> So it does work with current design, but very hard to understand.
>> Suggest to use atomic_t for 32bit counters too for maintainability,
>> though may be a little slower.
>> Any suggestion?
>
> If atomic counter aren't necessary, no need to use them, but a comment
> in zram_stats definition would be nice. Could you add one in your next
> version of this patch?
Sure!

>
> Thanks
> Jerome
>
>> Regards!
>> Gerry
>>
>

2013-06-06 15:07:32

by Jerome Marchand

Subject: Re: [PATCH v2 10/10] zram: use atomic64_xxx() to replace zram_stat64_xxx()

On 06/06/2013 04:36 PM, Jiang Liu wrote:
> On Thu 06 Jun 2013 05:37:19 PM CST, Jerome Marchand wrote:
>> On 06/05/2013 06:21 PM, Jiang Liu wrote:
>>> On Wed 05 Jun 2013 08:02:12 PM CST, Jerome Marchand wrote:
>>>> On 06/04/2013 06:06 PM, Jiang Liu wrote:
>>>>> Use atomic64_xxx() to replace open-coded zram_stat64_xxx().
>>>>> Some architectures have native support of atomic64 operations,
>>>>> so we can get rid of the spin_lock() in zram_stat64_xxx().
>>>>> On the other hand, for platforms use generic version of atomic64
>>>>> implement, it may cause an extra save/restore of the interrupt
>>>>> flag. So it's a tradeoff.
>>>>>
>>>>> Signed-off-by: Jiang Liu <[email protected]>
>>>>
>>>> Before optimizing stats, I'd like to make sure that they're correct.
>>>> What makes 64 bits fields so different that they need atomicity while
>>>> 32 bits wouldn't? Actually all of them save compr_size only increase,
>>>> which would make a race less critical than for 32 bits fields that all
>>>> can go up and down (if a decrement overwrites a increment, the counter
>>>> can wrap around zero).
>>>>
>>>> Jerome
>>>>
>>> Hi Jerome,
>>> I'm not sure about the design decision, but I could give a
>>> guess here.
>>> 1) All 32-bit counters are only modified by
>>> zram_bvec_write()/zram_page_free()
>>> and is/should be protected by down_write(&zram->lock).
>>
>> Good point!
>>
>>> 2) __zram_make_request() modifies some 64-bit counters without
>>> protection.
>>> 3) zram_bvec_write() modifies some 64-bit counters and it's protected
>>> with
>>> down_read(&zram->lock).
>>
>> I assume you mean down_write().
> Actually I mean "zram_bvec_read()" instead of "zram_bvec_write()".

Indeed, failed_reads is updated there.

> Read side is protected by down_read(&zram->lock).

which does not prevent concurrent read access. The counter isn't
protected by zram->lock here.

Jerome

> Regards!
> Gerry
>
>>
>>> 4) It's always safe for sysfs handler to read 32bit counters.
>>> 5) It's unsafe for sysfs handler to read 64bit counters on 32bit
>>> platforms.
>>
>> I was unaware of that.
>>
>>>
>>> So it does work with current design, but very hard to understand.
>>> Suggest to use atomic_t for 32bit counters too for maintainability,
>>> though may be a little slower.
>>> Any suggestion?
>>
>> If atomic counter aren't necessary, no need to use them, but a comment
>> in zram_stats definition would be nice. Could you add one in your next
>> version of this patch?
> Sure!
>
>>
>> Thanks
>> Jerome
>>
>>> Regards!
>>> Gerry
>>>
>>
>
>

2013-06-06 15:56:53

by Jiang Liu

Subject: Re: [PATCH v2 10/10] zram: use atomic64_xxx() to replace zram_stat64_xxx()

On Thu 06 Jun 2013 11:07:13 PM CST, Jerome Marchand wrote:
> On 06/06/2013 04:36 PM, Jiang Liu wrote:
>> On Thu 06 Jun 2013 05:37:19 PM CST, Jerome Marchand wrote:
>>> On 06/05/2013 06:21 PM, Jiang Liu wrote:
>>>> On Wed 05 Jun 2013 08:02:12 PM CST, Jerome Marchand wrote:
>>>>> On 06/04/2013 06:06 PM, Jiang Liu wrote:
>>>>>> Use atomic64_xxx() to replace open-coded zram_stat64_xxx().
>>>>>> Some architectures have native support of atomic64 operations,
>>>>>> so we can get rid of the spin_lock() in zram_stat64_xxx().
>>>>>> On the other hand, for platforms use generic version of atomic64
>>>>>> implement, it may cause an extra save/restore of the interrupt
>>>>>> flag. So it's a tradeoff.
>>>>>>
>>>>>> Signed-off-by: Jiang Liu <[email protected]>
>>>>>
>>>>> Before optimizing stats, I'd like to make sure that they're correct.
>>>>> What makes 64 bits fields so different that they need atomicity while
>>>>> 32 bits wouldn't? Actually all of them save compr_size only increase,
>>>>> which would make a race less critical than for 32 bits fields that all
>>>>> can go up and down (if a decrement overwrites a increment, the counter
>>>>> can wrap around zero).
>>>>>
>>>>> Jerome
>>>>>
>>>> Hi Jerome,
>>>> I'm not sure about the design decision, but I could give a
>>>> guess here.
>>>> 1) All 32-bit counters are only modified by
>>>> zram_bvec_write()/zram_page_free()
>>>> and is/should be protected by down_write(&zram->lock).
>>>
>>> Good point!
>>>
>>>> 2) __zram_make_request() modifies some 64-bit counters without
>>>> protection.
>>>> 3) zram_bvec_write() modifies some 64-bit counters and it's protected
>>>> with
>>>> down_read(&zram->lock).
>>>
>>> I assume you mean down_write().
>> Actually I mean "zram_bvec_read()" instead of "zram_bvec_write()".
>
> Indeed, failed_reads is updated there.
>
>> Read side is protected by down_read(&zram->lock).
>
> which does not prevent concurrent read access. The counter isn't
> protected by zram_lock here.
Hi Jerome,
Yeah, it's true. down_read(&zram->lock) can't protect modification of
the stat counters because there may be multiple readers, which is why
zram uses zram_stat64_inc() there.
Regards!
Gerry
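
To make that last point concrete: any number of tasks may hold
down_read() at once, so a plain counter increment on the read side is a
classic lost-update race. A kernel-style sketch (hypothetical minimal
example, not the driver code):

	static u64 failed_reads;	/* plain counter, NOT atomic */

	static void read_path(struct rw_semaphore *sem)
	{
		down_read(sem);		/* shared mode: readers run in parallel */
		failed_reads++;		/* load + add + store; two readers can  */
					/* both load n and both store n + 1,    */
					/* losing one increment                 */
		up_read(sem);
	}

That is why the 64-bit counters need their own serialization: the
zram_stat64_*() spinlock before this series, atomic64_*() after it.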

>
> Jerome
>
>> Regards!
>> Gerry
>>
>>>
>>>> 4) It's always safe for sysfs handler to read 32bit counters.
>>>> 5) It's unsafe for sysfs handler to read 64bit counters on 32bit
>>>> platforms.
>>>
>>> I was unaware of that.
>>>
>>>>
>>>> So it does work with current design, but very hard to understand.
>>>> Suggest to use atomic_t for 32bit counters too for maintainability,
>>>> though may be a little slower.
>>>> Any suggestion?
>>>
>>> If atomic counter aren't necessary, no need to use them, but a comment
>>> in zram_stats definition would be nice. Could you add one in your next
>>> version of this patch?
>> Sure!
>>
>>>
>>> Thanks
>>> Jerome
>>>
>>>> Regards!
>>>> Gerry
>>>>
>>>
>>
>>
>

2013-06-07 09:35:29

by Jerome Marchand

Subject: Re: [PATCH v2 05/10] zram: avoid double free in function zram_bvec_write()

On 06/04/2013 06:06 PM, Jiang Liu wrote:
> When doing a partial write and the whole page is filled with zeros,
> zram_bvec_write() will free uncmem twice.
>
> Signed-off-by: Jiang Liu <[email protected]>
> Cc: [email protected]

I thought I acked this one already. I guess I didn't.

Acked-by: Jerome Marchand <[email protected]>