Current version of zram does not allow any substitution of a default
compression algorithm. Therefore, I decided to change the existing
implementation of page compression by adding Crypto API compatibility.
All direct calls to lzo1x compression/decompression methods are now
replaced by calls consistent with Crypto. Also, I removed "workmem"
field from struct zram_meta, as it was there for lzo1x purposes only
and is no longer needed. Finally, I added a set of functions required
by Crypto API to work properly.
In order to substitute the default algorithm (lzo), change the value
of zram.compressor module parameter to a proper name (e.g. lz4).
Signed-off-by: Piotr Sarna <[email protected]>
Acked-by: Bartlomiej Zolnierkiewicz <[email protected]>
Signed-off-by: Kyungmin Park <[email protected]>
---
drivers/staging/zram/Kconfig | 5 +-
drivers/staging/zram/zram_drv.c | 106 ++++++++++++++++++++++++++++++++-------
drivers/staging/zram/zram_drv.h | 1 -
3 files changed, 89 insertions(+), 23 deletions(-)
diff --git a/drivers/staging/zram/Kconfig b/drivers/staging/zram/Kconfig
index 983314c..b51cac5 100644
--- a/drivers/staging/zram/Kconfig
+++ b/drivers/staging/zram/Kconfig
@@ -1,8 +1,7 @@
config ZRAM
tristate "Compressed RAM block device support"
- depends on BLOCK && SYSFS && ZSMALLOC
- select LZO_COMPRESS
- select LZO_DECOMPRESS
+ depends on BLOCK && SYSFS && ZSMALLOC && CRYPTO=y
+ select CRYPTO_LZO
default n
help
Creates virtual block devices called /dev/zramX (X = 0, 1, ...).
diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 7ebf91d..d6f1f67 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -29,12 +29,14 @@
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
-#include <linux/lzo.h>
+#include <linux/crypto.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include "zram_drv.h"
+#define ZRAM_COMPRESSOR_DEFAULT "lzo"
+
/* Globals */
static int zram_major;
static struct zram *zram_devices;
@@ -42,6 +44,64 @@ static struct zram *zram_devices;
/* Module params (documentation at end) */
static unsigned int num_devices = 1;
+/* Cryptographic API features */
+static char *zram_compressor = ZRAM_COMPRESSOR_DEFAULT;
+static struct crypto_comp *zram_comp_tfm;
+
+enum comp_op {
+ ZRAM_COMPOP_COMPRESS,
+ ZRAM_COMPOP_DECOMPRESS
+};
+
+static int zram_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
+ u8 *dst, unsigned int *dlen)
+{
+ struct crypto_comp *tfm;
+ int ret;
+
+ tfm = zram_comp_tfm;
+ switch (op) {
+ case ZRAM_COMPOP_COMPRESS:
+ ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
+ break;
+ case ZRAM_COMPOP_DECOMPRESS:
+ ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int __init zram_comp_init(void)
+{
+ int ret;
+ ret = crypto_has_comp(zram_compressor, 0, 0);
+ if (!ret) {
+ pr_info("%s is not available\n", zram_compressor);
+ zram_compressor = ZRAM_COMPRESSOR_DEFAULT;
+ ret = crypto_has_comp(zram_compressor, 0, 0);
+ if (!ret)
+ return -ENODEV;
+ }
+ pr_info("using %s compressor\n", zram_compressor);
+
+ /* alloc transform */
+ zram_comp_tfm = crypto_alloc_comp(zram_compressor, 0, 0);
+ if (!zram_comp_tfm)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static inline void zram_comp_exit(void)
+{
+ if (zram_comp_tfm)
+ crypto_free_comp(zram_comp_tfm);
+}
+/* end of Cryptographic API features */
+
static inline struct zram *dev_to_zram(struct device *dev)
{
return (struct zram *)dev_to_disk(dev)->private_data;
@@ -190,7 +250,6 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio)
static void zram_meta_free(struct zram_meta *meta)
{
zs_destroy_pool(meta->mem_pool);
- kfree(meta->compress_workmem);
free_pages((unsigned long)meta->compress_buffer, 1);
vfree(meta->table);
kfree(meta);
@@ -203,15 +262,11 @@ static struct zram_meta *zram_meta_alloc(u64 disksize)
if (!meta)
goto out;
- meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
- if (!meta->compress_workmem)
- goto free_meta;
-
meta->compress_buffer =
(void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
if (!meta->compress_buffer) {
pr_err("Error allocating compressor buffer space\n");
- goto free_workmem;
+ goto free_meta;
}
num_pages = disksize >> PAGE_SHIFT;
@@ -233,8 +288,6 @@ free_table:
vfree(meta->table);
free_buffer:
free_pages((unsigned long)meta->compress_buffer, 1);
-free_workmem:
- kfree(meta->compress_workmem);
free_meta:
kfree(meta);
meta = NULL;
@@ -314,7 +367,7 @@ static void zram_free_page(struct zram *zram, size_t index)
static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
{
- int ret = LZO_E_OK;
+ int ret = 0;
size_t clen = PAGE_SIZE;
unsigned char *cmem;
struct zram_meta *meta = zram->meta;
@@ -329,12 +382,13 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
if (meta->table[index].size == PAGE_SIZE)
copy_page(mem, cmem);
else
- ret = lzo1x_decompress_safe(cmem, meta->table[index].size,
- mem, &clen);
+ ret = zram_comp_op(ZRAM_COMPOP_DECOMPRESS, cmem,
+ meta->table[index].size, mem, &clen);
+
zs_unmap_object(meta->mem_pool, handle);
/* Should NEVER happen. Return bio error if it does. */
- if (unlikely(ret != LZO_E_OK)) {
+ if (unlikely(ret != 0)) {
pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
atomic64_inc(&zram->stats.failed_reads);
return ret;
@@ -374,7 +428,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
ret = zram_decompress_page(zram, uncmem, index);
/* Should NEVER happen. Return bio error if it does. */
- if (unlikely(ret != LZO_E_OK))
+ if (unlikely(ret != 0))
goto out_cleanup;
if (is_partial_io(bvec))
@@ -440,8 +494,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
goto out;
}
- ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen,
- meta->compress_workmem);
+ ret = zram_comp_op(ZRAM_COMPOP_COMPRESS, uncmem,
+ PAGE_SIZE, src, &clen);
if (!is_partial_io(bvec)) {
kunmap_atomic(user_mem);
@@ -449,7 +503,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
uncmem = NULL;
}
- if (unlikely(ret != LZO_E_OK)) {
+ if (unlikely(ret != 0)) {
pr_err("Compression failed! err=%d\n", ret);
goto out;
}
@@ -854,18 +908,26 @@ static int __init zram_init(void)
{
int ret, dev_id;
+ /* Initialize Cryptographic API */
+ pr_info("Loading Crypto API features\n");
+ if (zram_comp_init()) {
+ pr_err("Compressor initialization failed\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
if (num_devices > max_num_devices) {
pr_warn("Invalid value for num_devices: %u\n",
num_devices);
ret = -EINVAL;
- goto out;
+ goto free_comp;
}
zram_major = register_blkdev(0, "zram");
if (zram_major <= 0) {
pr_warn("Unable to get major number\n");
ret = -EBUSY;
- goto out;
+ goto free_comp;
}
/* Allocate the device array and initialize each one */
@@ -891,6 +953,8 @@ free_devices:
kfree(zram_devices);
unregister:
unregister_blkdev(zram_major, "zram");
+free_comp:
+ zram_comp_exit();
out:
return ret;
}
@@ -912,6 +976,7 @@ static void __exit zram_exit(void)
unregister_blkdev(zram_major, "zram");
kfree(zram_devices);
+ zram_comp_exit();
pr_debug("Cleanup done!\n");
}
@@ -921,6 +986,9 @@ module_exit(zram_exit);
module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of zram devices");
+module_param_named(compressor, zram_compressor, charp, 0);
+MODULE_PARM_DESC(compressor, "Compressor type");
+
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <[email protected]>");
MODULE_DESCRIPTION("Compressed RAM Block Device");
diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h
index 9e57bfb..93f4d14 100644
--- a/drivers/staging/zram/zram_drv.h
+++ b/drivers/staging/zram/zram_drv.h
@@ -88,7 +88,6 @@ struct zram_stats {
};
struct zram_meta {
- void *compress_workmem;
void *compress_buffer;
struct table *table;
struct zs_pool *mem_pool;
--
1.7.9.5
Since original zram code did not implement any per-cpu operations,
my previous patch (staging: zram: add Crypto API support) did not
include them either.
This patch complements the first one with per-cpu support for Crypto,
allocating tfms buffer separately for each online processor.
Changes are based on zswap and zcache per-cpu code.
Basic tests (concurrently writing several 10-40MB chunks to zram) performed
on an ARM-based EXYNOS4412 Quad-Core showed that per-cpu code provides
noticeable time saving, ranging between 30-40% for LZO and LZ4 compressors.
Sample data (LZO): writing 160MB, 40MB per thread took 0.60s with per-cpu
code included and approximately 0.80s without per-cpu support.
Signed-off-by: Piotr Sarna <[email protected]>
Acked-by: Bartlomiej Zolnierkiewicz <[email protected]>
Signed-off-by: Kyungmin Park <[email protected]>
---
drivers/staging/zram/zram_drv.c | 146 +++++++++++++++++++++++++++++++++------
drivers/staging/zram/zram_drv.h | 1 -
2 files changed, 125 insertions(+), 22 deletions(-)
diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index d6f1f67..3dd5085 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -30,6 +30,7 @@
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/crypto.h>
+#include <linux/cpu.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
@@ -46,7 +47,7 @@ static unsigned int num_devices = 1;
/* Cryptographic API features */
static char *zram_compressor = ZRAM_COMPRESSOR_DEFAULT;
-static struct crypto_comp *zram_comp_tfm;
+static struct crypto_comp * __percpu *zram_comp_pcpu_tfms;
enum comp_op {
ZRAM_COMPOP_COMPRESS,
@@ -59,7 +60,7 @@ static int zram_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
struct crypto_comp *tfm;
int ret;
- tfm = zram_comp_tfm;
+ tfm = *per_cpu_ptr(zram_comp_pcpu_tfms, get_cpu());
switch (op) {
case ZRAM_COMPOP_COMPRESS:
ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
@@ -70,6 +71,7 @@ static int zram_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
default:
ret = -EINVAL;
}
+ put_cpu();
return ret;
}
@@ -87,9 +89,9 @@ static int __init zram_comp_init(void)
}
pr_info("using %s compressor\n", zram_compressor);
- /* alloc transform */
- zram_comp_tfm = crypto_alloc_comp(zram_compressor, 0, 0);
- if (!zram_comp_tfm)
+ /* alloc percpu transforms */
+ zram_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
+ if (!zram_comp_pcpu_tfms)
return -ENOMEM;
return 0;
@@ -97,8 +99,110 @@ static int __init zram_comp_init(void)
static inline void zram_comp_exit(void)
{
- if (zram_comp_tfm)
- crypto_free_comp(zram_comp_tfm);
+ /* free percpu transforms */
+ if (zram_comp_pcpu_tfms)
+ free_percpu(zram_comp_pcpu_tfms);
+}
+
+
+/* Crypto API features: percpu code */
+#define ZRAM_DSTMEM_ORDER 1
+static DEFINE_PER_CPU(u8 *, zram_dstmem);
+
+static int zram_comp_cpu_up(int cpu)
+{
+ struct crypto_comp *tfm;
+
+ tfm = crypto_alloc_comp(zram_compressor, 0, 0);
+ if (IS_ERR(tfm))
+ return NOTIFY_BAD;
+ *per_cpu_ptr(zram_comp_pcpu_tfms, cpu) = tfm;
+ return NOTIFY_OK;
+}
+
+static void zram_comp_cpu_down(int cpu)
+{
+ struct crypto_comp *tfm;
+
+ tfm = *per_cpu_ptr(zram_comp_pcpu_tfms, cpu);
+ crypto_free_comp(tfm);
+ *per_cpu_ptr(zram_comp_pcpu_tfms, cpu) = NULL;
+}
+
+static int zram_cpu_notifier(struct notifier_block *nb,
+ unsigned long action, void *pcpu)
+{
+ int ret;
+ int cpu = (long) pcpu;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ ret = zram_comp_cpu_up(cpu);
+ if (ret != NOTIFY_OK) {
+ pr_err("zram: can't allocate compressor xform\n");
+ return ret;
+ }
+ per_cpu(zram_dstmem, cpu) = (void *)__get_free_pages(
+ GFP_KERNEL | __GFP_REPEAT, ZRAM_DSTMEM_ORDER);
+ break;
+ case CPU_DEAD:
+ case CPU_UP_CANCELED:
+ zram_comp_cpu_down(cpu);
+ free_pages((unsigned long) per_cpu(zram_dstmem, cpu),
+ ZRAM_DSTMEM_ORDER);
+ per_cpu(zram_dstmem, cpu) = NULL;
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block zram_cpu_notifier_block = {
+ .notifier_call = zram_cpu_notifier
+};
+
+/* Helper function releasing tfms from online cpus */
+static inline void zram_comp_cpus_down(void)
+{
+ int cpu;
+
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ void *pcpu = (void *)(long)cpu;
+ zram_cpu_notifier(&zram_cpu_notifier_block,
+ CPU_UP_CANCELED, pcpu);
+ }
+ put_online_cpus();
+}
+
+static int zram_cpu_init(void)
+{
+ int ret;
+ unsigned int cpu;
+
+ ret = register_cpu_notifier(&zram_cpu_notifier_block);
+ if (ret) {
+ pr_err("zram: can't register cpu notifier\n");
+ goto out;
+ }
+
+ get_online_cpus();
+ for_each_online_cpu(cpu) {
+ void *pcpu = (void *)(long)cpu;
+ if (zram_cpu_notifier(&zram_cpu_notifier_block,
+ CPU_UP_PREPARE, pcpu) != NOTIFY_OK)
+ goto cleanup;
+ }
+ put_online_cpus();
+ return ret;
+
+cleanup:
+ zram_comp_cpus_down();
+
+out:
+ put_online_cpus();
+ return -ENOMEM;
}
/* end of Cryptographic API features */
@@ -250,7 +354,6 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio)
static void zram_meta_free(struct zram_meta *meta)
{
zs_destroy_pool(meta->mem_pool);
- free_pages((unsigned long)meta->compress_buffer, 1);
vfree(meta->table);
kfree(meta);
}
@@ -262,18 +365,11 @@ static struct zram_meta *zram_meta_alloc(u64 disksize)
if (!meta)
goto out;
- meta->compress_buffer =
- (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
- if (!meta->compress_buffer) {
- pr_err("Error allocating compressor buffer space\n");
- goto free_meta;
- }
-
num_pages = disksize >> PAGE_SHIFT;
meta->table = vzalloc(num_pages * sizeof(*meta->table));
if (!meta->table) {
pr_err("Error allocating zram address table\n");
- goto free_buffer;
+ goto free_meta;
}
meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
@@ -286,8 +382,6 @@ static struct zram_meta *zram_meta_alloc(u64 disksize)
free_table:
vfree(meta->table);
-free_buffer:
- free_pages((unsigned long)meta->compress_buffer, 1);
free_meta:
kfree(meta);
meta = NULL;
@@ -455,7 +549,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
struct zram_meta *meta = zram->meta;
page = bvec->bv_page;
- src = meta->compress_buffer;
+ src = __get_cpu_var(zram_dstmem);
+ BUG_ON(src == NULL);
if (is_partial_io(bvec)) {
/*
@@ -916,18 +1011,24 @@ static int __init zram_init(void)
goto out;
}
+ if (zram_cpu_init()) {
+ pr_err("Per-cpu initialization failed\n");
+ ret = -ENOMEM;
+ goto free_comp;
+ }
+
if (num_devices > max_num_devices) {
pr_warn("Invalid value for num_devices: %u\n",
num_devices);
ret = -EINVAL;
- goto free_comp;
+ goto free_cpu_comp;
}
zram_major = register_blkdev(0, "zram");
if (zram_major <= 0) {
pr_warn("Unable to get major number\n");
ret = -EBUSY;
- goto free_comp;
+ goto free_cpu_comp;
}
/* Allocate the device array and initialize each one */
@@ -953,6 +1054,8 @@ free_devices:
kfree(zram_devices);
unregister:
unregister_blkdev(zram_major, "zram");
+free_cpu_comp:
+ zram_comp_cpus_down();
free_comp:
zram_comp_exit();
out:
@@ -976,6 +1079,7 @@ static void __exit zram_exit(void)
unregister_blkdev(zram_major, "zram");
kfree(zram_devices);
+ zram_comp_cpus_down();
zram_comp_exit();
pr_debug("Cleanup done!\n");
}
diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h
index 93f4d14..474474a 100644
--- a/drivers/staging/zram/zram_drv.h
+++ b/drivers/staging/zram/zram_drv.h
@@ -88,7 +88,6 @@ struct zram_stats {
};
struct zram_meta {
- void *compress_buffer;
struct table *table;
struct zs_pool *mem_pool;
};
--
1.7.9.5
On Tue, Jul 30, 2013 at 02:30:48PM +0200, Piotr Sarna wrote:
> Current version of zram does not allow any substitution of a default
> compression algorithm. Therefore, I decided to change the existing
> implementation of page compression by adding Crypto API compability.
As I have stated before, I want this code to get merged out of the
drivers/staging/ area _before_ any new features get added to it. People
are taking too much time adding new stuff, like this, and no one seems
to be working to get it merged to the proper place anymore.
So again, I'm going to have to say no to a new feature here, sorry.
zcache, zram, and zsmalloc need to get cleaned up and merged out of
staging before anything new can be added to them.
Or, if no one is working on that, I guess I can just delete them,
right?...
thanks,
greg k-h
On (07/30/13 14:30), Piotr Sarna wrote:
>
> Since original zram code did not implement any per-cpu operations,
> my previous patch (staging: zram: add Crypto API support) did not
> include them either.
>
> This patch complements the first one with per-cpu support for Crypto,
> allocating tfms buffer separately for each online processor.
> Changes are based on zswap and zcache per-cpu code.
>
> Basic tests (concurrent writing several 10-40MB chunks to zram) performed
> on an ARM-based EXYNOS4412 Quad-Core showed that per-cpu code provides
> noticeable time saving, ranging between 30-40% for LZO and LZ4 compressors.
> Sample data (LZO): writing 160MB, 40MB per thread took 0.60s with per-cpu
> code included and approximately 0.80s without per-cpu support.
Hello,
I've been working on a similar thing, though my implementation does not deal
with per-cpu data and CPU hotplug actions. I know Greg's opinion on your patch
set, so I just share my thoughts/work.
Each zram device contains a list of workmem structures with the required
compression/decompression buffers and algorithm working memory pre-allocated.
The number of allocated workmem structures is limited to the number of online
CPUs. Each reader/writer performs an idle-workmem list lookup, with these possible cases:
-- an idle workmem exists: remove it from the idle list and perform the operation
-- no idle workmem exists:
	a) workmem count is less than the number of online CPUs: allocate a new workmem
	b) workmem count is equal to the number of online CPUs: put the task into the wait
	list
Upon completion, the task puts the workmem back on the idle list (or releases the
structure if the number of online CPUs has decreased) and wakes up existing sleepers.
There is also no longer an rw lock in the RW path, so a writer does not block
readers (unless they touch the same sector: many concurrent readers are
allowed, while a write blocks concurrent read/write operations on that sector).
The patch also hides the direct compression call and introduces
struct zram_compress_ops {
long workmem_sz;
int (*compress)(const unsigned char *src, size_t src_len,
unsigned char *dst, size_t *dst_len, void *wrkmem);
int (*decompress)(const unsigned char *src, size_t src_len,
unsigned char *dst, size_t *dst_len);
};
instead, so the compression algorithm can be changed via sysfs (not in this patch).
Initial testing has demonstrated that iozone with a mixed workload can perform
significantly faster (iozone -t -T -R -l 3 -u 3 -r 16K -s 40M +Z -I):
w/o patch (LZO)
Children see throughput for 8 mixed workload = 428973.09 KB/sec
Parent sees throughput for 8 mixed workload = 384181.66 KB/sec
w/ patch (LZO)
Children see throughput for 8 mixed workload = 2957859.84 KB/sec
Parent sees throughput for 8 mixed workload = 1859763.07 KB/sec
Signed-off-by: Sergey Senozhatsky <[email protected]>
---
drivers/staging/zram/zram_drv.c | 604 +++++++++++++++++++++++-----------------
drivers/staging/zram/zram_drv.h | 75 +++--
2 files changed, 403 insertions(+), 276 deletions(-)
diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 7ebf91d..e936e38 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -29,9 +29,8 @@
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
-#include <linux/lzo.h>
#include <linux/string.h>
-#include <linux/vmalloc.h>
+#include <linux/lzo.h>
#include "zram_drv.h"
@@ -99,21 +98,13 @@ static ssize_t notify_free_show(struct device *dev,
(u64)atomic64_read(&zram->stats.notify_free));
}
-static ssize_t zero_pages_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct zram *zram = dev_to_zram(dev);
-
- return sprintf(buf, "%u\n", zram->stats.pages_zero);
-}
-
static ssize_t orig_data_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct zram *zram = dev_to_zram(dev);
return sprintf(buf, "%llu\n",
- (u64)(zram->stats.pages_stored) << PAGE_SHIFT);
+ (u64)atomic64_read(&zram->stats.pages_stored) << PAGE_SHIFT);
}
static ssize_t compr_data_size_show(struct device *dev,
@@ -134,7 +125,7 @@ static ssize_t mem_used_total_show(struct device *dev,
down_read(&zram->init_lock);
if (zram->init_done)
- val = zs_get_total_size_bytes(meta->mem_pool);
+ val = zs_get_total_size_bytes(meta->pool);
up_read(&zram->init_lock);
return sprintf(buf, "%llu\n", val);
@@ -143,19 +134,19 @@ static ssize_t mem_used_total_show(struct device *dev,
static int zram_test_flag(struct zram_meta *meta, u32 index,
enum zram_pageflags flag)
{
- return meta->table[index].flags & BIT(flag);
+ return meta->sector[index].flags & BIT(flag);
}
static void zram_set_flag(struct zram_meta *meta, u32 index,
enum zram_pageflags flag)
{
- meta->table[index].flags |= BIT(flag);
+ meta->sector[index].flags |= BIT(flag);
}
static void zram_clear_flag(struct zram_meta *meta, u32 index,
enum zram_pageflags flag)
{
- meta->table[index].flags &= ~BIT(flag);
+ meta->sector[index].flags &= ~BIT(flag);
}
static inline int is_partial_io(struct bio_vec *bvec)
@@ -187,68 +178,166 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio)
return 1;
}
-static void zram_meta_free(struct zram_meta *meta)
+static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
+{
+ if (*offset + bvec->bv_len >= PAGE_SIZE)
+ (*index)++;
+ *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
+}
+
+/* free workmem, release workmem pages */
+static void free_workmem(struct list_head *wm)
{
- zs_destroy_pool(meta->mem_pool);
- kfree(meta->compress_workmem);
- free_pages((unsigned long)meta->compress_buffer, 1);
- vfree(meta->table);
+ struct zram_workmem *workmem = list_entry(wm,
+ struct zram_workmem, list);
+
+ kfree(workmem->dbuf);
+ kfree(workmem->cbuf);
+ kfree(workmem->mem);
+ kfree(workmem);
+}
+
+/* allocate new workmem structure, return ERR_PTR on error */
+static struct list_head *alloc_workmem(struct zram *zram)
+{
+ struct zram_workmem *workmem;
+
+ workmem = kzalloc(sizeof(*workmem), GFP_NOFS);
+ if (!workmem)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&workmem->list);
+
+ /* algorithm (lzo, lz4) specific working memory buffer */
+ workmem->mem = kzalloc(zram->ops.workmem_sz, GFP_KERNEL);
+ /* allocate 2 pages. 1 for compressed data, plus 1 extra for the
+ * case when compressed size is larger than the original one. */
+ workmem->dbuf = kmalloc(2 * PAGE_SIZE, GFP_KERNEL);
+ workmem->cbuf = kmalloc(2 * PAGE_SIZE, GFP_KERNEL);
+ if (!workmem->mem || !workmem->dbuf || !workmem->cbuf)
+ goto fail;
+
+ return &workmem->list;
+fail:
+ free_workmem(&workmem->list);
+ return ERR_PTR(-ENOMEM);
+}
+
+/* find idle workmem or allocate a new one if number of active workmem structures
+ * is less than online CPUs number */
+static struct list_head *find_workmem(struct zram *zram)
+{
+ struct zram_meta *meta = zram->meta;
+ struct list_head *workmem;
+ int cpus = num_online_cpus();
+retry:
+ /* get existing idle workmem or wait until other processes release
+ * one for us. */
+ spin_lock(&zram->lock);
+ if (!list_empty(&meta->idle_workmem)) {
+ workmem = meta->idle_workmem.next;
+ list_del(workmem);
+ spin_unlock(&zram->lock);
+ return workmem;
+ }
+
+ /* number of active workmem is limited to online CPUs number,
+ * wait for existing workmem to become idle */
+ if (atomic_read(&meta->num_workmem) >= cpus) {
+ DEFINE_WAIT(wait);
+
+ spin_unlock(&zram->lock);
+ prepare_to_wait_exclusive(&meta->workmem_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (atomic_read(&meta->num_workmem) >= cpus)
+ schedule();
+ finish_wait(&meta->workmem_wait, &wait);
+ goto retry;
+ }
+
+ atomic_inc(&meta->num_workmem);
+ spin_unlock(&zram->lock);
+
+ workmem = alloc_workmem(zram);
+ if (IS_ERR(workmem)) {
+ atomic_dec(&meta->num_workmem);
+ if (waitqueue_active(&meta->workmem_wait))
+ wake_up(&meta->workmem_wait);
+ }
+ return workmem;
+}
+
+/* put workmem to idle list or release it, if the number of online
+ * CPUs has decreased */
+static void put_workmem(struct zram *zram, struct list_head *workmem)
+{
+ struct zram_meta *meta = zram->meta;
+ spin_lock(&zram->lock);
+ /* add workmem to idle list or release it if the number of
+ * online CPUs has decreased since the last time we checked it.*/
+ if (atomic_read(&meta->num_workmem) <= num_online_cpus()) {
+ list_add_tail(workmem, &meta->idle_workmem);
+ spin_unlock(&zram->lock);
+ goto wake;
+ }
+ spin_unlock(&zram->lock);
+
+ free_workmem(workmem);
+ atomic_dec(&meta->num_workmem);
+wake:
+ if (waitqueue_active(&meta->workmem_wait))
+ wake_up(&meta->workmem_wait);
+}
+
+static void zram_free_meta(struct zram_meta *meta)
+{
+ struct list_head *workmem;
+
+ while (!list_empty(&meta->idle_workmem)) {
+ workmem = meta->idle_workmem.next;
+ list_del(workmem);
+ free_workmem(workmem);
+ atomic_dec(&meta->num_workmem);
+ }
+
+ zs_destroy_pool(meta->pool);
+ vfree(meta->sector);
kfree(meta);
}
-static struct zram_meta *zram_meta_alloc(u64 disksize)
+static struct zram_meta *zram_alloc_meta(u64 disksize)
{
size_t num_pages;
struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
if (!meta)
- goto out;
-
- meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
- if (!meta->compress_workmem)
- goto free_meta;
-
- meta->compress_buffer =
- (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
- if (!meta->compress_buffer) {
- pr_err("Error allocating compressor buffer space\n");
- goto free_workmem;
- }
+ goto out_error;
num_pages = disksize >> PAGE_SHIFT;
- meta->table = vzalloc(num_pages * sizeof(*meta->table));
- if (!meta->table) {
- pr_err("Error allocating zram address table\n");
- goto free_buffer;
+ meta->sector = vzalloc(num_pages * sizeof(*meta->sector));
+ if (!meta->sector) {
+ pr_err("Error allocating zram address sector\n");
+ goto out_error;
}
- meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
- if (!meta->mem_pool) {
+ meta->pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
+ if (!meta->pool) {
pr_err("Error creating memory pool\n");
- goto free_table;
+ goto out_error;
}
- return meta;
+ INIT_LIST_HEAD(&meta->idle_workmem);
+ atomic_set(&meta->num_workmem, 0);
+ init_waitqueue_head(&meta->workmem_wait);
+ init_waitqueue_head(&meta->io_wait);
-free_table:
- vfree(meta->table);
-free_buffer:
- free_pages((unsigned long)meta->compress_buffer, 1);
-free_workmem:
- kfree(meta->compress_workmem);
-free_meta:
+ return meta;
+out_error:
+ vfree(meta->sector);
kfree(meta);
- meta = NULL;
-out:
+ meta = ERR_PTR(-ENOMEM);
return meta;
}
-static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
-{
- if (*offset + bvec->bv_len >= PAGE_SIZE)
- (*index)++;
- *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
-}
-
static int page_zero_filled(void *ptr)
{
unsigned int pos;
@@ -282,244 +371,269 @@ static void handle_zero_page(struct bio_vec *bvec)
static void zram_free_page(struct zram *zram, size_t index)
{
struct zram_meta *meta = zram->meta;
- unsigned long handle = meta->table[index].handle;
- u16 size = meta->table[index].size;
+ unsigned long handle = meta->sector[index].handle;
+ u16 size = meta->sector[index].size;
- if (unlikely(!handle)) {
- /*
- * No memory is allocated for zero filled pages.
- * Simply clear zero page flag.
- */
- if (zram_test_flag(meta, index, ZRAM_ZERO)) {
- zram_clear_flag(meta, index, ZRAM_ZERO);
- zram->stats.pages_zero--;
- }
+ if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
+ zram_clear_flag(meta, index, ZRAM_ZERO);
return;
}
- if (unlikely(size > max_zpage_size))
- zram->stats.bad_compress--;
+ if (size > max_zpage_size)
+ atomic64_dec(&zram->stats.bad_compress);
+ else
+ atomic64_dec(&zram->stats.good_compress);
+
+ zs_free(meta->pool, handle);
+
+ atomic64_sub(meta->sector[index].size, &zram->stats.compr_size);
+ atomic64_dec(&zram->stats.pages_stored);
+
+ meta->sector[index].handle = 0;
+ meta->sector[index].size = 0;
+}
- zs_free(meta->mem_pool, handle);
+static unsigned long zram_get_pool_handle(struct zram *zram, size_t size,
+ u32 index)
+{
+ struct zram_meta *meta = zram->meta;
+ unsigned long handle = meta->sector[index].handle;
- if (size <= PAGE_SIZE / 2)
- zram->stats.good_compress--;
+ /* use existing memory, if its size is sufficient */
+ if (handle && meta->sector[index].size >= size) {
+ atomic64_sub(meta->sector[index].size,
+ &zram->stats.compr_size);
+ return handle;
+ }
+ /* free existing handle */
+ zram_free_page(zram, index);
- atomic64_sub(meta->table[index].size, &zram->stats.compr_size);
- zram->stats.pages_stored--;
+ handle = zs_malloc(meta->pool, size);
+ if (!handle)
+ return handle;
+ /* update stats */
+ if (size < max_zpage_size)
+ atomic64_inc(&zram->stats.good_compress);
+ else
+ atomic64_inc(&zram->stats.bad_compress);
- meta->table[index].handle = 0;
- meta->table[index].size = 0;
+ atomic64_inc(&zram->stats.pages_stored);
+ atomic64_add(size, &zram->stats.compr_size);
+
+ return handle;
}
-static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
+static int zram_decompress_page(struct zram *zram, char *page, u32 index)
{
- int ret = LZO_E_OK;
- size_t clen = PAGE_SIZE;
unsigned char *cmem;
+ int ret = 0;
+ size_t clen = PAGE_SIZE, size;
struct zram_meta *meta = zram->meta;
- unsigned long handle = meta->table[index].handle;
+ unsigned long handle = meta->sector[index].handle;
if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
- clear_page(mem);
+ clear_page(page);
return 0;
}
- cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
- if (meta->table[index].size == PAGE_SIZE)
- copy_page(mem, cmem);
+ size = meta->sector[index].size;
+ cmem = zs_map_object(meta->pool, handle, ZS_MM_RO);
+ if (meta->sector[index].size == PAGE_SIZE)
+ copy_page(page, cmem);
else
- ret = lzo1x_decompress_safe(cmem, meta->table[index].size,
- mem, &clen);
- zs_unmap_object(meta->mem_pool, handle);
+ ret = zram->ops.decompress(cmem, size, page, &clen);
+ zs_unmap_object(meta->pool, handle);
- /* Should NEVER happen. Return bio error if it does. */
- if (unlikely(ret != LZO_E_OK)) {
- pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
+ if (unlikely(ret))
atomic64_inc(&zram->stats.failed_reads);
- return ret;
- }
-
- return 0;
+ return ret;
}
-static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
- u32 index, int offset, struct bio *bio)
+static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, u32 index, int offset)
{
- int ret;
- struct page *page;
- unsigned char *user_mem, *uncmem = NULL;
+ int ret = -EINVAL;
+ unsigned char *page;
struct zram_meta *meta = zram->meta;
- page = bvec->bv_page;
- if (unlikely(!meta->table[index].handle) ||
+ if (!meta->sector[index].handle ||
zram_test_flag(meta, index, ZRAM_ZERO)) {
handle_zero_page(bvec);
return 0;
}
- if (is_partial_io(bvec))
- /* Use a temporary buffer to decompress the page */
- uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
-
- user_mem = kmap_atomic(page);
- if (!is_partial_io(bvec))
- uncmem = user_mem;
-
- if (!uncmem) {
- pr_info("Unable to allocate temp memory\n");
- ret = -ENOMEM;
- goto out_cleanup;
- }
-
- ret = zram_decompress_page(zram, uncmem, index);
- /* Should NEVER happen. Return bio error if it does. */
- if (unlikely(ret != LZO_E_OK))
- goto out_cleanup;
-
- if (is_partial_io(bvec))
- memcpy(user_mem + bvec->bv_offset, uncmem + offset,
- bvec->bv_len);
-
- flush_dcache_page(page);
- ret = 0;
-out_cleanup:
- kunmap_atomic(user_mem);
- if (is_partial_io(bvec))
- kfree(uncmem);
+ page = kmap_atomic(bvec->bv_page);
+ ret = zram_decompress_page(zram, page, index);
+ kunmap_atomic(page);
+ flush_dcache_page(bvec->bv_page);
return ret;
}
-static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
- int offset)
+static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, int offset)
{
int ret = 0;
size_t clen;
unsigned long handle;
- struct page *page;
- unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
+ unsigned char *page = NULL, *pool_mem;
struct zram_meta *meta = zram->meta;
+ struct zram_workmem *wm;
+ struct list_head *workmem = find_workmem(zram);
+
+ if (IS_ERR(workmem))
+ return ret;
- page = bvec->bv_page;
- src = meta->compress_buffer;
+ wm = list_entry(workmem, struct zram_workmem, list);
+ page = kmap_atomic(bvec->bv_page);
if (is_partial_io(bvec)) {
- /*
- * This is a partial IO. We need to read the full page
- * before to write the changes.
- */
- uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
- if (!uncmem) {
- ret = -ENOMEM;
- goto out;
- }
- ret = zram_decompress_page(zram, uncmem, index);
+ ret = zram_decompress_page(zram, wm->dbuf, index);
if (ret)
goto out;
+ /* copy page bytes to working memory */
+ memcpy(wm->dbuf + offset, page + bvec->bv_offset, bvec->bv_len);
+ kunmap_atomic(page);
+ page = wm->dbuf;
}
- user_mem = kmap_atomic(page);
-
- if (is_partial_io(bvec)) {
- memcpy(uncmem + offset, user_mem + bvec->bv_offset,
- bvec->bv_len);
- kunmap_atomic(user_mem);
- user_mem = NULL;
- } else {
- uncmem = user_mem;
- }
-
- if (page_zero_filled(uncmem)) {
- kunmap_atomic(user_mem);
- /* Free memory associated with this sector now. */
- zram_free_page(zram, index);
-
- zram->stats.pages_zero++;
+ if (page_zero_filled(page)) {
zram_set_flag(meta, index, ZRAM_ZERO);
- ret = 0;
+ zram_free_page(zram, index);
goto out;
}
- ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen,
- meta->compress_workmem);
-
+ ret = zram->ops.compress(page, bvec->bv_len, wm->cbuf, &clen, wm->mem);
if (!is_partial_io(bvec)) {
- kunmap_atomic(user_mem);
- user_mem = NULL;
- uncmem = NULL;
+ kunmap_atomic(page);
+ page = NULL;
}
- if (unlikely(ret != LZO_E_OK)) {
- pr_err("Compression failed! err=%d\n", ret);
+ if (unlikely(ret)) {
+ pr_err("Compression failed: error=%d\n", ret);
goto out;
}
- if (unlikely(clen > max_zpage_size)) {
- zram->stats.bad_compress++;
+ if (clen >= max_zpage_size)
clen = PAGE_SIZE;
- src = NULL;
- if (is_partial_io(bvec))
- src = uncmem;
- }
-
- handle = zs_malloc(meta->mem_pool, clen);
- if (!handle) {
- pr_info("Error allocating memory for compressed page: %u, size=%zu\n",
+
+ handle = zram_get_pool_handle(zram, clen, index);
+ if (unlikely(!handle)) {
+ pr_info("Allocation error: page=%u, size=%zu\n",
index, clen);
ret = -ENOMEM;
goto out;
}
- cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
- if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
- src = kmap_atomic(page);
- copy_page(cmem, src);
- kunmap_atomic(src);
- } else {
- memcpy(cmem, src, clen);
+ pool_mem = zs_map_object(meta->pool, handle, ZS_MM_WO);
+ if (clen != PAGE_SIZE)
+ memcpy(pool_mem, wm->cbuf, clen);
+ else {
+ page = kmap_atomic(bvec->bv_page);
+ copy_page(pool_mem, page);
+ kunmap_atomic(page);
+ page = NULL;
}
+ zs_unmap_object(meta->pool, handle);
- zs_unmap_object(meta->mem_pool, handle);
-
- /*
- * Free memory associated with this sector
- * before overwriting unused sectors.
- */
- zram_free_page(zram, index);
-
- meta->table[index].handle = handle;
- meta->table[index].size = clen;
-
- /* Update stats */
- atomic64_add(clen, &zram->stats.compr_size);
- zram->stats.pages_stored++;
- if (clen <= PAGE_SIZE / 2)
- zram->stats.good_compress++;
-
+ meta->sector[index].handle = handle;
+ meta->sector[index].size = clen;
out:
- if (is_partial_io(bvec))
- kfree(uncmem);
+ put_workmem(zram, workmem);
- if (ret)
+ if (page && !is_partial_io(bvec))
+ kunmap_atomic(page);
+ if (unlikely(ret))
atomic64_inc(&zram->stats.failed_writes);
return ret;
}
+/* lock zram sector or sleep until sector is available for RW */
+static int zram_begin_sector_rw(struct zram *zram, struct zram_sector *sector, int type)
+{
+ struct zram_meta *meta = zram->meta;
+ int ret = 1;
+ spin_lock(&zram->lock);
+retry:
+ /* sector count:
+ * 0 -- free to use
+ * >0 -- number of active readers (many), writers are blocked
+ * -1 -- active writer (only one), readers and writers are blocked
+ */
+ if (type == WRITE) {
+ /* active RW or pending operation */
+ if (sector->count != 0 || sector->pending_write ||
+ sector->pending_read) {
+ DEFINE_WAIT(wait);
+
+ sector->pending_write++;
+ spin_unlock(&zram->lock);
+ prepare_to_wait_exclusive(&meta->io_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ schedule();
+ finish_wait(&meta->io_wait, &wait);
+ spin_lock(&zram->lock);
+ sector->pending_write--;
+ goto retry;
+ }
+ sector->count = -1;
+ } else {
+ /* active write or pending write */
+ if (sector->count < 0 || sector->pending_write) {
+ DEFINE_WAIT(wait);
+
+ sector->pending_read++;
+ spin_unlock(&zram->lock);
+ prepare_to_wait_exclusive(&meta->io_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ schedule();
+ finish_wait(&meta->io_wait, &wait);
+ spin_lock(&zram->lock);
+ sector->pending_read--;
+ goto retry;
+ }
+ sector->count++;
+ }
+ ret = 0;
+ spin_unlock(&zram->lock);
+ return ret;
+}
+
+/* unlock sector and wake up pending processes */
+static int zram_end_sector_rw(struct zram *zram, struct zram_sector *sector, int type)
+{
+ struct zram_meta *meta = zram->meta;
+ spin_lock(&zram->lock);
+ if (type == WRITE)
+ sector->count = 0;
+ else
+ sector->count--;
+ /* wake up pending process only if sector count is zero */
+ if (sector->count == 0 && waitqueue_active(&meta->io_wait))
+ wake_up(&meta->io_wait);
+ spin_unlock(&zram->lock);
+ return 0;
+}
+
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
- int offset, struct bio *bio, int rw)
+ int offset, struct bio *bio)
{
- int ret;
+ struct zram_meta *meta = zram->meta;
+ struct zram_sector *sector = &meta->sector[index];
+ int type = bio_data_dir(bio);
+ int ret = -EINVAL;
- if (rw == READ) {
- down_read(&zram->lock);
- ret = zram_bvec_read(zram, bvec, index, offset, bio);
- up_read(&zram->lock);
- } else {
- down_write(&zram->lock);
+ if (type == READA)
+ type = READ;
+
+ zram_begin_sector_rw(zram, sector, type);
+
+ if (type == WRITE) {
+ atomic64_inc(&zram->stats.num_writes);
ret = zram_bvec_write(zram, bvec, index, offset);
- up_write(&zram->lock);
+ } else {
+ atomic64_inc(&zram->stats.num_reads);
+ ret = zram_bvec_read(zram, bvec, index, offset);
}
+ zram_end_sector_rw(zram, sector, type);
return ret;
}
@@ -539,14 +653,14 @@ static void zram_reset_device(struct zram *zram)
/* Free all pages that are still in this zram device */
for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) {
- unsigned long handle = meta->table[index].handle;
+ unsigned long handle = meta->sector[index].handle;
if (!handle)
continue;
- zs_free(meta->mem_pool, handle);
+ zs_free(meta->pool, handle);
}
- zram_meta_free(zram->meta);
+ zram_free_meta(zram->meta);
zram->meta = NULL;
/* Reset stats */
memset(&zram->stats, 0, sizeof(zram->stats));
@@ -578,6 +692,10 @@ static void zram_init_device(struct zram *zram, struct zram_meta *meta)
zram->meta = meta;
zram->init_done = 1;
+ zram->ops.workmem_sz = LZO1X_MEM_COMPRESS;
+ zram->ops.compress = lzo1x_1_compress;
+ zram->ops.decompress = lzo1x_decompress_safe;
+
pr_debug("Initialization done!\n");
}
@@ -593,11 +711,14 @@ static ssize_t disksize_store(struct device *dev,
return -EINVAL;
disksize = PAGE_ALIGN(disksize);
- meta = zram_meta_alloc(disksize);
+ meta = zram_alloc_meta(disksize);
+ if (IS_ERR(meta))
+ return -ENOMEM;
+
down_write(&zram->init_lock);
if (zram->init_done) {
up_write(&zram->init_lock);
- zram_meta_free(meta);
+ zram_free_meta(meta);
pr_info("Cannot change disksize for initialized device\n");
return -EBUSY;
}
@@ -640,21 +761,12 @@ static ssize_t reset_store(struct device *dev,
return len;
}
-static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
+static void __zram_make_request(struct zram *zram, struct bio *bio)
{
int i, offset;
u32 index;
struct bio_vec *bvec;
- switch (rw) {
- case READ:
- atomic64_inc(&zram->stats.num_reads);
- break;
- case WRITE:
- atomic64_inc(&zram->stats.num_writes);
- break;
- }
-
index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
offset = (bio->bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
@@ -672,16 +784,15 @@ static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
bv.bv_len = max_transfer_size;
bv.bv_offset = bvec->bv_offset;
- if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0)
+ if (zram_bvec_rw(zram, &bv, index, offset, bio) < 0)
goto out;
bv.bv_len = bvec->bv_len - max_transfer_size;
bv.bv_offset += max_transfer_size;
- if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0)
+ if (zram_bvec_rw(zram, &bv, index + 1, 0, bio) < 0)
goto out;
} else
- if (zram_bvec_rw(zram, bvec, index, offset, bio, rw)
- < 0)
+ if (zram_bvec_rw(zram, bvec, index, offset, bio) < 0)
goto out;
update_position(&index, &offset, bvec);
@@ -711,9 +822,8 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio)
goto error;
}
- __zram_make_request(zram, bio, bio_data_dir(bio));
+ __zram_make_request(zram, bio);
up_read(&zram->init_lock);
-
return;
error:
@@ -724,12 +834,10 @@ error:
static void zram_slot_free_notify(struct block_device *bdev,
unsigned long index)
{
- struct zram *zram;
-
- zram = bdev->bd_disk->private_data;
- down_write(&zram->lock);
+ struct zram *zram = bdev->bd_disk->private_data;
+ spin_lock(&zram->lock);
zram_free_page(zram, index);
- up_write(&zram->lock);
+ spin_unlock(&zram->lock);
atomic64_inc(&zram->stats.notify_free);
}
@@ -746,7 +854,6 @@ static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL);
static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL);
static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL);
static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL);
-static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL);
static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL);
static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);
@@ -759,7 +866,6 @@ static struct attribute *zram_disk_attrs[] = {
&dev_attr_num_writes.attr,
&dev_attr_invalid_io.attr,
&dev_attr_notify_free.attr,
- &dev_attr_zero_pages.attr,
&dev_attr_orig_data_size.attr,
&dev_attr_compr_data_size.attr,
&dev_attr_mem_used_total.attr,
@@ -774,9 +880,9 @@ static int create_device(struct zram *zram, int device_id)
{
int ret = -ENOMEM;
- init_rwsem(&zram->lock);
init_rwsem(&zram->init_lock);
-
+ spin_lock_init(&zram->lock);
+
zram->queue = blk_alloc_queue(GFP_KERNEL);
if (!zram->queue) {
pr_err("Error allocating disk queue for device %d\n",
diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h
index 9e57bfb..64d015f 100644
--- a/drivers/staging/zram/zram_drv.h
+++ b/drivers/staging/zram/zram_drv.h
@@ -16,7 +16,7 @@
#define _ZRAM_DRV_H_
#include <linux/spinlock.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
#include "../zsmalloc/zsmalloc.h"
@@ -55,19 +55,20 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
enum zram_pageflags {
/* Page consists entirely of zeros */
ZRAM_ZERO,
-
__NR_ZRAM_PAGEFLAGS,
};
/*-- Data structures */
/* Allocated for each disk page */
-struct table {
+struct zram_sector {
unsigned long handle;
- u16 size; /* object size (excluding header) */
- u8 count; /* object ref count (not yet used) */
+ u16 size; /* object size (excluding header) */
+ u8 pending_write;
+ u8 pending_read;
+ s8 count; /* prevent concurrent sector read-write operations */
u8 flags;
-} __aligned(4);
+};
/*
* All 64bit fields should only be manipulated by 64bit atomic accessors.
@@ -77,39 +78,59 @@ struct zram_stats {
atomic64_t compr_size; /* compressed size of pages stored */
atomic64_t num_reads; /* failed + successful */
atomic64_t num_writes; /* --do-- */
+ atomic64_t pages_stored; /* no. of pages stored */
+ /* no. of pages with compression ratio<75% */
+ atomic64_t good_compress;
+ /* no. of pages with compression ratio>=75% */
+ atomic64_t bad_compress;
atomic64_t failed_reads; /* should NEVER! happen */
atomic64_t failed_writes; /* can happen when memory is too low */
atomic64_t invalid_io; /* non-page-aligned I/O requests */
atomic64_t notify_free; /* no. of swap slot free notifications */
- u32 pages_zero; /* no. of zero filled pages */
- u32 pages_stored; /* no. of pages currently stored */
- u32 good_compress; /* % of pages with compression ratio<=50% */
- u32 bad_compress; /* % of pages with compression ratio>=75% */
+};
+
+/*
+ * compression/decompression functions and algorithm workmem size.
+ */
+struct zram_compress_ops {
+ long workmem_sz;
+
+ int (*compress)(const unsigned char *src, size_t src_len,
+ unsigned char *dst, size_t *dst_len, void *wrkmem);
+
+ int (*decompress)(const unsigned char *src, size_t src_len,
+ unsigned char *dst, size_t *dst_len);
+};
+
+struct zram_workmem {
+ struct list_head list;
+ void *mem; /* algorithm workmem */
+ void *dbuf; /* decompression buffer */
+ void *cbuf; /* compression buffer */
};
struct zram_meta {
- void *compress_workmem;
- void *compress_buffer;
- struct table *table;
- struct zs_pool *mem_pool;
+ struct zram_sector *sector;
+ struct zs_pool *pool;
+
+ struct list_head idle_workmem;
+ atomic_t num_workmem;
+ wait_queue_head_t workmem_wait;
+ wait_queue_head_t io_wait;
};
struct zram {
- struct zram_meta *meta;
- struct rw_semaphore lock; /* protect compression buffers, table,
- * 32bit stat counters against concurrent
- * notifications, reads and writes */
- struct request_queue *queue;
- struct gendisk *disk;
- int init_done;
- /* Prevent concurrent execution of device init, reset and R/W request */
struct rw_semaphore init_lock;
- /*
- * This is the limit on amount of *uncompressed* worth of data
- * we can store in a disk.
- */
- u64 disksize; /* bytes */
+ spinlock_t lock;
+ /* Prevent concurrent execution of device init, reset and R/W request */
+ int init_done;
+ struct zram_meta *meta;
struct zram_stats stats;
+ struct zram_compress_ops ops;
+
+ u64 disksize;
+ struct request_queue *queue;
+ struct gendisk *disk;
};
#endif
Hi,
On 07/30/2013 03:53 PM, Greg KH wrote:
> On Tue, Jul 30, 2013 at 02:30:48PM +0200, Piotr Sarna wrote:
>> Current version of zram does not allow any substitution of a default
>> compression algorithm. Therefore, I decided to change the existing
>> implementation of page compression by adding Crypto API compability.
>
> As I have stated before, I want this code to get merged out of the
> drivers/staging/ area _before_ any new features get added to it. People
> are taking too much time adding new stuff, like this, and no one seems
> to be working to get it merged to the proper place anymore.
>
OK, but we actually need those features in order to test zram
against other, competitive solutions - and then decide whether
and how to merge it out of /drivers/staging.
> So again, I'm going to have to say no to a new feature here, sorry.
> zcache, zram, and zsmalloc need to get cleaned up and merged out of
> staging before anything new can be added to them.
>
> Or, if no one is working on that, I guess I can just delete them,
> right?...
>
Is there any official TODO list of cleaning up and merging out zram?
Regards,
Piotr Sarna
On Wed, Jul 31, 2013 at 12:16:36PM +0200, Piotr Sarna wrote:
> Hi,
>
> On 07/30/2013 03:53 PM, Greg KH wrote:
> > On Tue, Jul 30, 2013 at 02:30:48PM +0200, Piotr Sarna wrote:
> >> Current version of zram does not allow any substitution of a default
> >> compression algorithm. Therefore, I decided to change the existing
> >> implementation of page compression by adding Crypto API compability.
> >
> > As I have stated before, I want this code to get merged out of the
> > drivers/staging/ area _before_ any new features get added to it. People
> > are taking too much time adding new stuff, like this, and no one seems
> > to be working to get it merged to the proper place anymore.
> >
>
> OK, but we actually need those features in order to test zram
> against other, competitive solutions - and then decide whether
> and how to merge it out of /drivers/staging.
Where is another "competitive solution" in the kernel?
And you are implying that as-is, zram isn't acceptable, right? If so, I
should just delete it now as I was originally told otherwise, that's why
I merged it :(
> > So again, I'm going to have to say no to a new feature here, sorry.
> > zcache, zram, and zsmalloc need to get cleaned up and merged out of
> > staging before anything new can be added to them.
> >
> > Or, if no one is working on that, I guess I can just delete them,
> > right?...
> >
>
> Is there any official TODO list of cleaning up and merging out zram?
As it came from the "zswap" code, there doesn't seem to be one :(
The code is over 2 years old now, with no perceived movement out of
staging. If it doesn't get fixed up in the next kernel version or so, I
will have to remove it entirely.
sorry,
greg k-h
On 07/31/2013 02:49 PM, Greg KH wrote:
> On Wed, Jul 31, 2013 at 12:16:36PM +0200, Piotr Sarna wrote:
>> Hi,
>>
>> On 07/30/2013 03:53 PM, Greg KH wrote:
>>> On Tue, Jul 30, 2013 at 02:30:48PM +0200, Piotr Sarna wrote:
>>>> Current version of zram does not allow any substitution of a default
>>>> compression algorithm. Therefore, I decided to change the existing
>>>> implementation of page compression by adding Crypto API compability.
>>>
>>> As I have stated before, I want this code to get merged out of the
>>> drivers/staging/ area _before_ any new features get added to it. People
>>> are taking too much time adding new stuff, like this, and no one seems
>>> to be working to get it merged to the proper place anymore.
>>>
>>
>> OK, but we actually need those features in order to test zram
>> against other, competitive solutions - and then decide whether
>> and how to merge it out of /drivers/staging.
>
> Where is another "competitive solution" in the kernel?
>
> And you are implying that as-is, zram isn't acceptable, right? If so, I
> should just delete it now as I was originally told otherwise, that's why
> I merged it :(
>
>>> So again, I'm going to have to say no to a new feature here, sorry.
>>> zcache, zram, and zsmalloc need to get cleaned up and merged out of
>>> staging before anything new can be added to them.
>>>
>>> Or, if no one is working on that, I guess I can just delete them,
>>> right?...
>>>
>>
>> Is there any official TODO list of cleaning up and merging out zram?
>
> As it came from the "zswap" code, there doesn't seem to be one :(
>
> The code is over 2 years old now, with no percieved movement out of
> staging. If it doesn't get fixed up in the next kernel version or so, I
> will have to remove it entirely.
>
> sorry,
>
> greg k-h
>
The "other competitive solutions" I thought of were zswap and zcache.
On one hand, during my tests (of compressed swap feature), zram
turned out to be no faster than zswap/zcache (but no slower either).
On the other, zram happens to have some more, perhaps reasonable
use cases (described in zram.txt), e.g. mounting it as /tmp.
So far, I haven't tested those other features of zram,
so I can't possibly say whether it should be considered for a merge-out
or kept in staging for now.
Regards,
Piotr Sarna
Hello,
On Tue, Jul 30, 2013 at 02:30:48PM +0200, Piotr Sarna wrote:
> Current version of zram does not allow any substitution of a default
> compression algorithm. Therefore, I decided to change the existing
> implementation of page compression by adding Crypto API compability.
>
> All direct calls to lzo1x compression/decompression methods are now
> replaced by calls consistent with Crypto. Also, I removed "workmem"
> field from struct zram_meta, as it was there for lzo1x purposes only
> and is no longer needed. Finally, I added a set of functions required
> by Crypto API to work properly.
>
> In order to substitute the default algorithm (lzo), change the value
> of zram.compressor module parameter to a proper name (e.g. lz4).
Your patches [1,2/2] are exactly the same as a patch I made for in-house use only.
The reason I made it in-house only is that Greg doesn't want to merge any
new features before the zram promotion is done.
I understand him totally because I will do same thing as if I were
maintainer.
I tried zram promotion a long time ago but unfortunately failed.
Since then, I have been busy with other things.
http://comments.gmane.org/gmane.linux.kernel.mm/90264
The reason to prevent zram promotion is only zsmalloc while Jens
already acked zram part because zsmalloc was used for zcache,
zram and zswap at that time and they have different requirements
for zsmalloc. But recently, zsmalloc is used for only zram so I guess
we could put zsmalloc with zram in drivers/block. Of course, we should
discuss it with akpm.
Okay. I think it's good time to promote zram again and I will retry
soon.
Thanks.
>
> Signed-off-by: Piotr Sarna <[email protected]>
> Acked-by: Bartlomiej Zolnierkiewicz <[email protected]>
> Signed-off-by: Kyungmin Park <[email protected]>
--
Kind regards,
Minchan Kim