2011-06-10 13:29:43

by Jerome Marchand

Subject: [PATCH 1/4] Staging: zram: Remove useless offset calculation in handle_uncompressed_page()

The offset of an uncompressed page is always zero: handle_uncompressed_page()
doesn't have to care about it.

Signed-off-by: Jerome Marchand <[email protected]>
---
drivers/staging/zram/zram_drv.c | 3 +--
1 files changed, 1 insertions(+), 2 deletions(-)
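
For context, the reason the offset is always zero can be seen in the write
path (zram_write() as it currently stands): an incompressible page is stored
in its own freshly allocated page, so its table offset is set to 0. Excerpt
from the existing code:

	offset = 0;
	zram_set_flag(zram, index, ZRAM_UNCOMPRESSED);
	zram_stat_inc(&zram->stats.pages_expand);
	zram->table[index].page = page_store;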

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index aab4ec4..3305e1a 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -194,8 +194,7 @@ static void handle_uncompressed_page(struct zram *zram,
unsigned char *user_mem, *cmem;

user_mem = kmap_atomic(page, KM_USER0);
- cmem = kmap_atomic(zram->table[index].page, KM_USER1) +
- zram->table[index].offset;
+ cmem = kmap_atomic(zram->table[index].page, KM_USER1);

memcpy(user_mem, cmem, PAGE_SIZE);
kunmap_atomic(user_mem, KM_USER0);
--
1.6.2.5


2011-06-10 13:29:08

by Jerome Marchand

Subject: [PATCH 2/4] Staging: zram: Refactor zram_read/write() functions

This patch refactors the code of the zram_read/write() functions. It does
not remove much duplicate code by itself, but mostly serves as a helper for
the third patch of this series (Staging: zram: allow partial page
operations).

Signed-off-by: Jerome Marchand <[email protected]>
---
drivers/staging/zram/zram_drv.c | 315 +++++++++++++++++++--------------------
1 files changed, 155 insertions(+), 160 deletions(-)

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 3305e1a..33060d1 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -203,196 +203,199 @@ static void handle_uncompressed_page(struct zram *zram,
flush_dcache_page(page);
}

-static void zram_read(struct zram *zram, struct bio *bio)
+static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
+ u32 index, struct bio *bio)
{
+ int ret;
+ size_t clen;
+ struct page *page;
+ struct zobj_header *zheader;
+ unsigned char *user_mem, *cmem;

- int i;
- u32 index;
- struct bio_vec *bvec;
-
- zram_stat64_inc(zram, &zram->stats.num_reads);
- index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
-
- bio_for_each_segment(bvec, bio, i) {
- int ret;
- size_t clen;
- struct page *page;
- struct zobj_header *zheader;
- unsigned char *user_mem, *cmem;
-
- page = bvec->bv_page;
-
- if (zram_test_flag(zram, index, ZRAM_ZERO)) {
- handle_zero_page(page);
- index++;
- continue;
- }
+ page = bvec->bv_page;

- /* Requested page is not present in compressed area */
- if (unlikely(!zram->table[index].page)) {
- pr_debug("Read before write: sector=%lu, size=%u",
- (ulong)(bio->bi_sector), bio->bi_size);
- handle_zero_page(page);
- index++;
- continue;
- }
+ if (zram_test_flag(zram, index, ZRAM_ZERO)) {
+ handle_zero_page(page);
+ return 0;
+ }

- /* Page is stored uncompressed since it's incompressible */
- if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) {
- handle_uncompressed_page(zram, page, index);
- index++;
- continue;
- }
+ /* Requested page is not present in compressed area */
+ if (unlikely(!zram->table[index].page)) {
+ pr_debug("Read before write: sector=%lu, size=%u",
+ (ulong)(bio->bi_sector), bio->bi_size);
+ handle_zero_page(page);
+ return 0;
+ }

- user_mem = kmap_atomic(page, KM_USER0);
- clen = PAGE_SIZE;
+ /* Page is stored uncompressed since it's incompressible */
+ if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) {
+ handle_uncompressed_page(zram, page, index);
+ return 0;
+ }

- cmem = kmap_atomic(zram->table[index].page, KM_USER1) +
- zram->table[index].offset;
+ user_mem = kmap_atomic(page, KM_USER0);
+ clen = PAGE_SIZE;

- ret = lzo1x_decompress_safe(
- cmem + sizeof(*zheader),
- xv_get_object_size(cmem) - sizeof(*zheader),
- user_mem, &clen);
+ cmem = kmap_atomic(zram->table[index].page, KM_USER1) +
+ zram->table[index].offset;

- kunmap_atomic(user_mem, KM_USER0);
- kunmap_atomic(cmem, KM_USER1);
+ ret = lzo1x_decompress_safe(cmem + sizeof(*zheader),
+ xv_get_object_size(cmem) - sizeof(*zheader),
+ user_mem, &clen);

- /* Should NEVER happen. Return bio error if it does. */
- if (unlikely(ret != LZO_E_OK)) {
- pr_err("Decompression failed! err=%d, page=%u\n",
- ret, index);
- zram_stat64_inc(zram, &zram->stats.failed_reads);
- goto out;
- }
+ kunmap_atomic(user_mem, KM_USER0);
+ kunmap_atomic(cmem, KM_USER1);

- flush_dcache_page(page);
- index++;
+ /* Should NEVER happen. Return bio error if it does. */
+ if (unlikely(ret != LZO_E_OK)) {
+ pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
+ zram_stat64_inc(zram, &zram->stats.failed_reads);
+ return ret;
}

- set_bit(BIO_UPTODATE, &bio->bi_flags);
- bio_endio(bio, 0);
- return;
+ flush_dcache_page(page);

-out:
- bio_io_error(bio);
+ return 0;
}

-static void zram_write(struct zram *zram, struct bio *bio)
+static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
{
- int i;
- u32 index;
- struct bio_vec *bvec;
+ int ret;
+ u32 offset;
+ size_t clen;
+ struct zobj_header *zheader;
+ struct page *page, *page_store;
+ unsigned char *user_mem, *cmem, *src;

- zram_stat64_inc(zram, &zram->stats.num_writes);
- index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
+ page = bvec->bv_page;
+ src = zram->compress_buffer;

- bio_for_each_segment(bvec, bio, i) {
- int ret;
- u32 offset;
- size_t clen;
- struct zobj_header *zheader;
- struct page *page, *page_store;
- unsigned char *user_mem, *cmem, *src;
+ /*
+ * System overwrites unused sectors. Free memory associated
+ * with this sector now.
+ */
+ if (zram->table[index].page ||
+ zram_test_flag(zram, index, ZRAM_ZERO))
+ zram_free_page(zram, index);

- page = bvec->bv_page;
- src = zram->compress_buffer;
+ mutex_lock(&zram->lock);

- /*
- * System overwrites unused sectors. Free memory associated
- * with this sector now.
- */
- if (zram->table[index].page ||
- zram_test_flag(zram, index, ZRAM_ZERO))
- zram_free_page(zram, index);
+ user_mem = kmap_atomic(page, KM_USER0);
+ if (page_zero_filled(user_mem)) {
+ kunmap_atomic(user_mem, KM_USER0);
+ mutex_unlock(&zram->lock);
+ zram_stat_inc(&zram->stats.pages_zero);
+ zram_set_flag(zram, index, ZRAM_ZERO);
+ return 0;
+ }

- mutex_lock(&zram->lock);
+ ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen,
+ zram->compress_workmem);

- user_mem = kmap_atomic(page, KM_USER0);
- if (page_zero_filled(user_mem)) {
- kunmap_atomic(user_mem, KM_USER0);
- mutex_unlock(&zram->lock);
- zram_stat_inc(&zram->stats.pages_zero);
- zram_set_flag(zram, index, ZRAM_ZERO);
- index++;
- continue;
- }
-
- ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen,
- zram->compress_workmem);
+ kunmap_atomic(user_mem, KM_USER0);

- kunmap_atomic(user_mem, KM_USER0);
+ if (unlikely(ret != LZO_E_OK)) {
+ mutex_unlock(&zram->lock);
+ pr_err("Compression failed! err=%d\n", ret);
+ zram_stat64_inc(zram, &zram->stats.failed_writes);
+ return ret;
+ }

- if (unlikely(ret != LZO_E_OK)) {
+ /*
+ * Page is incompressible. Store it as-is (uncompressed)
+ * since we do not want to return too many disk write
+ * errors which has side effect of hanging the system.
+ */
+ if (unlikely(clen > max_zpage_size)) {
+ clen = PAGE_SIZE;
+ page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
+ if (unlikely(!page_store)) {
mutex_unlock(&zram->lock);
- pr_err("Compression failed! err=%d\n", ret);
+ pr_info("Error allocating memory for "
+ "incompressible page: %u\n", index);
zram_stat64_inc(zram, &zram->stats.failed_writes);
- goto out;
- }
-
- /*
- * Page is incompressible. Store it as-is (uncompressed)
- * since we do not want to return too many disk write
- * errors which has side effect of hanging the system.
- */
- if (unlikely(clen > max_zpage_size)) {
- clen = PAGE_SIZE;
- page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
- if (unlikely(!page_store)) {
- mutex_unlock(&zram->lock);
- pr_info("Error allocating memory for "
- "incompressible page: %u\n", index);
- zram_stat64_inc(zram,
- &zram->stats.failed_writes);
- goto out;
+ return -ENOMEM;
}

- offset = 0;
- zram_set_flag(zram, index, ZRAM_UNCOMPRESSED);
- zram_stat_inc(&zram->stats.pages_expand);
- zram->table[index].page = page_store;
- src = kmap_atomic(page, KM_USER0);
- goto memstore;
- }
+ offset = 0;
+ zram_set_flag(zram, index, ZRAM_UNCOMPRESSED);
+ zram_stat_inc(&zram->stats.pages_expand);
+ zram->table[index].page = page_store;
+ src = kmap_atomic(page, KM_USER0);
+ goto memstore;
+ }

- if (xv_malloc(zram->mem_pool, clen + sizeof(*zheader),
- &zram->table[index].page, &offset,
- GFP_NOIO | __GFP_HIGHMEM)) {
- mutex_unlock(&zram->lock);
- pr_info("Error allocating memory for compressed "
- "page: %u, size=%zu\n", index, clen);
- zram_stat64_inc(zram, &zram->stats.failed_writes);
- goto out;
- }
+ if (xv_malloc(zram->mem_pool, clen + sizeof(*zheader),
+ &zram->table[index].page, &offset,
+ GFP_NOIO | __GFP_HIGHMEM)) {
+ mutex_unlock(&zram->lock);
+ pr_info("Error allocating memory for compressed "
+ "page: %u, size=%zu\n", index, clen);
+ zram_stat64_inc(zram, &zram->stats.failed_writes);
+ return -ENOMEM;
+ }

memstore:
- zram->table[index].offset = offset;
+ zram->table[index].offset = offset;

- cmem = kmap_atomic(zram->table[index].page, KM_USER1) +
- zram->table[index].offset;
+ cmem = kmap_atomic(zram->table[index].page, KM_USER1) +
+ zram->table[index].offset;

#if 0
- /* Back-reference needed for memory defragmentation */
- if (!zram_test_flag(zram, index, ZRAM_UNCOMPRESSED)) {
- zheader = (struct zobj_header *)cmem;
- zheader->table_idx = index;
- cmem += sizeof(*zheader);
- }
+ /* Back-reference needed for memory defragmentation */
+ if (!zram_test_flag(zram, index, ZRAM_UNCOMPRESSED)) {
+ zheader = (struct zobj_header *)cmem;
+ zheader->table_idx = index;
+ cmem += sizeof(*zheader);
+ }
#endif

- memcpy(cmem, src, clen);
+ memcpy(cmem, src, clen);

- kunmap_atomic(cmem, KM_USER1);
- if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED)))
- kunmap_atomic(src, KM_USER0);
+ kunmap_atomic(cmem, KM_USER1);
+ if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED)))
+ kunmap_atomic(src, KM_USER0);

- /* Update stats */
- zram_stat64_add(zram, &zram->stats.compr_size, clen);
- zram_stat_inc(&zram->stats.pages_stored);
- if (clen <= PAGE_SIZE / 2)
- zram_stat_inc(&zram->stats.good_compress);
+ /* Update stats */
+ zram_stat64_add(zram, &zram->stats.compr_size, clen);
+ zram_stat_inc(&zram->stats.pages_stored);
+ if (clen <= PAGE_SIZE / 2)
+ zram_stat_inc(&zram->stats.good_compress);

- mutex_unlock(&zram->lock);
+ mutex_unlock(&zram->lock);
+
+ return 0;
+}
+
+static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
+ struct bio *bio, int rw)
+{
+ if (rw == READ)
+ return zram_bvec_read(zram, bvec, index, bio);
+
+ return zram_bvec_write(zram, bvec, index);
+}
+
+static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
+{
+ int i;
+ u32 index;
+ struct bio_vec *bvec;
+
+ switch (rw) {
+ case READ:
+ zram_stat64_inc(zram, &zram->stats.num_reads);
+ break;
+ case WRITE:
+ zram_stat64_inc(zram, &zram->stats.num_writes);
+ break;
+ }
+
+ index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
+
+ bio_for_each_segment(bvec, bio, i) {
+ if (zram_bvec_rw(zram, bvec, index, bio, rw) < 0)
+ goto out;
index++;
}

@@ -439,15 +442,7 @@ static int zram_make_request(struct request_queue *queue, struct bio *bio)
return 0;
}

- switch (bio_data_dir(bio)) {
- case READ:
- zram_read(zram, bio);
- break;
-
- case WRITE:
- zram_write(zram, bio);
- break;
- }
+ __zram_make_request(zram, bio, bio_data_dir(bio));

return 0;
}
--
1.6.2.5

2011-06-10 13:29:09

by Jerome Marchand

Subject: [PATCH 3/4] Staging: zram: allow partial page operations

Commit 7b19b8d45b216ff3186f066b31937bdbde066f08 (zram: Prevent overflow
in logical block size) introduced the ZRAM_LOGICAL_BLOCK_SIZE constant to
prevent overflow of the logical block size on 64k page kernels.
However, the current implementation of zram only allows operations on blocks
of the same size as a page. That makes theoretically legitimate 4k requests
fail on 64k page kernels.

This patch makes zram allow operations on partial pages. Basically, it
means we still operate on full pages internally, but only copy the
relevant segments from/to user memory.

Signed-off-by: Jerome Marchand <[email protected]>
---
drivers/staging/zram/zram_drv.c | 202 ++++++++++++++++++++++++++++++++-------
drivers/staging/zram/zram_drv.h | 5 +-
2 files changed, 169 insertions(+), 38 deletions(-)
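
A worked example of the new index/offset arithmetic (illustrative numbers
only, assuming a 64k page kernel, i.e. PAGE_SHIFT = 16 and SECTOR_SHIFT = 9,
hence SECTORS_PER_PAGE_SHIFT = 7 and SECTORS_PER_PAGE = 128):

	/* a 4k write starting at sector 8 */
	index  = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;	/* 8 >> 7 = 0 */
	offset = (bio->bi_sector & (SECTORS_PER_PAGE - 1))
			<< SECTOR_SHIFT;			/* 8 << 9 = 4096 */

zram_bvec_write() then sees a partial IO, reads the full 64k page into a
temporary buffer via zram_read_before_write(), copies bytes 4096..8191 from
the bio vector into that buffer and recompresses the whole page.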

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 33060d1..54ad29d 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -177,45 +177,52 @@ out:
zram->table[index].offset = 0;
}

-static void handle_zero_page(struct page *page)
+static void handle_zero_page(struct bio_vec *bvec)
{
+ struct page *page = bvec->bv_page;
void *user_mem;

user_mem = kmap_atomic(page, KM_USER0);
- memset(user_mem, 0, PAGE_SIZE);
+ memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
kunmap_atomic(user_mem, KM_USER0);

flush_dcache_page(page);
}

-static void handle_uncompressed_page(struct zram *zram,
- struct page *page, u32 index)
+static void handle_uncompressed_page(struct zram *zram, struct bio_vec *bvec,
+ u32 index, int offset)
{
+ struct page *page = bvec->bv_page;
unsigned char *user_mem, *cmem;

user_mem = kmap_atomic(page, KM_USER0);
cmem = kmap_atomic(zram->table[index].page, KM_USER1);

- memcpy(user_mem, cmem, PAGE_SIZE);
+ memcpy(user_mem + bvec->bv_offset, cmem + offset, bvec->bv_len);
kunmap_atomic(user_mem, KM_USER0);
kunmap_atomic(cmem, KM_USER1);

flush_dcache_page(page);
}

+static inline int is_partial_io(struct bio_vec *bvec)
+{
+ return bvec->bv_len != PAGE_SIZE;
+}
+
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
- u32 index, struct bio *bio)
+ u32 index, int offset, struct bio *bio)
{
int ret;
size_t clen;
struct page *page;
struct zobj_header *zheader;
- unsigned char *user_mem, *cmem;
+ unsigned char *user_mem, *cmem, *uncmem = NULL;

page = bvec->bv_page;

if (zram_test_flag(zram, index, ZRAM_ZERO)) {
- handle_zero_page(page);
+ handle_zero_page(bvec);
return 0;
}

@@ -223,17 +230,28 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
if (unlikely(!zram->table[index].page)) {
pr_debug("Read before write: sector=%lu, size=%u",
(ulong)(bio->bi_sector), bio->bi_size);
- handle_zero_page(page);
+ handle_zero_page(bvec);
return 0;
}

/* Page is stored uncompressed since it's incompressible */
if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) {
- handle_uncompressed_page(zram, page, index);
+ handle_uncompressed_page(zram, bvec, index, offset);
return 0;
}

+ if (is_partial_io(bvec)) {
+ /* Use a temporary buffer to decompress the page */
+ uncmem = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!uncmem) {
+ pr_info("Error allocating temp memory!\n");
+ return -ENOMEM;
+ }
+ }
+
user_mem = kmap_atomic(page, KM_USER0);
+ if (!is_partial_io(bvec))
+ uncmem = user_mem;
clen = PAGE_SIZE;

cmem = kmap_atomic(zram->table[index].page, KM_USER1) +
@@ -241,7 +259,13 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,

ret = lzo1x_decompress_safe(cmem + sizeof(*zheader),
xv_get_object_size(cmem) - sizeof(*zheader),
- user_mem, &clen);
+ uncmem, &clen);
+
+ if (is_partial_io(bvec)) {
+ memcpy(user_mem + bvec->bv_offset, uncmem + offset,
+ bvec->bv_len);
+ kfree(uncmem);
+ }

kunmap_atomic(user_mem, KM_USER0);
kunmap_atomic(cmem, KM_USER1);
@@ -258,18 +282,75 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
return 0;
}

-static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
+static int zram_read_before_write(struct zram *zram, char *mem, u32 index)
+{
+ int ret;
+ size_t clen = PAGE_SIZE;
+ struct zobj_header *zheader;
+ unsigned char *cmem;
+
+ if (zram_test_flag(zram, index, ZRAM_ZERO) ||
+ !zram->table[index].page) {
+ memset(mem, 0, PAGE_SIZE);
+ return 0;
+ }
+
+ cmem = kmap_atomic(zram->table[index].page, KM_USER0) +
+ zram->table[index].offset;
+
+ /* Page is stored uncompressed since it's incompressible */
+ if (unlikely(zram_test_flag(zram, index, ZRAM_UNCOMPRESSED))) {
+ memcpy(mem, cmem, PAGE_SIZE);
+ kunmap_atomic(cmem, KM_USER0);
+ return 0;
+ }
+
+ ret = lzo1x_decompress_safe(cmem + sizeof(*zheader),
+ xv_get_object_size(cmem) - sizeof(*zheader),
+ mem, &clen);
+ kunmap_atomic(cmem, KM_USER0);
+
+ /* Should NEVER happen. Return bio error if it does. */
+ if (unlikely(ret != LZO_E_OK)) {
+ pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
+ zram_stat64_inc(zram, &zram->stats.failed_reads);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
+ int offset)
{
int ret;
- u32 offset;
+ u32 store_offset;
size_t clen;
struct zobj_header *zheader;
struct page *page, *page_store;
- unsigned char *user_mem, *cmem, *src;
+ unsigned char *user_mem, *cmem, *src, *uncmem = NULL;

page = bvec->bv_page;
src = zram->compress_buffer;

+ if (is_partial_io(bvec)) {
+ /*
+ * This is a partial IO. We need to read the full page
+ * before to write the changes.
+ */
+ uncmem = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!uncmem) {
+ pr_info("Error allocating temp memory!\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = zram_read_before_write(zram, uncmem, index);
+ if (ret) {
+ kfree(uncmem);
+ goto out;
+ }
+ }
+
/*
* System overwrites unused sectors. Free memory associated
* with this sector now.
@@ -281,24 +362,35 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
mutex_lock(&zram->lock);

user_mem = kmap_atomic(page, KM_USER0);
- if (page_zero_filled(user_mem)) {
+
+ if (is_partial_io(bvec))
+ memcpy(uncmem + offset, user_mem + bvec->bv_offset,
+ bvec->bv_len);
+ else
+ uncmem = user_mem;
+
+ if (page_zero_filled(uncmem)) {
kunmap_atomic(user_mem, KM_USER0);
mutex_unlock(&zram->lock);
+ if (is_partial_io(bvec))
+ kfree(uncmem);
zram_stat_inc(&zram->stats.pages_zero);
zram_set_flag(zram, index, ZRAM_ZERO);
- return 0;
+ ret = 0;
+ goto out;
}

- ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen,
+ ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen,
zram->compress_workmem);

kunmap_atomic(user_mem, KM_USER0);
+ if (is_partial_io(bvec))
+ kfree(uncmem);

if (unlikely(ret != LZO_E_OK)) {
mutex_unlock(&zram->lock);
pr_err("Compression failed! err=%d\n", ret);
- zram_stat64_inc(zram, &zram->stats.failed_writes);
- return ret;
+ goto out;
}

/*
@@ -313,11 +405,11 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
mutex_unlock(&zram->lock);
pr_info("Error allocating memory for "
"incompressible page: %u\n", index);
- zram_stat64_inc(zram, &zram->stats.failed_writes);
- return -ENOMEM;
- }
+ ret = -ENOMEM;
+ goto out;
+ }

- offset = 0;
+ store_offset = 0;
zram_set_flag(zram, index, ZRAM_UNCOMPRESSED);
zram_stat_inc(&zram->stats.pages_expand);
zram->table[index].page = page_store;
@@ -326,17 +418,17 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
}

if (xv_malloc(zram->mem_pool, clen + sizeof(*zheader),
- &zram->table[index].page, &offset,
+ &zram->table[index].page, &store_offset,
GFP_NOIO | __GFP_HIGHMEM)) {
mutex_unlock(&zram->lock);
pr_info("Error allocating memory for compressed "
"page: %u, size=%zu\n", index, clen);
- zram_stat64_inc(zram, &zram->stats.failed_writes);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}

memstore:
- zram->table[index].offset = offset;
+ zram->table[index].offset = store_offset;

cmem = kmap_atomic(zram->table[index].page, KM_USER1) +
zram->table[index].offset;
@@ -365,20 +457,32 @@ memstore:
mutex_unlock(&zram->lock);

return 0;
+
+out:
+ if (ret)
+ zram_stat64_inc(zram, &zram->stats.failed_writes);
+ return ret;
}

static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
- struct bio *bio, int rw)
+ int offset, struct bio *bio, int rw)
{
if (rw == READ)
- return zram_bvec_read(zram, bvec, index, bio);
+ return zram_bvec_read(zram, bvec, index, offset, bio);

- return zram_bvec_write(zram, bvec, index);
+ return zram_bvec_write(zram, bvec, index, offset);
+}
+
+static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
+{
+ if (*offset + bvec->bv_len >= PAGE_SIZE)
+ (*index)++;
+ *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}

static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
{
- int i;
+ int i, offset;
u32 index;
struct bio_vec *bvec;

@@ -392,11 +496,35 @@ static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
}

index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
+ offset = (bio->bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

bio_for_each_segment(bvec, bio, i) {
- if (zram_bvec_rw(zram, bvec, index, bio, rw) < 0)
- goto out;
- index++;
+ int max_transfer_size = PAGE_SIZE - offset;
+
+ if (bvec->bv_len > max_transfer_size) {
+ /*
+ * zram_bvec_rw() can only make operation on a single
+ * zram page. Split the bio vector.
+ */
+ struct bio_vec bv;
+
+ bv.bv_page = bvec->bv_page;
+ bv.bv_len = max_transfer_size;
+ bv.bv_offset = bvec->bv_offset;
+
+ if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0)
+ goto out;
+
+ bv.bv_len = bvec->bv_len - max_transfer_size;
+ bv.bv_offset += max_transfer_size;
+ if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0)
+ goto out;
+ } else
+ if (zram_bvec_rw(zram, bvec, index, offset, bio, rw)
+ < 0)
+ goto out;
+
+ update_position(&index, &offset, bvec);
}

set_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -408,14 +536,14 @@ out:
}

/*
- * Check if request is within bounds and page aligned.
+ * Check if request is within bounds and aligned on zram logical blocks.
*/
static inline int valid_io_request(struct zram *zram, struct bio *bio)
{
if (unlikely(
(bio->bi_sector >= (zram->disksize >> SECTOR_SHIFT)) ||
- (bio->bi_sector & (SECTORS_PER_PAGE - 1)) ||
- (bio->bi_size & (PAGE_SIZE - 1)))) {
+ (bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)) ||
+ (bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))) {

return 0;
}
diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h
index 408b2c0..8acf9eb 100644
--- a/drivers/staging/zram/zram_drv.h
+++ b/drivers/staging/zram/zram_drv.h
@@ -61,7 +61,10 @@ static const unsigned max_zpage_size = PAGE_SIZE / 4 * 3;
#define SECTOR_SIZE (1 << SECTOR_SHIFT)
#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT)
-#define ZRAM_LOGICAL_BLOCK_SIZE 4096
+#define ZRAM_LOGICAL_BLOCK_SHIFT 12
+#define ZRAM_LOGICAL_BLOCK_SIZE (1 << ZRAM_LOGICAL_BLOCK_SHIFT)
+#define ZRAM_SECTOR_PER_LOGICAL_BLOCK \
+ (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT))

/* Flags for zram pages (table[page_no].flags) */
enum zram_pageflags {
--
1.6.2.5

2011-06-10 13:29:18

by Jerome Marchand

Subject: [PATCH 4/4] Staging: zram: Replace mutex lock by a R/W semaphore

Currently, nothing protects the zram table from concurrent access.
For instance, the ZRAM_UNCOMPRESSED bit can be cleared by zram_free_page()
called from a concurrent write between the time ZRAM_UNCOMPRESSED has
been set and the time it is tested to unmap KM_USER0 in
zram_bvec_write(). This ultimately leads to a kernel panic.

Also, a read request can occur when the page has been freed by a
running write request and before it has been updated, leading to a
zero-filled block being incorrectly read and a "Read before write"
error message.

This patch replaces the current mutex with a rw_semaphore. It extends
the protection to the zram table (currently, only the compression buffers
are protected) and to read requests (currently, only write requests are
protected).

Signed-off-by: Jerome Marchand <[email protected]>
---
drivers/staging/zram/zram_drv.c | 25 +++++++++++++------------
drivers/staging/zram/zram_drv.h | 4 ++--
2 files changed, 15 insertions(+), 14 deletions(-)
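
Condensed, the resulting locking pattern in zram_bvec_rw() looks like this
(the full change is in the diff below):

	if (rw == READ) {
		down_read(&zram->lock);		/* concurrent readers allowed */
		ret = zram_bvec_read(zram, bvec, index, offset, bio);
		up_read(&zram->lock);
	} else {
		down_write(&zram->lock);	/* writers get exclusive access */
		ret = zram_bvec_write(zram, bvec, index, offset);
		up_write(&zram->lock);
	}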

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 54ad29d..c5fdc55 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -359,8 +359,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
zram_test_flag(zram, index, ZRAM_ZERO))
zram_free_page(zram, index);

- mutex_lock(&zram->lock);
-
user_mem = kmap_atomic(page, KM_USER0);

if (is_partial_io(bvec))
@@ -371,7 +369,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,

if (page_zero_filled(uncmem)) {
kunmap_atomic(user_mem, KM_USER0);
- mutex_unlock(&zram->lock);
if (is_partial_io(bvec))
kfree(uncmem);
zram_stat_inc(&zram->stats.pages_zero);
@@ -388,7 +385,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
kfree(uncmem);

if (unlikely(ret != LZO_E_OK)) {
- mutex_unlock(&zram->lock);
pr_err("Compression failed! err=%d\n", ret);
goto out;
}
@@ -402,7 +398,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
clen = PAGE_SIZE;
page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
if (unlikely(!page_store)) {
- mutex_unlock(&zram->lock);
pr_info("Error allocating memory for "
"incompressible page: %u\n", index);
ret = -ENOMEM;
@@ -420,7 +415,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
if (xv_malloc(zram->mem_pool, clen + sizeof(*zheader),
&zram->table[index].page, &store_offset,
GFP_NOIO | __GFP_HIGHMEM)) {
- mutex_unlock(&zram->lock);
pr_info("Error allocating memory for compressed "
"page: %u, size=%zu\n", index, clen);
ret = -ENOMEM;
@@ -454,8 +448,6 @@ memstore:
if (clen <= PAGE_SIZE / 2)
zram_stat_inc(&zram->stats.good_compress);

- mutex_unlock(&zram->lock);
-
return 0;

out:
@@ -467,10 +459,19 @@ out:
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
int offset, struct bio *bio, int rw)
{
- if (rw == READ)
- return zram_bvec_read(zram, bvec, index, offset, bio);
+ int ret;

- return zram_bvec_write(zram, bvec, index, offset);
+ if (rw == READ) {
+ down_read(&zram->lock);
+ ret = zram_bvec_read(zram, bvec, index, offset, bio);
+ up_read(&zram->lock);
+ } else {
+ down_write(&zram->lock);
+ ret = zram_bvec_write(zram, bvec, index, offset);
+ up_write(&zram->lock);
+ }
+
+ return ret;
}

static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
@@ -701,7 +702,7 @@ static int create_device(struct zram *zram, int device_id)
{
int ret = 0;

- mutex_init(&zram->lock);
+ init_rwsem(&zram->lock);
mutex_init(&zram->init_lock);
spin_lock_init(&zram->stat64_lock);

diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h
index 8acf9eb..abe5221 100644
--- a/drivers/staging/zram/zram_drv.h
+++ b/drivers/staging/zram/zram_drv.h
@@ -107,8 +107,8 @@ struct zram {
void *compress_buffer;
struct table *table;
spinlock_t stat64_lock; /* protect 64-bit stats */
- struct mutex lock; /* protect compression buffers against
- * concurrent writes */
+ struct rw_semaphore lock; /* protect compression buffers and table
+ * against concurrent read and writes */
struct request_queue *queue;
struct gendisk *disk;
int init_done;
--
1.6.2.5

2011-06-10 16:38:37

by Nitin Gupta

Subject: Re: [PATCH 1/4] Staging: zram: Remove useless offset calculation in handle_uncompressed_page()

On 06/10/2011 06:28 AM, Jerome Marchand wrote:
> The offset of an uncompressed page is always zero: handle_uncompressed_page()
> doesn't have to care about it.
>
> Signed-off-by: Jerome Marchand<[email protected]>
> ---
> drivers/staging/zram/zram_drv.c | 3 +--
> 1 files changed, 1 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
> index aab4ec4..3305e1a 100644
> --- a/drivers/staging/zram/zram_drv.c
> +++ b/drivers/staging/zram/zram_drv.c
> @@ -194,8 +194,7 @@ static void handle_uncompressed_page(struct zram *zram,
> unsigned char *user_mem, *cmem;
>
> user_mem = kmap_atomic(page, KM_USER0);
> - cmem = kmap_atomic(zram->table[index].page, KM_USER1) +
> - zram->table[index].offset;
> + cmem = kmap_atomic(zram->table[index].page, KM_USER1);
>
> memcpy(user_mem, cmem, PAGE_SIZE);
> kunmap_atomic(user_mem, KM_USER0);


kmap/kunmap requests need to be strictly nested, so here kunmap(...,
KM_USER1) must be done before kunmap(..., KM_USER0). This needs to be
fixed in zram_bvec_read() as well. Though these bugs are not introduced
by your patches, it would be nice to have the fixes included in your
patch series.
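
For instance, the unmap order in handle_uncompressed_page() would need to
become something like this (just a sketch of the nesting rule, not a tested
patch):

	user_mem = kmap_atomic(page, KM_USER0);
	cmem = kmap_atomic(zram->table[index].page, KM_USER1);

	memcpy(user_mem, cmem, PAGE_SIZE);

	/* unmap in reverse order of mapping: KM_USER1 first, then KM_USER0 */
	kunmap_atomic(cmem, KM_USER1);
	kunmap_atomic(user_mem, KM_USER0);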

Thanks for the fixes.
Nitin

2011-06-10 16:42:14

by Nitin Gupta

Subject: Re: [PATCH 3/4] Staging: zram: allow partial page operations

On 06/10/2011 06:28 AM, Jerome Marchand wrote:
> Commit 7b19b8d45b216ff3186f066b31937bdbde066f08 (zram: Prevent overflow
> in logical block size) introduced the ZRAM_LOGICAL_BLOCK_SIZE constant to
> prevent overflow of the logical block size on 64k page kernels.
> However, the current implementation of zram only allows operations on blocks
> of the same size as a page. That makes theoretically legitimate 4k requests
> fail on 64k page kernels.
>
> This patch makes zram allow operations on partial pages. Basically, it
> means we still operate on full pages internally, but only copy the
> relevant segments from/to user memory.
>

Couldn't we just change the type of struct queue_limits.logical_block_size
to unsigned int or something so it could hold a value of 64K? Then we could
avoid making all these changes to handle partial page requests.
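
If I recall correctly, the field is an unsigned short today, which is exactly
why 64K overflows (65536 wraps to 0). The change would be roughly as follows
(untested sketch; the blk_queue_logical_block_size() setter would need the
same widening):

	/* include/linux/blkdev.h, struct queue_limits */
	unsigned int		logical_block_size;	/* was: unsigned short */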

Thanks,
Nitin

2011-06-10 16:47:24

by Nitin Gupta

Subject: Re: [PATCH 4/4] Staging: zram: Replace mutex lock by a R/W semaphore

On 06/10/2011 06:28 AM, Jerome Marchand wrote:
> Currently, nothing protects the zram table from concurrent access.
> For instance, the ZRAM_UNCOMPRESSED bit can be cleared by zram_free_page()
> called from a concurrent write between the time ZRAM_UNCOMPRESSED has
> been set and the time it is tested to unmap KM_USER0 in
> zram_bvec_write(). This ultimately leads to a kernel panic.
>
> Also, a read request can occur when the page has been freed by a
> running write request and before it has been updated, leading to a
> zero-filled block being incorrectly read and a "Read before write"
> error message.
>
> This patch replaces the current mutex with a rw_semaphore. It extends
> the protection to the zram table (currently, only the compression buffers
> are protected) and to read requests (currently, only write requests are
> protected).
>

These locking issues are probably remnants of earlier versions where
zram could be used only as a swap disk, in which case it was not
possible for a read and a write on the same sector (page) to happen
concurrently, and thus there was no need to protect the table.


> Signed-off-by: Jerome Marchand<[email protected]>

Reviewed-by: Nitin Gupta <[email protected]>

Thanks,
Nitin

2011-06-13 09:43:03

by Jerome Marchand

Subject: Re: [PATCH 3/4] Staging: zram: allow partial page operations

On 06/10/2011 06:41 PM, Nitin Gupta wrote:
> On 06/10/2011 06:28 AM, Jerome Marchand wrote:
>> Commit 7b19b8d45b216ff3186f066b31937bdbde066f08 (zram: Prevent overflow
>> in logical block size) introduced the ZRAM_LOGICAL_BLOCK_SIZE constant to
>> prevent overflow of the logical block size on 64k page kernels.
>> However, the current implementation of zram only allows operations on blocks
>> of the same size as a page. That makes theoretically legitimate 4k requests
>> fail on 64k page kernels.
>>
>> This patch makes zram allow operations on partial pages. Basically, it
>> means we still operate on full pages internally, but only copy the
>> relevant segments from/to user memory.
>>
>
> Couldn't we just change the type of struct queue_limits.logical_block_size
> to unsigned int or something so it could hold a value of 64K? Then we could
> avoid making all these changes to handle partial page requests.
>
> Thanks,
> Nitin

I believe logical_block_size is meant to be small. I don't know whether it
is reasonable to set it to a value as big as 64k. I CCed Jens and Martin to
get a more informed opinion on the matter.

Regards,
Jerome

2011-06-14 14:52:38

by Jeff Moyer

Subject: Re: [PATCH 3/4] Staging: zram: allow partial page operations

Jerome Marchand <[email protected]> writes:

> On 06/10/2011 06:41 PM, Nitin Gupta wrote:
>> On 06/10/2011 06:28 AM, Jerome Marchand wrote:
>>> Commit 7b19b8d45b216ff3186f066b31937bdbde066f08 (zram: Prevent overflow
>>> in logical block size) introduced the ZRAM_LOGICAL_BLOCK_SIZE constant to
>>> prevent overflow of the logical block size on 64k page kernels.
>>> However, the current implementation of zram only allows operations on blocks
>>> of the same size as a page. That makes theoretically legitimate 4k requests
>>> fail on 64k page kernels.
>>>
>>> This patch makes zram allow operations on partial pages. Basically, it
>>> means we still operate on full pages internally, but only copy the
>>> relevant segments from/to user memory.
>>>
>>
>> Couldn't we just change the type of struct queue_limits.logical_block_size
>> to unsigned int or something so it could hold a value of 64K? Then we could
>> avoid making all these changes to handle partial page requests.
>>
>> Thanks,
>> Nitin
>
> I believe logical_block_size is meant to be small. I don't know whether it
> is reasonable to set it to a value as big as 64k. I CCed Jens and Martin to
> get a more informed opinion on the matter.

I don't think there's any reason the logical block size can't be
increased. For zram, so long as you don't care that the minimum I/O
size is 64k on these systems (and by you, I mean the users of zram, like
file systems, or anything using the block device directly), then it's
a fine trade-off to make.

Jens, Martin, what do you guys think about bumping the size of the
queue_limits.logical_block_size?

Cheers,
Jeff

2011-06-14 16:37:17

by Martin K. Petersen

Subject: Re: [PATCH 3/4] Staging: zram: allow partial page operations

>>>>> "Jeff" == Jeff Moyer <[email protected]> writes:

Jeff> I don't think there's any reason the logical block size can't be
Jeff> increased. For zram, so long as you don't care that the minimum
Jeff> I/O size is 64k on these systems (and by you, I mean the users of
Jeff> zram, like file systems, or anything using the block device
Jeff> directly), then it's a fine trade-off to make.

Jeff> Jens, Martin, what do you guys think about bumping the size of the
Jeff> queue_limits.logical_block_size?

When I wrote the code I thought 64K ought to be enough for anybody.

I don't have a problem bumping it as long as people are aware of the
implications bigger blocks have on the filesystems they put on top.

--
Martin K. Petersen Oracle Linux Engineering