2012-11-30 06:54:58

by Nitin Gupta

Subject: [PATCH v2 1/2] zsmalloc: add function to query object size

Changelog v2 vs v1:
- None

Adds zs_get_object_size(handle), which returns the size of
the given object. This is useful since users (zram etc.)
no longer have to maintain object sizes separately, saving
some metadata (4 bytes per page).

The object handle encodes a <page, offset> pair which currently points
to the start of the object. Now, the handle implicitly stores the size
information by pointing to the object's end instead. Since zsmalloc is
a slab-based allocator, the start of the object can be easily determined,
and the difference between the end offset encoded in the handle and the
start gives us the object size.
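
To illustrate, here is a small user-space sketch of the size recovery for
the simple case where the object lies entirely within one page and objects
in that page start at multiples of the class size (as in the first page of
a zspage). The values are made up; the real helpers are encode_ptr(),
obj_handle_to_offset() and zs_get_object_size() in the diff below.

#include <stdio.h>

#define CLASS_SIZE 208  /* hypothetical size class */

/*
 * Illustrative only: the handle stores the offset of the object's last
 * byte. Rounding that offset down to the class size gives the object's
 * start, and the difference plus one is the object's size.
 */
static unsigned int obj_size_from_end(unsigned int end_offset)
{
        unsigned int start = (end_offset / CLASS_SIZE) * CLASS_SIZE;

        return end_offset - start + 1;
}

int main(void)
{
        /*
         * A 150-byte object in the second slot starts at offset 208, so
         * its last byte is at offset 208 + 150 - 1 = 357.
         */
        printf("%u\n", obj_size_from_end(357)); /* prints 150 */
        return 0;
}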

Signed-off-by: Nitin Gupta <[email protected]>
---
drivers/staging/zsmalloc/zsmalloc-main.c | 177 +++++++++++++++++++++---------
drivers/staging/zsmalloc/zsmalloc.h | 1 +
2 files changed, 127 insertions(+), 51 deletions(-)

diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c
index 09a9d35..65c9d3b 100644
--- a/drivers/staging/zsmalloc/zsmalloc-main.c
+++ b/drivers/staging/zsmalloc/zsmalloc-main.c
@@ -112,20 +112,20 @@
#define MAX_PHYSMEM_BITS 36
#else /* !CONFIG_HIGHMEM64G */
/*
- * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
+ * If this definition of MAX_PHYSMEM_BITS is used, OFFSET_BITS will just
* be PAGE_SHIFT
*/
#define MAX_PHYSMEM_BITS BITS_PER_LONG
#endif
#endif
#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
-#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
-#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
+#define OFFSET_BITS (BITS_PER_LONG - _PFN_BITS)
+#define OFFSET_MASK ((_AC(1, UL) << OFFSET_BITS) - 1)

#define MAX(a, b) ((a) >= (b) ? (a) : (b))
/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
#define ZS_MIN_ALLOC_SIZE \
- MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
+ MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OFFSET_BITS))
#define ZS_MAX_ALLOC_SIZE PAGE_SIZE

/*
@@ -256,6 +256,11 @@ static int is_last_page(struct page *page)
return PagePrivate2(page);
}

+static unsigned long get_page_index(struct page *page)
+{
+ return is_first_page(page) ? 0 : page->index;
+}
+
static void get_zspage_mapping(struct page *page, unsigned int *class_idx,
enum fullness_group *fullness)
{
@@ -433,39 +438,86 @@ static struct page *get_next_page(struct page *page)
return next;
}

-/* Encode <page, obj_idx> as a single handle value */
-static void *obj_location_to_handle(struct page *page, unsigned long obj_idx)
+static struct page *get_prev_page(struct page *page)
{
- unsigned long handle;
+ struct page *prev, *first_page;

- if (!page) {
- BUG_ON(obj_idx);
- return NULL;
- }
+ first_page = get_first_page(page);
+ if (page == first_page)
+ prev = NULL;
+ else if (page == (struct page *)first_page->private)
+ prev = first_page;
+ else
+ prev = list_entry(page->lru.prev, struct page, lru);

- handle = page_to_pfn(page) << OBJ_INDEX_BITS;
- handle |= (obj_idx & OBJ_INDEX_MASK);
+ return prev;

- return (void *)handle;
}

-/* Decode <page, obj_idx> pair from the given object handle */
-static void obj_handle_to_location(unsigned long handle, struct page **page,
- unsigned long *obj_idx)
+static void *encode_ptr(struct page *page, unsigned long offset)
{
- *page = pfn_to_page(handle >> OBJ_INDEX_BITS);
- *obj_idx = handle & OBJ_INDEX_MASK;
+ unsigned long ptr;
+ ptr = page_to_pfn(page) << OFFSET_BITS;
+ ptr |= offset & OFFSET_MASK;
+ return (void *)ptr;
+}
+
+static void decode_ptr(unsigned long ptr, struct page **page,
+ unsigned int *offset)
+{
+ *page = pfn_to_page(ptr >> OFFSET_BITS);
+ *offset = ptr & OFFSET_MASK;
+}
+
+static struct page *obj_handle_to_page(unsigned long handle)
+{
+ struct page *page;
+ unsigned int offset;
+
+ decode_ptr(handle, &page, &offset);
+ if (offset < get_page_index(page))
+ page = get_prev_page(page);
+
+ return page;
+}
+
+static unsigned int obj_handle_to_offset(unsigned long handle,
+ unsigned int class_size)
+{
+ struct page *page;
+ unsigned int offset;
+
+ decode_ptr(handle, &page, &offset);
+ if (offset < get_page_index(page))
+ offset = PAGE_SIZE - class_size + get_page_index(page);
+ else
+ offset = roundup(offset, class_size) - class_size;
+
+ return offset;
}

-static unsigned long obj_idx_to_offset(struct page *page,
- unsigned long obj_idx, int class_size)
+/* Encode <page, offset, size> as a single handle value */
+static void *obj_location_to_handle(struct page *page, unsigned int offset,
+ unsigned int size, unsigned int class_size)
{
- unsigned long off = 0;
+ struct page *endpage;
+ unsigned int endoffset;

- if (!is_first_page(page))
- off = page->index;
+ if (!page) {
+ BUG_ON(offset);
+ return NULL;
+ }
+ BUG_ON(offset >= PAGE_SIZE);
+
+ endpage = page;
+ endoffset = offset + size - 1;
+ if (endoffset >= PAGE_SIZE) {
+ endpage = get_next_page(page);
+ BUG_ON(!endpage);
+ endoffset -= PAGE_SIZE;
+ }

- return off + obj_idx * class_size;
+ return encode_ptr(endpage, endoffset);
}

static void reset_page(struct page *page)
@@ -506,14 +558,13 @@ static void free_zspage(struct page *first_page)
/* Initialize a newly allocated zspage */
static void init_zspage(struct page *first_page, struct size_class *class)
{
- unsigned long off = 0;
+ unsigned long off = 0, next_off = 0;
struct page *page = first_page;

BUG_ON(!is_first_page(first_page));
while (page) {
struct page *next_page;
struct link_free *link;
- unsigned int i, objs_on_page;

/*
* page->index stores offset of first object starting
@@ -526,14 +577,12 @@ static void init_zspage(struct page *first_page, struct size_class *class)

link = (struct link_free *)kmap_atomic(page) +
off / sizeof(*link);
- objs_on_page = (PAGE_SIZE - off) / class->size;

- for (i = 1; i <= objs_on_page; i++) {
- off += class->size;
- if (off < PAGE_SIZE) {
- link->next = obj_location_to_handle(page, i);
- link += class->size / sizeof(*link);
- }
+ next_off = off + class->size;
+ while (next_off < PAGE_SIZE) {
+ link->next = encode_ptr(page, next_off);
+ link += class->size / sizeof(*link);
+ next_off += class->size;
}

/*
@@ -542,10 +591,11 @@ static void init_zspage(struct page *first_page, struct size_class *class)
* page (if present)
*/
next_page = get_next_page(page);
- link->next = obj_location_to_handle(next_page, 0);
+ next_off = next_page ? next_off - PAGE_SIZE : 0;
+ link->next = encode_ptr(next_page, next_off);
kunmap_atomic(link);
page = next_page;
- off = (off + class->size) % PAGE_SIZE;
+ off = next_off;
}
}

@@ -596,7 +646,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)

init_zspage(first_page, class);

- first_page->freelist = obj_location_to_handle(first_page, 0);
+ first_page->freelist = encode_ptr(first_page, 0);
/* Maximum number of objects we can store in this zspage */
first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size;

@@ -871,7 +921,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
struct size_class *class;

struct page *first_page, *m_page;
- unsigned long m_objidx, m_offset;
+ unsigned int m_offset;

if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
return 0;
@@ -895,8 +945,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
}

obj = (unsigned long)first_page->freelist;
- obj_handle_to_location(obj, &m_page, &m_objidx);
- m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
+ decode_ptr(obj, &m_page, &m_offset);

link = (struct link_free *)kmap_atomic(m_page) +
m_offset / sizeof(*link);
@@ -907,6 +956,9 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
first_page->inuse++;
/* Now move the zspage to another fullness group, if required */
fix_fullness_group(pool, first_page);
+
+ obj = (unsigned long)obj_location_to_handle(m_page, m_offset,
+ size, class->size);
spin_unlock(&class->lock);

return obj;
@@ -917,7 +969,7 @@ void zs_free(struct zs_pool *pool, unsigned long obj)
{
struct link_free *link;
struct page *first_page, *f_page;
- unsigned long f_objidx, f_offset;
+ unsigned long f_offset;

int class_idx;
struct size_class *class;
@@ -926,12 +978,12 @@ void zs_free(struct zs_pool *pool, unsigned long obj)
if (unlikely(!obj))
return;

- obj_handle_to_location(obj, &f_page, &f_objidx);
+ f_page = obj_handle_to_page(obj);
first_page = get_first_page(f_page);

get_zspage_mapping(first_page, &class_idx, &fullness);
class = &pool->size_class[class_idx];
- f_offset = obj_idx_to_offset(f_page, f_objidx, class->size);
+ f_offset = obj_handle_to_offset(obj, class->size);

spin_lock(&class->lock);

@@ -940,7 +992,7 @@ void zs_free(struct zs_pool *pool, unsigned long obj)
+ f_offset);
link->next = first_page->freelist;
kunmap_atomic(link);
- first_page->freelist = (void *)obj;
+ first_page->freelist = encode_ptr(f_page, f_offset);

first_page->inuse--;
fullness = fix_fullness_group(pool, first_page);
@@ -970,10 +1022,10 @@ EXPORT_SYMBOL_GPL(zs_free);
* This function returns with preemption and page faults disabled.
*/
void *zs_map_object(struct zs_pool *pool, unsigned long handle,
- enum zs_mapmode mm)
+ enum zs_mapmode mm)
{
struct page *page;
- unsigned long obj_idx, off;
+ unsigned long off;

unsigned int class_idx;
enum fullness_group fg;
@@ -990,10 +1042,10 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
*/
BUG_ON(in_interrupt());

- obj_handle_to_location(handle, &page, &obj_idx);
+ page = obj_handle_to_page(handle);
get_zspage_mapping(get_first_page(page), &class_idx, &fg);
class = &pool->size_class[class_idx];
- off = obj_idx_to_offset(page, obj_idx, class->size);
+ off = obj_handle_to_offset(handle, class->size);

area = &get_cpu_var(zs_map_area);
area->vm_mm = mm;
@@ -1015,7 +1067,7 @@ EXPORT_SYMBOL_GPL(zs_map_object);
void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
{
struct page *page;
- unsigned long obj_idx, off;
+ unsigned long off;

unsigned int class_idx;
enum fullness_group fg;
@@ -1024,10 +1076,10 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)

BUG_ON(!handle);

- obj_handle_to_location(handle, &page, &obj_idx);
+ page = obj_handle_to_page(handle);
get_zspage_mapping(get_first_page(page), &class_idx, &fg);
class = &pool->size_class[class_idx];
- off = obj_idx_to_offset(page, obj_idx, class->size);
+ off = obj_handle_to_offset(handle, class->size);

area = &__get_cpu_var(zs_map_area);
if (off + class->size <= PAGE_SIZE)
@@ -1045,6 +1097,29 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
}
EXPORT_SYMBOL_GPL(zs_unmap_object);

+size_t zs_get_object_size(struct zs_pool *pool, unsigned long handle)
+{
+ struct page *endpage;
+ unsigned int endoffset, size;
+
+ unsigned int class_idx;
+ enum fullness_group fg;
+ struct size_class *class;
+
+ decode_ptr(handle, &endpage, &endoffset);
+ get_zspage_mapping(endpage, &class_idx, &fg);
+ class = &pool->size_class[class_idx];
+
+ size = endoffset + 1;
+ if (endoffset < get_page_index(endpage))
+ size += class->size - get_page_index(endpage);
+ else
+ size -= rounddown(endoffset, class->size);
+
+ return size;
+}
+EXPORT_SYMBOL_GPL(zs_get_object_size);
+
u64 zs_get_total_size_bytes(struct zs_pool *pool)
{
int i;
diff --git a/drivers/staging/zsmalloc/zsmalloc.h b/drivers/staging/zsmalloc/zsmalloc.h
index de2e8bf..2830fdf 100644
--- a/drivers/staging/zsmalloc/zsmalloc.h
+++ b/drivers/staging/zsmalloc/zsmalloc.h
@@ -38,6 +38,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
enum zs_mapmode mm);
void zs_unmap_object(struct zs_pool *pool, unsigned long handle);

+size_t zs_get_object_size(struct zs_pool *pool, unsigned long handle);
u64 zs_get_total_size_bytes(struct zs_pool *pool);

#endif
--
1.7.10.4


2012-11-30 06:55:04

by Nitin Gupta

Subject: [PATCH v2 2/2] zram: reduce metadata overhead

Changelog v2 vs v1:
- Use is_zero_page() instead of direct handle comparison
- Use 1 as the invalid handle value instead of -1, since the handle
is unsigned and thus -1 may refer to a valid object, while 1
is guaranteed to be invalid: <pfn:0, offset:1> can never
refer to (the end of) a valid object.
- Remove references to 'table' in comments and messages since
we just have a plain array of handles now.

For every allocated object, zram maintains the handle, size,
flags and count fields. Of these, only the handle is required,
since zsmalloc now provides the object size given the handle.
The flags field was needed only to mark a given page as zero-filled.
Instead of this field, we now use an invalid handle value (1) to mark such
pages. Lastly, the count field was unused, so it was simply removed.
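
To make the saving concrete, here is a rough user-space rendering of the
per-page metadata before and after this patch (a sketch only: stdint types
stand in for the kernel's u16/u8, and padding depends on the build):

#include <stdint.h>
#include <stdio.h>

/* Before: one of these per disk page */
struct table_old {
        unsigned long handle;
        uint16_t size;          /* now derivable via zs_get_object_size() */
        uint8_t count;          /* was never used */
        uint8_t flags;          /* only carried the ZRAM_ZERO bit */
};

/*
 * After: just one handle per disk page; zero-filled pages are marked with
 * the otherwise-invalid handle value 1 (<pfn:0, offset:1>).
 */
static const unsigned long zero_page_handle = 1;

int main(void)
{
        printf("per-page metadata: %zu -> %zu bytes (zero-page sentinel: %lu)\n",
               sizeof(struct table_old), sizeof(unsigned long),
               zero_page_handle);
        return 0;
}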

Signed-off-by: Nitin Gupta <[email protected]>
Reviewed-by: Jerome Marchand <[email protected]>
---
drivers/staging/zram/zram_drv.c | 97 ++++++++++++++++-----------------------
drivers/staging/zram/zram_drv.h | 20 ++------
2 files changed, 43 insertions(+), 74 deletions(-)

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index f2a73bd..e6c9bec 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -71,24 +71,6 @@ static void zram_stat64_inc(struct zram *zram, u64 *v)
zram_stat64_add(zram, v, 1);
}

-static int zram_test_flag(struct zram *zram, u32 index,
- enum zram_pageflags flag)
-{
- return zram->table[index].flags & BIT(flag);
-}
-
-static void zram_set_flag(struct zram *zram, u32 index,
- enum zram_pageflags flag)
-{
- zram->table[index].flags |= BIT(flag);
-}
-
-static void zram_clear_flag(struct zram *zram, u32 index,
- enum zram_pageflags flag)
-{
- zram->table[index].flags &= ~BIT(flag);
-}
-
static int page_zero_filled(void *ptr)
{
unsigned int pos;
@@ -104,6 +86,11 @@ static int page_zero_filled(void *ptr)
return 1;
}

+static inline int is_zero_page(unsigned long handle)
+{
+ return handle == zero_page_handle;
+}
+
static void zram_set_disksize(struct zram *zram, size_t totalram_bytes)
{
if (!zram->disksize) {
@@ -135,21 +122,20 @@ static void zram_set_disksize(struct zram *zram, size_t totalram_bytes)

static void zram_free_page(struct zram *zram, size_t index)
{
- unsigned long handle = zram->table[index].handle;
- u16 size = zram->table[index].size;
+ unsigned long handle = zram->handle[index];
+ size_t size;

- if (unlikely(!handle)) {
- /*
- * No memory is allocated for zero filled pages.
- * Simply clear zero page flag.
- */
- if (zram_test_flag(zram, index, ZRAM_ZERO)) {
- zram_clear_flag(zram, index, ZRAM_ZERO);
- zram_stat_dec(&zram->stats.pages_zero);
- }
+ if (unlikely(!handle))
+ return;
+
+ if (is_zero_page(handle)) {
+ /* No memory is allocated for zero filled pages */
+ zram->handle[index] = 0;
+ zram_stat_dec(&zram->stats.pages_zero);
return;
}

+ size = zs_get_object_size(zram->mem_pool, handle);
if (unlikely(size > max_zpage_size))
zram_stat_dec(&zram->stats.bad_compress);

@@ -158,12 +144,10 @@ static void zram_free_page(struct zram *zram, size_t index)
if (size <= PAGE_SIZE / 2)
zram_stat_dec(&zram->stats.good_compress);

- zram_stat64_sub(zram, &zram->stats.compr_size,
- zram->table[index].size);
+ zram_stat64_sub(zram, &zram->stats.compr_size, size);
zram_stat_dec(&zram->stats.pages_stored);

- zram->table[index].handle = 0;
- zram->table[index].size = 0;
+ zram->handle[index] = 0;
}

static void handle_zero_page(struct bio_vec *bvec)
@@ -188,19 +172,20 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
int ret = LZO_E_OK;
size_t clen = PAGE_SIZE;
unsigned char *cmem;
- unsigned long handle = zram->table[index].handle;
+ unsigned long handle = zram->handle[index];
+ size_t objsize;

- if (!handle || zram_test_flag(zram, index, ZRAM_ZERO)) {
+ if (!handle || is_zero_page(handle)) {
memset(mem, 0, PAGE_SIZE);
return 0;
}

+ objsize = zs_get_object_size(zram->mem_pool, handle);
cmem = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
- if (zram->table[index].size == PAGE_SIZE)
+ if (objsize == PAGE_SIZE)
memcpy(mem, cmem, PAGE_SIZE);
else
- ret = lzo1x_decompress_safe(cmem, zram->table[index].size,
- mem, &clen);
+ ret = lzo1x_decompress_safe(cmem, objsize, mem, &clen);
zs_unmap_object(zram->mem_pool, handle);

/* Should NEVER happen. Return bio error if it does. */
@@ -222,8 +207,8 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,

page = bvec->bv_page;

- if (unlikely(!zram->table[index].handle) ||
- zram_test_flag(zram, index, ZRAM_ZERO)) {
+ if (unlikely(!zram->handle[index]) ||
+ is_zero_page(zram->handle[index])) {
handle_zero_page(bvec);
return 0;
}
@@ -294,8 +279,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
* System overwrites unused sectors. Free memory associated
* with this sector now.
*/
- if (zram->table[index].handle ||
- zram_test_flag(zram, index, ZRAM_ZERO))
+ if (zram->handle[index])
zram_free_page(zram, index);

user_mem = kmap_atomic(page);
@@ -313,7 +297,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
if (!is_partial_io(bvec))
kunmap_atomic(user_mem);
zram_stat_inc(&zram->stats.pages_zero);
- zram_set_flag(zram, index, ZRAM_ZERO);
+ zram->handle[index] = zero_page_handle;
ret = 0;
goto out;
}
@@ -357,8 +341,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,

zs_unmap_object(zram->mem_pool, handle);

- zram->table[index].handle = handle;
- zram->table[index].size = clen;
+ zram->handle[index] = handle;

/* Update stats */
zram_stat64_add(zram, &zram->stats.compr_size, clen);
@@ -517,15 +500,15 @@ void __zram_reset_device(struct zram *zram)

/* Free all pages that are still in this zram device */
for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) {
- unsigned long handle = zram->table[index].handle;
- if (!handle)
+ unsigned long handle = zram->handle[index];
+ if (!handle || is_zero_page(handle))
continue;

zs_free(zram->mem_pool, handle);
}

- vfree(zram->table);
- zram->table = NULL;
+ vfree(zram->handle);
+ zram->handle = NULL;

zs_destroy_pool(zram->mem_pool);
zram->mem_pool = NULL;
@@ -561,7 +544,7 @@ int zram_init_device(struct zram *zram)
if (!zram->compress_workmem) {
pr_err("Error allocating compressor working memory!\n");
ret = -ENOMEM;
- goto fail_no_table;
+ goto fail_no_handles;
}

zram->compress_buffer =
@@ -569,15 +552,15 @@ int zram_init_device(struct zram *zram)
if (!zram->compress_buffer) {
pr_err("Error allocating compressor buffer space\n");
ret = -ENOMEM;
- goto fail_no_table;
+ goto fail_no_handles;
}

num_pages = zram->disksize >> PAGE_SHIFT;
- zram->table = vzalloc(num_pages * sizeof(*zram->table));
- if (!zram->table) {
- pr_err("Error allocating zram address table\n");
+ zram->handle = vzalloc(num_pages * sizeof(*zram->handle));
+ if (!zram->handle) {
+ pr_err("Error allocating object handle space\n");
ret = -ENOMEM;
- goto fail_no_table;
+ goto fail_no_handles;
}

set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
@@ -598,8 +581,8 @@ int zram_init_device(struct zram *zram)
pr_debug("Initialization done!\n");
return 0;

-fail_no_table:
- /* To prevent accessing table entries during cleanup */
+fail_no_handles:
+ /* To prevent accessing handle values during cleanup */
zram->disksize = 0;
fail:
__zram_reset_device(zram);
diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h
index df2eec4..837068f 100644
--- a/drivers/staging/zram/zram_drv.h
+++ b/drivers/staging/zram/zram_drv.h
@@ -54,24 +54,10 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3;
#define ZRAM_SECTOR_PER_LOGICAL_BLOCK \
(1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT))

-/* Flags for zram pages (table[page_no].flags) */
-enum zram_pageflags {
- /* Page consists entirely of zeros */
- ZRAM_ZERO,
-
- __NR_ZRAM_PAGEFLAGS,
-};
+static const unsigned long zero_page_handle = 1;

/*-- Data structures */

-/* Allocated for each disk page */
-struct table {
- unsigned long handle;
- u16 size; /* object size (excluding header) */
- u8 count; /* object ref count (not yet used) */
- u8 flags;
-} __aligned(4);
-
struct zram_stats {
u64 compr_size; /* compressed size of pages stored */
u64 num_reads; /* failed + successful */
@@ -90,9 +76,9 @@ struct zram {
struct zs_pool *mem_pool;
void *compress_workmem;
void *compress_buffer;
- struct table *table;
+ unsigned long *handle; /* memory handle for each disk page */
spinlock_t stat64_lock; /* protect 64-bit stats */
- struct rw_semaphore lock; /* protect compression buffers and table
+ struct rw_semaphore lock; /* protect compression buffers and handles
* against concurrent read and writes */
struct request_queue *queue;
struct gendisk *disk;
--
1.7.10.4

2012-11-30 13:54:32

by Minchan Kim

Subject: Re: [PATCH v2 1/2] zsmalloc: add function to query object size

On Thu, Nov 29, 2012 at 10:54:48PM -0800, Nitin Gupta wrote:
> Changelog v2 vs v1:
> - None
>
> Adds zs_get_object_size(handle), which returns the size of
> the given object. This is useful since users (zram etc.)
> no longer have to maintain object sizes separately, saving
> some metadata (4 bytes per page).
>
> The object handle encodes a <page, offset> pair which currently points
> to the start of the object. Now, the handle implicitly stores the size
> information by pointing to the object's end instead. Since zsmalloc is
> a slab-based allocator, the start of the object can be easily determined,
> and the difference between the end offset encoded in the handle and the
> start gives us the object size.
>
> Signed-off-by: Nitin Gupta <[email protected]>
Acked-by: Minchan Kim <[email protected]>

I already had a few comments on your previous version.
I'm OK even if you ignore them, since I can send a follow-up patch for
my nitpicks, but could you answer my question below?

> ---
> drivers/staging/zsmalloc/zsmalloc-main.c | 177 +++++++++++++++++++++---------
> drivers/staging/zsmalloc/zsmalloc.h | 1 +
> 2 files changed, 127 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c
> index 09a9d35..65c9d3b 100644
> --- a/drivers/staging/zsmalloc/zsmalloc-main.c
> +++ b/drivers/staging/zsmalloc/zsmalloc-main.c
> @@ -112,20 +112,20 @@
> #define MAX_PHYSMEM_BITS 36
> #else /* !CONFIG_HIGHMEM64G */
> /*
> - * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
> + * If this definition of MAX_PHYSMEM_BITS is used, OFFSET_BITS will just
> * be PAGE_SHIFT
> */
> #define MAX_PHYSMEM_BITS BITS_PER_LONG
> #endif
> #endif
> #define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
> -#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
> -#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
> +#define OFFSET_BITS (BITS_PER_LONG - _PFN_BITS)
> +#define OFFSET_MASK ((_AC(1, UL) << OFFSET_BITS) - 1)
>
> #define MAX(a, b) ((a) >= (b) ? (a) : (b))
> /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
> #define ZS_MIN_ALLOC_SIZE \
> - MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
> + MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OFFSET_BITS))
> #define ZS_MAX_ALLOC_SIZE PAGE_SIZE
>
> /*
> @@ -256,6 +256,11 @@ static int is_last_page(struct page *page)
> return PagePrivate2(page);
> }
>
> +static unsigned long get_page_index(struct page *page)
> +{
> + return is_first_page(page) ? 0 : page->index;
> +}
> +
> static void get_zspage_mapping(struct page *page, unsigned int *class_idx,
> enum fullness_group *fullness)
> {
> @@ -433,39 +438,86 @@ static struct page *get_next_page(struct page *page)
> return next;
> }
>
> -/* Encode <page, obj_idx> as a single handle value */
> -static void *obj_location_to_handle(struct page *page, unsigned long obj_idx)
> +static struct page *get_prev_page(struct page *page)
> {
> - unsigned long handle;
> + struct page *prev, *first_page;
>
> - if (!page) {
> - BUG_ON(obj_idx);
> - return NULL;
> - }
> + first_page = get_first_page(page);
> + if (page == first_page)
> + prev = NULL;
> + else if (page == (struct page *)first_page->private)
> + prev = first_page;
> + else
> + prev = list_entry(page->lru.prev, struct page, lru);
>
> - handle = page_to_pfn(page) << OBJ_INDEX_BITS;
> - handle |= (obj_idx & OBJ_INDEX_MASK);
> + return prev;
>
> - return (void *)handle;
> }
>
> -/* Decode <page, obj_idx> pair from the given object handle */
> -static void obj_handle_to_location(unsigned long handle, struct page **page,
> - unsigned long *obj_idx)
> +static void *encode_ptr(struct page *page, unsigned long offset)
> {
> - *page = pfn_to_page(handle >> OBJ_INDEX_BITS);
> - *obj_idx = handle & OBJ_INDEX_MASK;
> + unsigned long ptr;
> + ptr = page_to_pfn(page) << OFFSET_BITS;
> + ptr |= offset & OFFSET_MASK;
> + return (void *)ptr;
> +}
> +
> +static void decode_ptr(unsigned long ptr, struct page **page,
> + unsigned int *offset)
> +{
> + *page = pfn_to_page(ptr >> OFFSET_BITS);
> + *offset = ptr & OFFSET_MASK;
> +}
> +
> +static struct page *obj_handle_to_page(unsigned long handle)
> +{
> + struct page *page;
> + unsigned int offset;
> +
> + decode_ptr(handle, &page, &offset);
> + if (offset < get_page_index(page))
> + page = get_prev_page(page);
> +
> + return page;
> +}
> +
> +static unsigned int obj_handle_to_offset(unsigned long handle,
> + unsigned int class_size)
> +{
> + struct page *page;
> + unsigned int offset;
> +
> + decode_ptr(handle, &page, &offset);
> + if (offset < get_page_index(page))
> + offset = PAGE_SIZE - class_size + get_page_index(page);
> + else
> + offset = roundup(offset, class_size) - class_size;
> +
> + return offset;
> }
>
> -static unsigned long obj_idx_to_offset(struct page *page,
> - unsigned long obj_idx, int class_size)
> +/* Encode <page, offset, size> as a single handle value */
> +static void *obj_location_to_handle(struct page *page, unsigned int offset,
> + unsigned int size, unsigned int class_size)
> {
> - unsigned long off = 0;
> + struct page *endpage;
> + unsigned int endoffset;
>
> - if (!is_first_page(page))
> - off = page->index;
> + if (!page) {
> + BUG_ON(offset);
> + return NULL;
> + }

What do you expect to catch with the above check?

--
Kind regards,
Minchan Kim

2012-11-30 14:37:54

by Minchan Kim

Subject: Re: [PATCH v2 2/2] zram: reduce metadata overhead

On Thu, Nov 29, 2012 at 10:54:49PM -0800, Nitin Gupta wrote:
> Changelog v2 vs v1:
> - Use is_zero_page() instead of direct handle comparison
> - Use 1 as the invalid handle value instead of -1, since the handle
> is unsigned and thus -1 may refer to a valid object, while 1
> is guaranteed to be invalid: <pfn:0, offset:1> can never
> refer to (the end of) a valid object.

Ho, hmm, another coupling between zram and zsmalloc.
zram knows the internals of zsmalloc very well. Sigh.
I really don't like it. Nonetheless, if you really want it,
please put "#define ZS_INVALID_HANDLE 1" in zsmalloc.h and use it.
But the concern with my suggestion is that users may assume
it's equal to 0 and try to use it instead of 0.
Maybe we need a clearer name.
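
Something like the following is what I have in mind (just a sketch, and as
said, the name probably wants to be clearer):

/* zsmalloc.h: <pfn:0, offset:1> can never encode a valid object end */
#define ZS_INVALID_HANDLE       1

/* zram_drv.c: mark a zero-filled page */
zram->handle[index] = ZS_INVALID_HANDLE;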

Off-topic:
Anyway, my assumption about user mistakes is only valid in the case of a
general allocator, but zsmalloc already isn't a general allocator, as the
following shows:

zs_map_object
zs_get_objsize
ZS_INVALID_HANDLE

Now I'm sure we shouldn't put it under /lib. :(

> - Remove references to 'table' in comments and messages since
> we just have a plain array of handles now.
>
> For every allocated object, zram maintains the handle, size,
> flags and count fields. Of these, only the handle is required,
> since zsmalloc now provides the object size given the handle.
> The flags field was needed only to mark a given page as zero-filled.
> Instead of this field, we now use an invalid handle value (1) to mark such

ZS_INVALID_HANDLE or something.

> pages. Lastly, the count field was unused, so it was simply removed.
>
> Signed-off-by: Nitin Gupta <[email protected]>
> Reviewed-by: Jerome Marchand <[email protected]>
> ---
> drivers/staging/zram/zram_drv.c | 97 ++++++++++++++++-----------------------
> drivers/staging/zram/zram_drv.h | 20 ++------
> 2 files changed, 43 insertions(+), 74 deletions(-)
>

Otherwise, looks good to me.
--
Kind regards,
Minchan Kim