From: Nitin Gupta
To: Pekka Enberg, Hugh Dickins, Andrew Morton, Greg KH,
	Dan Magenheimer, Rik van Riel, Avi Kivity, Christoph Hellwig,
	Minchan Kim, Konrad Rzeszutek Wilk
Cc: linux-mm, linux-kernel
Subject: [PATCH 7/8] Use xvmalloc to store compressed chunks
Date: Fri, 16 Jul 2010 18:07:49 +0530
Message-Id: <1279283870-18549-8-git-send-email-ngupta@vflare.org>
In-Reply-To: <1279283870-18549-1-git-send-email-ngupta@vflare.org>
References: <1279283870-18549-1-git-send-email-ngupta@vflare.org>
X-Mailer: git-send-email 1.7.1.1

xvmalloc is an O(1) memory allocator designed specifically for storing
variable-sized compressed chunks. It is already used by the zram driver
for the same purpose.

A new statistic is also exported:

	/sys/kernel/mm/zcache/pool<id>/mem_used_total

This gives the pool's total memory usage, including allocator
fragmentation and metadata overhead.

Currently, we use just one xvmalloc pool per zcache pool. If this
proves to be a performance bottleneck, these pools will also be
created per-cpu.

xvmalloc details, performance numbers, and a comparison with kmalloc (SLUB):

http://code.google.com/p/compcache/wiki/xvMalloc
http://code.google.com/p/compcache/wiki/xvMallocPerformance
http://code.google.com/p/compcache/wiki/AllocatorsComparison

Signed-off-by: Nitin Gupta <ngupta@vflare.org>
---
 drivers/staging/zram/zcache_drv.c |  150 +++++++++++++++++++++++++++++-------
 drivers/staging/zram/zcache_drv.h |    6 ++
 2 files changed, 127 insertions(+), 29 deletions(-)

diff --git a/drivers/staging/zram/zcache_drv.c b/drivers/staging/zram/zcache_drv.c
index 2a02606..71ca48a 100644
--- a/drivers/staging/zram/zcache_drv.c
+++ b/drivers/staging/zram/zcache_drv.c
@@ -47,6 +47,7 @@
 #include
 #include
+#include "xvmalloc.h"
 #include "zcache_drv.h"
 
 static DEFINE_PER_CPU(unsigned char *, compress_buffer);
@@ -179,6 +180,7 @@ static void zcache_destroy_pool(struct zcache_pool *zpool)
 	}
 
 	free_percpu(zpool->stats);
+	xv_destroy_pool(zpool->xv_pool);
 	kfree(zpool);
 }
 
@@ -219,6 +221,12 @@ int zcache_create_pool(void)
 		goto out;
 	}
 
+	zpool->xv_pool = xv_create_pool();
+	if (!zpool->xv_pool) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	rwlock_init(&zpool->tree_lock);
 	seqlock_init(&zpool->memlimit_lock);
 	zpool->inode_tree = RB_ROOT;
@@ -446,35 +454,81 @@ static void *zcache_index_to_ptr(unsigned long index)
 }
 
 /*
+ * Encode <page, offset> as a single "pointer" value which is stored
+ * in corresponding radix node.
+ */
+static void *zcache_xv_location_to_ptr(struct page *page, u32 offset)
+{
+	unsigned long ptrval;
+
+	ptrval = page_to_pfn(page) << PAGE_SHIFT;
+	ptrval |= (offset & ~PAGE_MASK);
+
+	return (void *)ptrval;
+}
+
+/*
+ * Decode <page, offset> pair from "pointer" value returned from
+ * radix tree lookup.
+ */
+static void zcache_ptr_to_xv_location(void *ptr, struct page **page,
+				u32 *offset)
+{
+	unsigned long ptrval = (unsigned long)ptr;
+
+	*page = pfn_to_page(ptrval >> PAGE_SHIFT);
+	*offset = ptrval & ~PAGE_MASK;
+}
+
+/*
 * Radix node contains "pointer" value which encode <page, offset>
 * pair, locating the compressed object. Header of the object then
 * contains corresponding 'index' value.
 */
-static unsigned long zcache_ptr_to_index(struct page *page)
+static unsigned long zcache_ptr_to_index(void *ptr)
 {
+	u32 offset;
+	struct page *page;
 	unsigned long index;
+	struct zcache_objheader *zheader;
 
-	if (zcache_is_zero_page(page))
-		index = (unsigned long)(page) >> ZCACHE_ZERO_PAGE_INDEX_SHIFT;
-	else
-		index = page->index;
+	if (zcache_is_zero_page(ptr))
+		return (unsigned long)(ptr) >> ZCACHE_ZERO_PAGE_INDEX_SHIFT;
+
+	zcache_ptr_to_xv_location(ptr, &page, &offset);
+
+	zheader = kmap_atomic(page, KM_USER0) + offset;
+	index = zheader->index;
+	kunmap_atomic(zheader, KM_USER0);
 
 	return index;
 }
 
-void zcache_free_page(struct zcache_pool *zpool, struct page *page)
+void zcache_free_page(struct zcache_pool *zpool, void *ptr)
 {
 	int is_zero;
+	unsigned long flags;
 
-	if (unlikely(!page))
+	if (unlikely(!ptr))
 		return;
 
-	is_zero = zcache_is_zero_page(page);
+	is_zero = zcache_is_zero_page(ptr);
 	if (!is_zero) {
-		int clen = page->private;
+		int clen;
+		void *obj;
+		u32 offset;
+		struct page *page;
+
+		zcache_ptr_to_xv_location(ptr, &page, &offset);
+		obj = kmap_atomic(page, KM_USER0) + offset;
+		clen = xv_get_object_size(obj) -
+				sizeof(struct zcache_objheader);
+		kunmap_atomic(obj, KM_USER0);
 
 		zcache_add_stat(zpool, ZPOOL_STAT_COMPR_SIZE, -clen);
-		__free_page(page);
+		local_irq_save(flags);
+		xv_free(zpool->xv_pool, page, offset);
+		local_irq_restore(flags);
 	}
 
 	zcache_dec_pages(zpool, is_zero);
@@ -491,24 +545,23 @@ static int zcache_store_page(struct zcache_inode_rb *znode,
 			pgoff_t index, struct page *page, int is_zero)
 {
 	int ret;
+	void *nodeptr;
 	size_t clen;
 	unsigned long flags;
+
+	u32 zoffset;
 	struct page *zpage;
 	unsigned char *zbuffer, *zworkmem;
 	unsigned char *src_data, *dest_data;
+
+	struct zcache_objheader *zheader;
 	struct zcache_pool *zpool = znode->pool;
 
 	if (is_zero) {
-		zpage = zcache_index_to_ptr(index);
+		nodeptr = zcache_index_to_ptr(index);
 		goto out_store;
 	}
 
-	zpage = alloc_page(GFP_NOWAIT);
-	if (!zpage) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
 	preempt_disable();
 	zbuffer = __get_cpu_var(compress_buffer);
 	zworkmem = __get_cpu_var(compress_workmem);
@@ -528,17 +581,32 @@ static int zcache_store_page(struct zcache_inode_rb *znode,
 		goto out;
 	}
 
-	dest_data = kmap_atomic(zpage, KM_USER0);
+	local_irq_save(flags);
+	ret = xv_malloc(zpool->xv_pool, clen + sizeof(*zheader),
+			&zpage, &zoffset, GFP_NOWAIT);
+	local_irq_restore(flags);
+	if (unlikely(ret)) {
+		ret = -ENOMEM;
+		preempt_enable();
+		goto out;
+	}
+
+	dest_data = kmap_atomic(zpage, KM_USER0) + zoffset;
+
+	/* Store index value in header */
+	zheader = (struct zcache_objheader *)dest_data;
+	zheader->index = index;
+	dest_data += sizeof(*zheader);
+
 	memcpy(dest_data, zbuffer, clen);
 	kunmap_atomic(dest_data, KM_USER0);
 	preempt_enable();
 
-	zpage->index = index;
-	zpage->private = clen;
+	nodeptr = zcache_xv_location_to_ptr(zpage, zoffset);
 
 out_store:
 	spin_lock_irqsave(&znode->tree_lock, flags);
-	ret = radix_tree_insert(&znode->page_tree, index, zpage);
+	ret = radix_tree_insert(&znode->page_tree, index, nodeptr);
 	if (unlikely(ret)) {
 		spin_unlock_irqrestore(&znode->tree_lock, flags);
 		if (!is_zero)
@@ -752,6 +820,19 @@ static ssize_t compr_data_size_show(struct kobject *kobj,
 }
 ZCACHE_POOL_ATTR_RO(compr_data_size);
 
+/*
+ * Total memory used by this pool, including allocator fragmentation
+ * and metadata overhead.
+ */
+static ssize_t mem_used_total_show(struct kobject *kobj,
+			struct kobj_attribute *attr, char *buf)
+{
+	struct zcache_pool *zpool = zcache_kobj_to_pool(kobj);
+
+	return sprintf(buf, "%llu\n", xv_get_total_size_bytes(zpool->xv_pool));
+}
+ZCACHE_POOL_ATTR_RO(mem_used_total);
+
 static void memlimit_sysfs_common(struct kobject *kobj, u64 *value, int store)
 {
 	struct zcache_pool *zpool = zcache_kobj_to_pool(kobj);
@@ -795,6 +876,7 @@ static struct attribute *zcache_pool_attrs[] = {
 	&zero_pages_attr.attr,
 	&orig_data_size_attr.attr,
 	&compr_data_size_attr.attr,
+	&mem_used_total_attr.attr,
 	&memlimit_attr.attr,
 	NULL,
 };
@@ -904,13 +986,17 @@ static int zcache_init_shared_fs(char *uuid, size_t pagesize)
 static int zcache_get_page(int pool_id, ino_t inode_no,
 			pgoff_t index, struct page *page)
 {
-	int ret = -1;
+	int ret;
+	void *nodeptr;
 	size_t clen;
 	unsigned long flags;
+
+	u32 offset;
 	struct page *src_page;
 	unsigned char *src_data, *dest_data;
 
 	struct zcache_inode_rb *znode;
+	struct zcache_objheader *zheader;
 	struct zcache_pool *zpool = zcache->pools[pool_id];
 
 	znode = zcache_find_inode(zpool, inode_no);
@@ -922,29 +1008,35 @@ static int zcache_get_page(int pool_id, ino_t inode_no,
 	BUG_ON(znode->inode_no != inode_no);
 
 	spin_lock_irqsave(&znode->tree_lock, flags);
-	src_page = radix_tree_delete(&znode->page_tree, index);
+	nodeptr = radix_tree_delete(&znode->page_tree, index);
 	if (zcache_inode_is_empty(znode))
 		zcache_inode_isolate(znode);
 	spin_unlock_irqrestore(&znode->tree_lock, flags);
 
 	kref_put(&znode->refcount, zcache_inode_release);
 
-	if (!src_page) {
+	if (!nodeptr) {
 		ret = -EFAULT;
 		goto out;
 	}
 
-	if (zcache_is_zero_page(src_page)) {
+	if (zcache_is_zero_page(nodeptr)) {
 		zcache_handle_zero_page(page);
 		goto out_free;
 	}
 
 	clen = PAGE_SIZE;
-	src_data = kmap_atomic(src_page, KM_USER0);
+	zcache_ptr_to_xv_location(nodeptr, &src_page, &offset);
+
+	src_data = kmap_atomic(src_page, KM_USER0) + offset;
+	zheader = (struct zcache_objheader *)src_data;
+	BUG_ON(zheader->index != index);
+
 	dest_data = kmap_atomic(page, KM_USER1);
 
-	ret = lzo1x_decompress_safe(src_data, src_page->private,
-				dest_data, &clen);
+	ret = lzo1x_decompress_safe(src_data + sizeof(*zheader),
+			xv_get_object_size(src_data) - sizeof(*zheader),
+			dest_data, &clen);
 
 	kunmap_atomic(src_data, KM_USER0);
 	kunmap_atomic(dest_data, KM_USER1);
@@ -956,7 +1048,7 @@ static int zcache_get_page(int pool_id, ino_t inode_no,
 	flush_dcache_page(page);
 
 out_free:
-	zcache_free_page(zpool, src_page);
+	zcache_free_page(zpool, nodeptr);
 	ret = 0;	/* success */
 
 out:
diff --git a/drivers/staging/zram/zcache_drv.h b/drivers/staging/zram/zcache_drv.h
index 9ce97da..7283116 100644
--- a/drivers/staging/zram/zcache_drv.h
+++ b/drivers/staging/zram/zcache_drv.h
@@ -41,6 +41,11 @@ static const unsigned zcache_pool_default_memlimit_perc_ram = 10;
 /* We only keep pages that compress to less than this size */
 static const int zcache_max_page_size = PAGE_SIZE / 2;
 
+/* Stored in the beginning of each compressed object */
+struct zcache_objheader {
+	unsigned long index;
+};
+
 /* Red-Black tree node. Maps inode to its page-tree */
 struct zcache_inode_rb {
 	struct radix_tree_root page_tree; /* maps inode index to page */
@@ -64,6 +69,7 @@ struct zcache_pool {
 	seqlock_t memlimit_lock;	/* protects memlimit */
 	u64 memlimit;			/* bytes */
 
+	struct xv_pool *xv_pool;	/* xvmalloc pool */
 	struct zcache_pool_stats_cpu *stats;	/* percpu stats */
 #ifdef CONFIG_SYSFS
 	unsigned char name[MAX_ZPOOL_NAME_LEN];
-- 
1.7.1.1
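
For reference, the <page, offset> encoding implemented above by
zcache_xv_location_to_ptr() / zcache_ptr_to_xv_location() can be sketched in
userspace. A struct page pointer is interchangeable with its PFN, so the pair
packs losslessly into one word as (pfn << PAGE_SHIFT) | offset. This is a
minimal sketch only: the PAGE_SHIFT/PAGE_MASK values and the pfn_t type below
are stand-ins, not the kernel definitions.

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))

typedef unsigned long pfn_t;	/* stand-in for page_to_pfn(page) */

static void *location_to_ptr(pfn_t pfn, unsigned int offset)
{
	unsigned long ptrval;

	ptrval = pfn << PAGE_SHIFT;		/* page part */
	ptrval |= (offset & ~PAGE_MASK);	/* sub-page offset part */
	return (void *)ptrval;
}

static void ptr_to_location(void *ptr, pfn_t *pfn, unsigned int *offset)
{
	unsigned long ptrval = (unsigned long)ptr;

	*pfn = ptrval >> PAGE_SHIFT;
	*offset = ptrval & ~PAGE_MASK;
}

int main(void)
{
	pfn_t pfn;
	unsigned int offset;
	void *ptr = location_to_ptr(0x1234, 100);

	/* round-trip must recover the original pair */
	ptr_to_location(ptr, &pfn, &offset);
	assert(pfn == 0x1234 && offset == 100);
	printf("pfn=%#lx offset=%u\n", pfn, offset);
	return 0;
}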
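
Similarly, the object layout created in zcache_store_page() can be sketched
outside the kernel: each xvmalloc object begins with a struct zcache_objheader
holding the page index, followed by the compressed payload. The malloc()
backing and the helper names below are stand-ins for xv_malloc() and the
driver code, purely for illustration.

#include <assert.h>
#include <stdlib.h>
#include <string.h>

struct zcache_objheader {
	unsigned long index;	/* page index within the owning inode */
};

/* Lay out header + payload the way zcache_store_page() does. */
static void *store_object(unsigned long index, const void *payload,
			size_t clen)
{
	unsigned char *obj = malloc(sizeof(struct zcache_objheader) + clen);

	if (!obj)
		return NULL;
	((struct zcache_objheader *)obj)->index = index;	/* header first */
	memcpy(obj + sizeof(struct zcache_objheader), payload, clen);
	return obj;
}

/* Recover the index the way zcache_ptr_to_index() does. */
static unsigned long object_index(const void *obj)
{
	return ((const struct zcache_objheader *)obj)->index;
}

int main(void)
{
	void *obj = store_object(42, "compressed-bytes", 16);

	assert(obj && object_index(obj) == 42);
	free(obj);
	return 0;
}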
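
The new statistic can be read like any other sysfs attribute; it prints a
single byte count. A minimal reader, assuming sysfs is mounted at /sys and a
pool directory named pool0 (the actual directory name depends on the pool id):

#include <stdio.h>

int main(void)
{
	unsigned long long bytes;
	/* "pool0" is a placeholder; substitute the actual pool id */
	FILE *f = fopen("/sys/kernel/mm/zcache/pool0/mem_used_total", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%llu", &bytes) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("zcache pool memory used: %llu bytes\n", bytes);
	return 0;
}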