Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751999AbbFYUWg (ORCPT ); Thu, 25 Jun 2015 16:22:36 -0400 Received: from mail-ig0-f182.google.com ([209.85.213.182]:33478 "EHLO mail-ig0-f182.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751585AbbFYUW1 (ORCPT ); Thu, 25 Jun 2015 16:22:27 -0400 MIME-Version: 1.0 In-Reply-To: References: <1433419358-21820-1-git-send-email-ddstreet@ieee.org> From: Dan Streetman Date: Thu, 25 Jun 2015 16:22:07 -0400 X-Google-Sender-Auth: wyTHhA70Pl1ypHYZjEOumDsixuI Message-ID: Subject: Re: [PATCH] zswap: dynamic pool creation To: Seth Jennings , Andrew Morton Cc: Linux-MM , linux-kernel , Dan Streetman Content-Type: text/plain; charset=UTF-8 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 32259 Lines: 835 On Wed, Jun 17, 2015 at 7:13 PM, Dan Streetman wrote: > On Wed, Jun 10, 2015 at 4:54 PM, Dan Streetman wrote: >> On Thu, Jun 4, 2015 at 8:13 AM, Dan Streetman wrote: >>> On Thu, Jun 4, 2015 at 8:02 AM, Dan Streetman wrote: >>>> Add dynamic creation of pools. Move the static crypto compression >>>> per-cpu transforms into each pool. Add a pointer to zswap_entry to >>>> the pool it's in. >>> >>> Seth, as far as the design, from your previous comments I assume you >>> were thinking of maintaining separate lists of zpools and compressors? >>> I do see how that will reduce duplication of zpools and compressors, >>> but it also requires adding a new pointer to each zswap_entry, and >>> increasing the amount of code to manage each list separately. And the >>> most common case in zswap will be just a single zpool and compressor, >>> not repeatedly changing params. What do you think? >> >> Any opinion on this patch? If you want, I can break it up so there's >> a list of zpools and a list of compressors. Either the combined way >> (this patch) or separate lists works for me, as long as the params are >> changeable at runtime :-) > > You on vacation Seth? Let me know what direction you prefer for this... So... Andrew (or anyone else) do you have any objection to this patch? I assume Seth is on an extended vacation; maybe you could add this to your mmotm? If Seth prefers changes to it he still has a couple months-ish until the next merge window. I could then send the follow-on patches, that allow zswap params to be set at runtime. I can resend them all as a series, if you prefer that. I think the main issue Seth had was keeping separate lists of zpools and compressors, instead of this patch's approach of a single list with each element containing a zpool and compressor together. My preference is a single list, for the reasons I gave above - mainly, that 99+% of the time, zswap will only be using a single zpool and compressor; users will just set the zpool and compressor immediately after boot, instead of having to add boot parameters, and never change them again. So keeping 2 lists only adds complexity for little to no benefit in the common case. > >> >> >>> >>>> >>>> This is required by a separate patch which enables changing the >>>> zswap zpool and compressor params at runtime. >>>> >>>> Signed-off-by: Dan Streetman >>>> --- >>>> mm/zswap.c | 550 +++++++++++++++++++++++++++++++++++++++++++++---------------- >>>> 1 file changed, 408 insertions(+), 142 deletions(-) >>>> >>>> diff --git a/mm/zswap.c b/mm/zswap.c >>>> index 2d5727b..fc93770 100644 >>>> --- a/mm/zswap.c >>>> +++ b/mm/zswap.c >>>> @@ -99,66 +99,19 @@ module_param_named(zpool, zswap_zpool_type, charp, 0444); >>>> static struct zpool *zswap_pool; >>>> >>>> /********************************* >>>> -* compression functions >>>> +* data structures >>>> **********************************/ >>>> -/* per-cpu compression transforms */ >>>> -static struct crypto_comp * __percpu *zswap_comp_pcpu_tfms; >>>> >>>> -enum comp_op { >>>> - ZSWAP_COMPOP_COMPRESS, >>>> - ZSWAP_COMPOP_DECOMPRESS >>>> +struct zswap_pool { >>>> + struct zpool *zpool; >>>> + struct kref kref; >>>> + struct list_head list; >>>> + struct rcu_head rcu_head; >>>> + struct notifier_block notifier; >>>> + char tfm_name[CRYPTO_MAX_ALG_NAME]; >>>> + struct crypto_comp * __percpu *tfm; >>>> }; >>>> >>>> -static int zswap_comp_op(enum comp_op op, const u8 *src, unsigned int slen, >>>> - u8 *dst, unsigned int *dlen) >>>> -{ >>>> - struct crypto_comp *tfm; >>>> - int ret; >>>> - >>>> - tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, get_cpu()); >>>> - switch (op) { >>>> - case ZSWAP_COMPOP_COMPRESS: >>>> - ret = crypto_comp_compress(tfm, src, slen, dst, dlen); >>>> - break; >>>> - case ZSWAP_COMPOP_DECOMPRESS: >>>> - ret = crypto_comp_decompress(tfm, src, slen, dst, dlen); >>>> - break; >>>> - default: >>>> - ret = -EINVAL; >>>> - } >>>> - >>>> - put_cpu(); >>>> - return ret; >>>> -} >>>> - >>>> -static int __init zswap_comp_init(void) >>>> -{ >>>> - if (!crypto_has_comp(zswap_compressor, 0, 0)) { >>>> - pr_info("%s compressor not available\n", zswap_compressor); >>>> - /* fall back to default compressor */ >>>> - zswap_compressor = ZSWAP_COMPRESSOR_DEFAULT; >>>> - if (!crypto_has_comp(zswap_compressor, 0, 0)) >>>> - /* can't even load the default compressor */ >>>> - return -ENODEV; >>>> - } >>>> - pr_info("using %s compressor\n", zswap_compressor); >>>> - >>>> - /* alloc percpu transforms */ >>>> - zswap_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *); >>>> - if (!zswap_comp_pcpu_tfms) >>>> - return -ENOMEM; >>>> - return 0; >>>> -} >>>> - >>>> -static void __init zswap_comp_exit(void) >>>> -{ >>>> - /* free percpu transforms */ >>>> - free_percpu(zswap_comp_pcpu_tfms); >>>> -} >>>> - >>>> -/********************************* >>>> -* data structures >>>> -**********************************/ >>>> /* >>>> * struct zswap_entry >>>> * >>>> @@ -166,22 +119,24 @@ static void __init zswap_comp_exit(void) >>>> * page within zswap. >>>> * >>>> * rbnode - links the entry into red-black tree for the appropriate swap type >>>> + * offset - the swap offset for the entry. Index into the red-black tree. >>>> * refcount - the number of outstanding reference to the entry. This is needed >>>> * to protect against premature freeing of the entry by code >>>> * concurrent calls to load, invalidate, and writeback. The lock >>>> * for the zswap_tree structure that contains the entry must >>>> * be held while changing the refcount. Since the lock must >>>> * be held, there is no reason to also make refcount atomic. >>>> - * offset - the swap offset for the entry. Index into the red-black tree. >>>> - * handle - zpool allocation handle that stores the compressed page data >>>> * length - the length in bytes of the compressed page data. Needed during >>>> * decompression >>>> + * pool - the zswap_pool the entry's data is in >>>> + * handle - zpool allocation handle that stores the compressed page data >>>> */ >>>> struct zswap_entry { >>>> struct rb_node rbnode; >>>> pgoff_t offset; >>>> int refcount; >>>> unsigned int length; >>>> + struct zswap_pool *pool; >>>> unsigned long handle; >>>> }; >>>> >>>> @@ -201,6 +156,44 @@ struct zswap_tree { >>>> >>>> static struct zswap_tree *zswap_trees[MAX_SWAPFILES]; >>>> >>>> +/* RCU-protected iteration */ >>>> +static LIST_HEAD(zswap_pools); >>>> +/* protects zswap_pools list modification */ >>>> +static DEFINE_SPINLOCK(zswap_pools_lock); >>>> + >>>> +/********************************* >>>> +* helpers and fwd declarations >>>> +**********************************/ >>>> + >>>> +#define zswap_pool_debug(msg, p) \ >>>> + pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \ >>>> + zpool_get_type((p)->zpool)) >>>> + >>>> +static int zswap_writeback_entry(struct zpool *pool, unsigned long handle); >>>> +static int zswap_pool_get(struct zswap_pool *pool); >>>> +static void zswap_pool_put(struct zswap_pool *pool); >>>> + >>>> +static bool zswap_is_full(void) >>>> +{ >>>> + return totalram_pages * zswap_max_pool_percent / 100 < >>>> + DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); >>>> +} >>>> + >>>> +static void zswap_update_total_size(void) >>>> +{ >>>> + struct zswap_pool *pool; >>>> + u64 total = 0; >>>> + >>>> + rcu_read_lock(); >>>> + >>>> + list_for_each_entry_rcu(pool, &zswap_pools, list) >>>> + total += zpool_get_total_size(pool->zpool); >>>> + >>>> + rcu_read_unlock(); >>>> + >>>> + zswap_pool_total_size = total; >>>> +} >>>> + >>>> /********************************* >>>> * zswap entry functions >>>> **********************************/ >>>> @@ -294,10 +287,11 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry) >>>> */ >>>> static void zswap_free_entry(struct zswap_entry *entry) >>>> { >>>> - zpool_free(zswap_pool, entry->handle); >>>> + zpool_free(entry->pool->zpool, entry->handle); >>>> + zswap_pool_put(entry->pool); >>>> zswap_entry_cache_free(entry); >>>> atomic_dec(&zswap_stored_pages); >>>> - zswap_pool_total_size = zpool_get_total_size(zswap_pool); >>>> + zswap_update_total_size(); >>>> } >>>> >>>> /* caller must hold the tree lock */ >>>> @@ -339,35 +333,21 @@ static struct zswap_entry *zswap_entry_find_get(struct rb_root *root, >>>> **********************************/ >>>> static DEFINE_PER_CPU(u8 *, zswap_dstmem); >>>> >>>> -static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu) >>>> +static int __zswap_cpu_dstmem_notifier(unsigned long action, unsigned long cpu) >>>> { >>>> - struct crypto_comp *tfm; >>>> u8 *dst; >>>> >>>> switch (action) { >>>> case CPU_UP_PREPARE: >>>> - tfm = crypto_alloc_comp(zswap_compressor, 0, 0); >>>> - if (IS_ERR(tfm)) { >>>> - pr_err("can't allocate compressor transform\n"); >>>> - return NOTIFY_BAD; >>>> - } >>>> - *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = tfm; >>>> dst = kmalloc_node(PAGE_SIZE * 2, GFP_KERNEL, cpu_to_node(cpu)); >>>> if (!dst) { >>>> pr_err("can't allocate compressor buffer\n"); >>>> - crypto_free_comp(tfm); >>>> - *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL; >>>> return NOTIFY_BAD; >>>> } >>>> per_cpu(zswap_dstmem, cpu) = dst; >>>> break; >>>> case CPU_DEAD: >>>> case CPU_UP_CANCELED: >>>> - tfm = *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu); >>>> - if (tfm) { >>>> - crypto_free_comp(tfm); >>>> - *per_cpu_ptr(zswap_comp_pcpu_tfms, cpu) = NULL; >>>> - } >>>> dst = per_cpu(zswap_dstmem, cpu); >>>> kfree(dst); >>>> per_cpu(zswap_dstmem, cpu) = NULL; >>>> @@ -378,43 +358,309 @@ static int __zswap_cpu_notifier(unsigned long action, unsigned long cpu) >>>> return NOTIFY_OK; >>>> } >>>> >>>> -static int zswap_cpu_notifier(struct notifier_block *nb, >>>> - unsigned long action, void *pcpu) >>>> +static int zswap_cpu_dstmem_notifier(struct notifier_block *nb, >>>> + unsigned long action, void *pcpu) >>>> { >>>> - unsigned long cpu = (unsigned long)pcpu; >>>> - return __zswap_cpu_notifier(action, cpu); >>>> + return __zswap_cpu_dstmem_notifier(action, (unsigned long)pcpu); >>>> } >>>> >>>> -static struct notifier_block zswap_cpu_notifier_block = { >>>> - .notifier_call = zswap_cpu_notifier >>>> +static struct notifier_block zswap_dstmem_notifier = { >>>> + .notifier_call = zswap_cpu_dstmem_notifier, >>>> }; >>>> >>>> -static int __init zswap_cpu_init(void) >>>> +static int __init zswap_cpu_dstmem_init(void) >>>> { >>>> unsigned long cpu; >>>> >>>> cpu_notifier_register_begin(); >>>> for_each_online_cpu(cpu) >>>> - if (__zswap_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK) >>>> + if (__zswap_cpu_dstmem_notifier(CPU_UP_PREPARE, cpu) == >>>> + NOTIFY_BAD) >>>> goto cleanup; >>>> - __register_cpu_notifier(&zswap_cpu_notifier_block); >>>> + __register_cpu_notifier(&zswap_dstmem_notifier); >>>> cpu_notifier_register_done(); >>>> return 0; >>>> >>>> cleanup: >>>> for_each_online_cpu(cpu) >>>> - __zswap_cpu_notifier(CPU_UP_CANCELED, cpu); >>>> + __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu); >>>> cpu_notifier_register_done(); >>>> return -ENOMEM; >>>> } >>>> >>>> +static void zswap_cpu_dstmem_destroy(void) >>>> +{ >>>> + unsigned long cpu; >>>> + >>>> + cpu_notifier_register_begin(); >>>> + for_each_online_cpu(cpu) >>>> + __zswap_cpu_dstmem_notifier(CPU_UP_CANCELED, cpu); >>>> + __unregister_cpu_notifier(&zswap_dstmem_notifier); >>>> + cpu_notifier_register_done(); >>>> +} >>>> + >>>> +static int __zswap_cpu_comp_notifier(struct zswap_pool *pool, >>>> + unsigned long action, unsigned long cpu) >>>> +{ >>>> + struct crypto_comp *tfm; >>>> + >>>> + switch (action) { >>>> + case CPU_UP_PREPARE: >>>> + if (WARN_ON(*per_cpu_ptr(pool->tfm, cpu))) >>>> + break; >>>> + tfm = crypto_alloc_comp(pool->tfm_name, 0, 0); >>>> + if (IS_ERR_OR_NULL(tfm)) { >>>> + pr_err("could not alloc crypto comp %s : %ld\n", >>>> + pool->tfm_name, PTR_ERR(tfm)); >>>> + return NOTIFY_BAD; >>>> + } >>>> + *per_cpu_ptr(pool->tfm, cpu) = tfm; >>>> + break; >>>> + case CPU_DEAD: >>>> + case CPU_UP_CANCELED: >>>> + tfm = *per_cpu_ptr(pool->tfm, cpu); >>>> + if (!IS_ERR_OR_NULL(tfm)) >>>> + crypto_free_comp(tfm); >>>> + *per_cpu_ptr(pool->tfm, cpu) = NULL; >>>> + break; >>>> + default: >>>> + break; >>>> + } >>>> + return NOTIFY_OK; >>>> +} >>>> + >>>> +static int zswap_cpu_comp_notifier(struct notifier_block *nb, >>>> + unsigned long action, void *pcpu) >>>> +{ >>>> + unsigned long cpu = (unsigned long)pcpu; >>>> + struct zswap_pool *pool = container_of(nb, typeof(*pool), notifier); >>>> + >>>> + return __zswap_cpu_comp_notifier(pool, action, cpu); >>>> +} >>>> + >>>> +static int zswap_cpu_comp_init(struct zswap_pool *pool) >>>> +{ >>>> + unsigned long cpu; >>>> + >>>> + memset(&pool->notifier, 0, sizeof(pool->notifier)); >>>> + pool->notifier.notifier_call = zswap_cpu_comp_notifier; >>>> + >>>> + cpu_notifier_register_begin(); >>>> + for_each_online_cpu(cpu) >>>> + if (__zswap_cpu_comp_notifier(pool, CPU_UP_PREPARE, cpu) == >>>> + NOTIFY_BAD) >>>> + goto cleanup; >>>> + __register_cpu_notifier(&pool->notifier); >>>> + cpu_notifier_register_done(); >>>> + return 0; >>>> + >>>> +cleanup: >>>> + for_each_online_cpu(cpu) >>>> + __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu); >>>> + cpu_notifier_register_done(); >>>> + return -ENOMEM; >>>> +} >>>> + >>>> +static void zswap_cpu_comp_destroy(struct zswap_pool *pool) >>>> +{ >>>> + unsigned long cpu; >>>> + >>>> + cpu_notifier_register_begin(); >>>> + for_each_online_cpu(cpu) >>>> + __zswap_cpu_comp_notifier(pool, CPU_UP_CANCELED, cpu); >>>> + __unregister_cpu_notifier(&pool->notifier); >>>> + cpu_notifier_register_done(); >>>> +} >>>> + >>>> /********************************* >>>> -* helpers >>>> +* pool functions >>>> **********************************/ >>>> -static bool zswap_is_full(void) >>>> + >>>> +static struct zswap_pool *__zswap_pool_current(void) >>>> { >>>> - return totalram_pages * zswap_max_pool_percent / 100 < >>>> - DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); >>>> + struct zswap_pool *pool; >>>> + >>>> + pool = list_first_or_null_rcu(&zswap_pools, typeof(*pool), list); >>>> + WARN_ON(!pool); >>>> + >>>> + return pool; >>>> +} >>>> + >>>> +static struct zswap_pool *zswap_pool_current(void) >>>> +{ >>>> + assert_spin_locked(&zswap_pools_lock); >>>> + >>>> + return __zswap_pool_current(); >>>> +} >>>> + >>>> +static struct zswap_pool *zswap_pool_current_get(void) >>>> +{ >>>> + struct zswap_pool *pool; >>>> + >>>> + rcu_read_lock(); >>>> + >>>> + pool = __zswap_pool_current(); >>>> + if (!pool || !zswap_pool_get(pool)) >>>> + pool = NULL; >>>> + >>>> + rcu_read_unlock(); >>>> + >>>> + return pool; >>>> +} >>>> + >>>> +static struct zswap_pool *zswap_pool_last_get(void) >>>> +{ >>>> + struct zswap_pool *pool, *last = NULL; >>>> + >>>> + rcu_read_lock(); >>>> + >>>> + list_for_each_entry_rcu(pool, &zswap_pools, list) >>>> + last = pool; >>>> + if (!WARN_ON(!last) && !zswap_pool_get(last)) >>>> + last = NULL; >>>> + >>>> + rcu_read_unlock(); >>>> + >>>> + return last; >>>> +} >>>> + >>>> +static struct zpool_ops zswap_zpool_ops = { >>>> + .evict = zswap_writeback_entry >>>> +}; >>>> + >>>> +static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor) >>>> +{ >>>> + struct zswap_pool *pool; >>>> + >>>> + assert_spin_locked(&zswap_pools_lock); >>>> + >>>> + list_for_each_entry_rcu(pool, &zswap_pools, list) { >>>> + if (strncmp(pool->tfm_name, compressor, sizeof(pool->tfm_name))) >>>> + continue; >>>> + if (strncmp(zpool_get_type(pool->zpool), type, >>>> + sizeof(zswap_zpool_type))) >>>> + continue; >>>> + /* if we can't get it, it's about to be destroyed */ >>>> + if (!zswap_pool_get(pool)) >>>> + continue; >>>> + return pool; >>>> + } >>>> + >>>> + return NULL; >>>> +} >>>> + >>>> +static struct zswap_pool *zswap_pool_create(char *type, char *compressor) >>>> +{ >>>> + struct zswap_pool *pool; >>>> + gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN; >>>> + >>>> + pool = kzalloc(sizeof(*pool), GFP_KERNEL); >>>> + if (!pool) { >>>> + pr_err("pool alloc failed\n"); >>>> + return NULL; >>>> + } >>>> + >>>> + pool->zpool = zpool_create_pool(type, "zswap", gfp, &zswap_zpool_ops); >>>> + if (!pool->zpool) { >>>> + pr_err("%s zpool not available\n", type); >>>> + goto error; >>>> + } >>>> + pr_debug("using %s zpool\n", zpool_get_type(pool->zpool)); >>>> + >>>> + strlcpy(pool->tfm_name, compressor, sizeof(pool->tfm_name)); >>>> + pool->tfm = alloc_percpu(struct crypto_comp *); >>>> + if (!pool->tfm) { >>>> + pr_err("percpu alloc failed\n"); >>>> + goto error; >>>> + } >>>> + >>>> + if (zswap_cpu_comp_init(pool)) >>>> + goto error; >>>> + pr_debug("using %s compressor\n", pool->tfm_name); >>>> + >>>> + /* being the current pool takes 1 ref; this func expects the >>>> + * caller to always add the new pool as the current pool >>>> + */ >>>> + kref_init(&pool->kref); >>>> + INIT_LIST_HEAD(&pool->list); >>>> + >>>> + zswap_pool_debug("created", pool); >>>> + >>>> + return pool; >>>> + >>>> +error: >>>> + free_percpu(pool->tfm); >>>> + if (pool->zpool) >>>> + zpool_destroy_pool(pool->zpool); >>>> + kfree(pool); >>>> + return NULL; >>>> +} >>>> + >>>> +static struct zswap_pool *__zswap_pool_create_fallback(void) >>>> +{ >>>> + if (!crypto_has_comp(zswap_compressor, 0, 0)) { >>>> + pr_err("compressor %s not available, using default %s\n", >>>> + zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT); >>>> + strncpy(zswap_compressor, ZSWAP_COMPRESSOR_DEFAULT, >>>> + sizeof(zswap_compressor)); >>>> + } >>>> + if (!zpool_has_pool(zswap_zpool_type)) { >>>> + pr_err("zpool %s not available, using default %s\n", >>>> + zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT); >>>> + strncpy(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT, >>>> + sizeof(zswap_zpool_type)); >>>> + } >>>> + >>>> + return zswap_pool_create(zswap_zpool_type, zswap_compressor); >>>> +} >>>> + >>>> +static void zswap_pool_destroy(struct zswap_pool *pool) >>>> +{ >>>> + zswap_pool_debug("destroying", pool); >>>> + >>>> + zswap_cpu_comp_destroy(pool); >>>> + free_percpu(pool->tfm); >>>> + zpool_destroy_pool(pool->zpool); >>>> + kfree(pool); >>>> +} >>>> + >>>> +static int __must_check zswap_pool_get(struct zswap_pool *pool) >>>> +{ >>>> + return kref_get_unless_zero(&pool->kref); >>>> +} >>>> + >>>> +static void __zswap_pool_release(struct rcu_head *head) >>>> +{ >>>> + struct zswap_pool *pool = container_of(head, typeof(*pool), rcu_head); >>>> + >>>> + /* nobody should have been able to get a kref... */ >>>> + WARN_ON(kref_get_unless_zero(&pool->kref)); >>>> + >>>> + /* pool is now off zswap_pools list and has no references. */ >>>> + zswap_pool_destroy(pool); >>>> +} >>>> + >>>> +static void __zswap_pool_empty(struct kref *kref) >>>> +{ >>>> + struct zswap_pool *pool; >>>> + >>>> + pool = container_of(kref, typeof(*pool), kref); >>>> + >>>> + spin_lock(&zswap_pools_lock); >>>> + >>>> + WARN_ON(pool == zswap_pool_current()); >>>> + >>>> + list_del_rcu(&pool->list); >>>> + call_rcu(&pool->rcu_head, __zswap_pool_release); >>>> + >>>> + spin_unlock(&zswap_pools_lock); >>>> +} >>>> + >>>> +static void zswap_pool_put(struct zswap_pool *pool) >>>> +{ >>>> + kref_put(&pool->kref, __zswap_pool_empty); >>>> +} >>>> + >>>> } >>>> >>>> /********************************* >>>> @@ -538,6 +784,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) >>>> pgoff_t offset; >>>> struct zswap_entry *entry; >>>> struct page *page; >>>> + struct crypto_comp *tfm; >>>> u8 *src, *dst; >>>> unsigned int dlen; >>>> int ret; >>>> @@ -578,13 +825,15 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) >>>> case ZSWAP_SWAPCACHE_NEW: /* page is locked */ >>>> /* decompress */ >>>> dlen = PAGE_SIZE; >>>> - src = (u8 *)zpool_map_handle(zswap_pool, entry->handle, >>>> + src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle, >>>> ZPOOL_MM_RO) + sizeof(struct zswap_header); >>>> dst = kmap_atomic(page); >>>> - ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, >>>> - entry->length, dst, &dlen); >>>> + tfm = *get_cpu_ptr(entry->pool->tfm); >>>> + ret = crypto_comp_decompress(tfm, src, entry->length, >>>> + dst, &dlen); >>>> + put_cpu_ptr(entry->pool->tfm); >>>> kunmap_atomic(dst); >>>> - zpool_unmap_handle(zswap_pool, entry->handle); >>>> + zpool_unmap_handle(entry->pool->zpool, entry->handle); >>>> BUG_ON(ret); >>>> BUG_ON(dlen != PAGE_SIZE); >>>> >>>> @@ -633,6 +882,22 @@ end: >>>> return ret; >>>> } >>>> >>>> +static int zswap_shrink(void) >>>> +{ >>>> + struct zswap_pool *pool; >>>> + int ret; >>>> + >>>> + pool = zswap_pool_last_get(); >>>> + if (!pool) >>>> + return -ENOENT; >>>> + >>>> + ret = zpool_shrink(pool->zpool, 1, NULL); >>>> + >>>> + zswap_pool_put(pool); >>>> + >>>> + return ret; >>>> +} >>>> + >>>> /********************************* >>>> * frontswap hooks >>>> **********************************/ >>>> @@ -642,6 +907,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, >>>> { >>>> struct zswap_tree *tree = zswap_trees[type]; >>>> struct zswap_entry *entry, *dupentry; >>>> + struct crypto_comp *tfm; >>>> int ret; >>>> unsigned int dlen = PAGE_SIZE, len; >>>> unsigned long handle; >>>> @@ -657,7 +923,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, >>>> /* reclaim space if needed */ >>>> if (zswap_is_full()) { >>>> zswap_pool_limit_hit++; >>>> - if (zpool_shrink(zswap_pool, 1, NULL)) { >>>> + if (zswap_shrink()) { >>>> zswap_reject_reclaim_fail++; >>>> ret = -ENOMEM; >>>> goto reject; >>>> @@ -672,33 +938,42 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, >>>> goto reject; >>>> } >>>> >>>> + /* if entry is successfully added, it keeps the reference */ >>>> + entry->pool = zswap_pool_current_get(); >>>> + if (!entry->pool) { >>>> + ret = -EINVAL; >>>> + goto freepage; >>>> + } >>>> + >>>> /* compress */ >>>> dst = get_cpu_var(zswap_dstmem); >>>> + tfm = *get_cpu_ptr(entry->pool->tfm); >>>> src = kmap_atomic(page); >>>> - ret = zswap_comp_op(ZSWAP_COMPOP_COMPRESS, src, PAGE_SIZE, dst, &dlen); >>>> + ret = crypto_comp_compress(tfm, src, PAGE_SIZE, dst, &dlen); >>>> kunmap_atomic(src); >>>> + put_cpu_ptr(entry->pool->tfm); >>>> if (ret) { >>>> ret = -EINVAL; >>>> - goto freepage; >>>> + goto put_dstmem; >>>> } >>>> >>>> /* store */ >>>> len = dlen + sizeof(struct zswap_header); >>>> - ret = zpool_malloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN, >>>> - &handle); >>>> + ret = zpool_malloc(entry->pool->zpool, len, >>>> + __GFP_NORETRY | __GFP_NOWARN, &handle); >>>> if (ret == -ENOSPC) { >>>> zswap_reject_compress_poor++; >>>> - goto freepage; >>>> + goto put_dstmem; >>>> } >>>> if (ret) { >>>> zswap_reject_alloc_fail++; >>>> - goto freepage; >>>> + goto put_dstmem; >>>> } >>>> - zhdr = zpool_map_handle(zswap_pool, handle, ZPOOL_MM_RW); >>>> + zhdr = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_RW); >>>> zhdr->swpentry = swp_entry(type, offset); >>>> buf = (u8 *)(zhdr + 1); >>>> memcpy(buf, dst, dlen); >>>> - zpool_unmap_handle(zswap_pool, handle); >>>> + zpool_unmap_handle(entry->pool->zpool, handle); >>>> put_cpu_var(zswap_dstmem); >>>> >>>> /* populate entry */ >>>> @@ -721,12 +996,14 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset, >>>> >>>> /* update stats */ >>>> atomic_inc(&zswap_stored_pages); >>>> - zswap_pool_total_size = zpool_get_total_size(zswap_pool); >>>> + zswap_update_total_size(); >>>> >>>> return 0; >>>> >>>> -freepage: >>>> +put_dstmem: >>>> put_cpu_var(zswap_dstmem); >>>> + zswap_pool_put(entry->pool); >>>> +freepage: >>>> zswap_entry_cache_free(entry); >>>> reject: >>>> return ret; >>>> @@ -741,6 +1018,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, >>>> { >>>> struct zswap_tree *tree = zswap_trees[type]; >>>> struct zswap_entry *entry; >>>> + struct crypto_comp *tfm; >>>> u8 *src, *dst; >>>> unsigned int dlen; >>>> int ret; >>>> @@ -757,13 +1035,14 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, >>>> >>>> /* decompress */ >>>> dlen = PAGE_SIZE; >>>> - src = (u8 *)zpool_map_handle(zswap_pool, entry->handle, >>>> + src = (u8 *)zpool_map_handle(entry->pool->zpool, entry->handle, >>>> ZPOOL_MM_RO) + sizeof(struct zswap_header); >>>> dst = kmap_atomic(page); >>>> - ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length, >>>> - dst, &dlen); >>>> + tfm = *get_cpu_ptr(entry->pool->tfm); >>>> + ret = crypto_comp_decompress(tfm, src, entry->length, dst, &dlen); >>>> + put_cpu_ptr(entry->pool->tfm); >>>> kunmap_atomic(dst); >>>> - zpool_unmap_handle(zswap_pool, entry->handle); >>>> + zpool_unmap_handle(entry->pool->zpool, entry->handle); >>>> BUG_ON(ret); >>>> >>>> spin_lock(&tree->lock); >>>> @@ -816,10 +1095,6 @@ static void zswap_frontswap_invalidate_area(unsigned type) >>>> zswap_trees[type] = NULL; >>>> } >>>> >>>> -static struct zpool_ops zswap_zpool_ops = { >>>> - .evict = zswap_writeback_entry >>>> -}; >>>> - >>>> static void zswap_frontswap_init(unsigned type) >>>> { >>>> struct zswap_tree *tree; >>>> @@ -900,49 +1175,40 @@ static void __exit zswap_debugfs_exit(void) { } >>>> **********************************/ >>>> static int __init init_zswap(void) >>>> { >>>> - gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN; >>>> + struct zswap_pool *pool; >>>> >>>> - pr_info("loading zswap\n"); >>>> - >>>> - zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp, >>>> - &zswap_zpool_ops); >>>> - if (!zswap_pool && strcmp(zswap_zpool_type, ZSWAP_ZPOOL_DEFAULT)) { >>>> - pr_info("%s zpool not available\n", zswap_zpool_type); >>>> - zswap_zpool_type = ZSWAP_ZPOOL_DEFAULT; >>>> - zswap_pool = zpool_create_pool(zswap_zpool_type, "zswap", gfp, >>>> - &zswap_zpool_ops); >>>> - } >>>> - if (!zswap_pool) { >>>> - pr_err("%s zpool not available\n", zswap_zpool_type); >>>> - pr_err("zpool creation failed\n"); >>>> - goto error; >>>> - } >>>> - pr_info("using %s pool\n", zswap_zpool_type); >>>> + pr_info("loading\n"); >>>> >>>> if (zswap_entry_cache_create()) { >>>> pr_err("entry cache creation failed\n"); >>>> - goto cachefail; >>>> + goto cache_fail; >>>> } >>>> - if (zswap_comp_init()) { >>>> - pr_err("compressor initialization failed\n"); >>>> - goto compfail; >>>> + >>>> + if (zswap_cpu_dstmem_init()) { >>>> + pr_err("dstmem alloc failed\n"); >>>> + goto dstmem_fail; >>>> } >>>> - if (zswap_cpu_init()) { >>>> - pr_err("per-cpu initialization failed\n"); >>>> - goto pcpufail; >>>> + >>>> + pool = __zswap_pool_create_fallback(); >>>> + if (!pool) { >>>> + pr_err("pool creation failed\n"); >>>> + goto pool_fail; >>>> } >>>> + pr_info("loaded using pool %s/%s\n", pool->tfm_name, >>>> + zpool_get_type(pool->zpool)); >>>> + >>>> + list_add(&pool->list, &zswap_pools); >>>> >>>> frontswap_register_ops(&zswap_frontswap_ops); >>>> if (zswap_debugfs_init()) >>>> pr_warn("debugfs initialization failed\n"); >>>> return 0; >>>> -pcpufail: >>>> - zswap_comp_exit(); >>>> -compfail: >>>> + >>>> +pool_fail: >>>> + zswap_cpu_dstmem_destroy(); >>>> +dstmem_fail: >>>> zswap_entry_cache_destroy(); >>>> -cachefail: >>>> - zpool_destroy_pool(zswap_pool); >>>> -error: >>>> +cache_fail: >>>> return -ENOMEM; >>>> } >>>> /* must be late so crypto has time to come up */ >>>> -- >>>> 2.1.0 >>>> -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/