From: =?UTF-8?Q?Andreas_Gr=C3=BCnbacher?= Subject: Re: [PATCH 1/6] mbcache2: Reimplement mbcache Date: Sat, 12 Dec 2015 00:58:30 +0100 Message-ID: References: <1449683858-28936-1-git-send-email-jack@suse.cz> <1449683858-28936-2-git-send-email-jack@suse.cz> Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Cc: Ted Tso , linux-ext4@vger.kernel.org, Laurent GUERBY , Andreas Dilger To: Jan Kara Return-path: Received: from mail-wm0-f48.google.com ([74.125.82.48]:33081 "EHLO mail-wm0-f48.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754479AbbLKX6b (ORCPT ); Fri, 11 Dec 2015 18:58:31 -0500 Received: by mail-wm0-f48.google.com with SMTP id c201so93053811wme.0 for ; Fri, 11 Dec 2015 15:58:30 -0800 (PST) In-Reply-To: <1449683858-28936-2-git-send-email-jack@suse.cz> Sender: linux-ext4-owner@vger.kernel.org List-ID: Jan, 2015-12-09 18:57 GMT+01:00 Jan Kara : > diff --git a/fs/mbcache2.c b/fs/mbcache2.c > new file mode 100644 > index 000000000000..4ccf0752c6d1 > --- /dev/null > +++ b/fs/mbcache2.c > @@ -0,0 +1,388 @@ > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +/* > + * Mbcache is a simple key-value store. > + Keys need not be unique, however > + * key-value pairs are expected to be unique (we use this in > + * mb2_cache_entry_delete_block()). This comment is very confusing. Could you say what the keys and values are and what that kind of cache is used for so that people will have a chance of understanding what's going on? > + * We provide functions for creation and removal of entries, search by key, > + * and a special "delete entry with given key-value pair" operation. Fixed > + * size hash table is used for fast key lookups. > + */ Have you had a look at rhashtables? They would give us lockless lookups and they would automatically grow, at somewhat more complexity. 
> +struct mb2_cache { > + /* Hash table of entries */ > + struct hlist_bl_head *c_hash; > + /* log2 of hash table size */ > + int c_bucket_bits; > + /* Protects c_lru_list, c_entry_count */ > + spinlock_t c_lru_list_lock; > + struct list_head c_lru_list; > + /* Number of entries in cache */ > + unsigned long c_entry_count; > + struct shrinker c_shrink; > +}; > + > +static struct kmem_cache *mb2_entry_cache; > + > +/* > + * mb2_cache_entry_create - create entry in cache > + * @cache - cache where the entry should be created > + * @mask - gfp mask with which the entry should be allocated > + * @key - key of the entry > + * @block - block that contains data > + * > + * Creates entry in @cache with key @key and records that data is stored in > + * block @block. The function returns -EBUSY if entry with the same key > + * and for the same block already exists in cache. Otherwise reference to > + * the created entry is returned. > + */ > +struct mb2_cache_entry *mb2_cache_entry_create(struct mb2_cache *cache, > + gfp_t mask, > + unsigned int key, > + sector_t block) > +{ > + struct mb2_cache_entry *entry, *dup; > + struct hlist_bl_node *dup_node; > + struct hlist_bl_head *head; > + > + entry = kmem_cache_alloc(mb2_entry_cache, mask); > + if (!entry) > + return ERR_PTR(-ENOMEM); > + > + INIT_LIST_HEAD(&entry->e_lru_list); > + /* One ref for hash, one ref returned */ > + atomic_set(&entry->e_refcnt, 2); > + entry->e_key = key; > + entry->e_block = block; > + head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; > + entry->e_hash_list_head = head; > + hlist_bl_lock(head); > + hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { > + if (dup->e_key == key && dup->e_block == block) { > + hlist_bl_unlock(head); > + kmem_cache_free(mb2_entry_cache, entry); > + return ERR_PTR(-EBUSY); > + } > + } > + hlist_bl_add_head(&entry->e_hash_list, head); > + hlist_bl_unlock(head); > + > + spin_lock(&cache->c_lru_list_lock); > + list_add_tail(&entry->e_lru_list, 
&cache->c_lru_list); > + /* Grab ref for LRU list */ > + atomic_inc(&entry->e_refcnt); > + cache->c_entry_count++; > + spin_unlock(&cache->c_lru_list_lock); > + > + return entry; > +} > +EXPORT_SYMBOL(mb2_cache_entry_create); > + > +void __mb2_cache_entry_free(struct mb2_cache_entry *entry) > +{ > + kmem_cache_free(mb2_entry_cache, entry); > +} > +EXPORT_SYMBOL(__mb2_cache_entry_free); > + > +/* > + * mb2_cache_entry_delete - delete entry from cache > + * @cache - cache where the entry is > + * @entry - entry to delete > + * > + * Delete entry from cache. The entry is unhashed and deleted from the lru list > + * so it cannot be found. We also drop the reference to @entry caller gave us. > + * However entry need not be freed if there's someone else still holding a > + * reference to it. Freeing happens when the last reference is dropped. > + */ > +void mb2_cache_entry_delete(struct mb2_cache *cache, > + struct mb2_cache_entry *entry) This function should become static; there are no external users. 
> +{ > + struct hlist_bl_head *head = entry->e_hash_list_head; > + > + hlist_bl_lock(head); > + if (!hlist_bl_unhashed(&entry->e_hash_list)) { > + hlist_bl_del_init(&entry->e_hash_list); > + atomic_dec(&entry->e_refcnt); > + } > + hlist_bl_unlock(head); > + spin_lock(&cache->c_lru_list_lock); > + if (!list_empty(&entry->e_lru_list)) { > + list_del_init(&entry->e_lru_list); > + cache->c_entry_count--; > + atomic_dec(&entry->e_refcnt); > + } > + spin_unlock(&cache->c_lru_list_lock); > + mb2_cache_entry_put(cache, entry); > +} > +EXPORT_SYMBOL(mb2_cache_entry_delete); > + > +static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache, > + struct mb2_cache_entry *entry, > + unsigned int key) > +{ > + struct mb2_cache_entry *old_entry = entry; > + struct hlist_bl_node *node; > + struct hlist_bl_head *head; > + > + if (entry) > + head = entry->e_hash_list_head; > + else > + head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; > + hlist_bl_lock(head); > + if (entry && !hlist_bl_unhashed(&entry->e_hash_list)) > + node = entry->e_hash_list.next; > + else > + node = hlist_bl_first(head); > + while (node) { > + entry = hlist_bl_entry(node, struct mb2_cache_entry, > + e_hash_list); > + if (entry->e_key == key) { > + atomic_inc(&entry->e_refcnt); > + goto out; > + } > + node = node->next; > + } > + entry = NULL; > +out: > + hlist_bl_unlock(head); > + if (old_entry) > + mb2_cache_entry_put(cache, old_entry); > + > + return entry; > +} > + > +/* > + * mb2_cache_entry_find_first - find the first entry in cache with given key > + * @cache: cache where we should search > + * @key: key to look for > + * > + * Search in @cache for entry with key @key. Grabs reference to the first > + * entry found and returns the entry. 
> + */ > +struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache, > + unsigned int key) > +{ > + return __entry_find(cache, NULL, key); > +} > +EXPORT_SYMBOL(mb2_cache_entry_find_first); > + > +/* > + * mb2_cache_entry_find_next - find next entry in cache with the same key > + * @cache: cache where we should search > + * @entry: entry to start search from > + * > + * Finds next entry in the hash chain which has the same key as @entry. > + * If @entry is unhashed (which can happen when deletion of entry races > + * with the search), finds the first entry in the hash chain. The function > + * drops reference to @entry and returns with a reference to the found entry. > + */ > +struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache, > + struct mb2_cache_entry *entry) > +{ > + return __entry_find(cache, entry, entry->e_key); > +} > +EXPORT_SYMBOL(mb2_cache_entry_find_next); > + > +/* mb2_cache_entry_delete_block - remove information about block from cache > + * @cache - cache we work with > + * @key - key of the entry to remove > + * @block - block containing data for @key > + * > + * Remove entry from cache @cache with key @key with data stored in @block. 
> + */ > +void mb2_cache_entry_delete_block(struct mb2_cache *cache, unsigned int key, > + sector_t block) > +{ > + struct hlist_bl_node *node; > + struct hlist_bl_head *head; > + struct mb2_cache_entry *entry; > + > + head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)]; > + hlist_bl_lock(head); > + hlist_bl_for_each_entry(entry, node, head, e_hash_list) { > + if (entry->e_key == key && entry->e_block == block) { > + /* We keep hash list reference to keep entry alive */ > + hlist_bl_del_init(&entry->e_hash_list); > + hlist_bl_unlock(head); > + spin_lock(&cache->c_lru_list_lock); > + if (!list_empty(&entry->e_lru_list)) { > + list_del_init(&entry->e_lru_list); > + cache->c_entry_count--; > + atomic_dec(&entry->e_refcnt); > + } > + spin_unlock(&cache->c_lru_list_lock); > + mb2_cache_entry_put(cache, entry); > + return; > + } > + } > + hlist_bl_unlock(head); > +} > +EXPORT_SYMBOL(mb2_cache_entry_delete_block); > + > +/* mb2_cache_entry_touch - cache entry got used > + * @cache - cache the entry belongs to > + * @entry - entry that got used > + * > + * Move entry in lru list to reflect the fact that it was used. 
> + */ > +void mb2_cache_entry_touch(struct mb2_cache *cache, > + struct mb2_cache_entry *entry) > +{ > + spin_lock(&cache->c_lru_list_lock); > + if (!list_empty(&entry->e_lru_list)) > + list_move_tail(&cache->c_lru_list, &entry->e_lru_list); > + spin_unlock(&cache->c_lru_list_lock); > +} > +EXPORT_SYMBOL(mb2_cache_entry_touch); > + > +static unsigned long mb2_cache_count(struct shrinker *shrink, > + struct shrink_control *sc) > +{ > + struct mb2_cache *cache = container_of(shrink, struct mb2_cache, > + c_shrink); > + > + return cache->c_entry_count; > +} > + > +/* Shrink number of entries in cache */ > +static unsigned long mb2_cache_scan(struct shrinker *shrink, > + struct shrink_control *sc) > +{ > + int nr_to_scan = sc->nr_to_scan; > + struct mb2_cache *cache = container_of(shrink, struct mb2_cache, > + c_shrink); > + struct mb2_cache_entry *entry; > + struct hlist_bl_head *head; > + unsigned int shrunk = 0; > + > + spin_lock(&cache->c_lru_list_lock); > + while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) { > + entry = list_first_entry(&cache->c_lru_list, > + struct mb2_cache_entry, e_lru_list); > + list_del_init(&entry->e_lru_list); > + cache->c_entry_count--; > + /* > + * We keep LRU list reference so that entry doesn't go away > + * from under us. > + */ > + spin_unlock(&cache->c_lru_list_lock); > + head = entry->e_hash_list_head; > + hlist_bl_lock(head); Instead of taking and dropping c_lru_list_lock in the loop, could we get away with a simple-to-implement hlist_bl_trylock() and cond_resched_lock()? 
> + if (!hlist_bl_unhashed(&entry->e_hash_list)) { > + hlist_bl_del_init(&entry->e_hash_list); > + atomic_dec(&entry->e_refcnt); > + } > + hlist_bl_unlock(head); > + if (mb2_cache_entry_put(cache, entry)) > + shrunk++; > + cond_resched(); > + spin_lock(&cache->c_lru_list_lock); > + } > + spin_unlock(&cache->c_lru_list_lock); > + > + return shrunk; > +} > + > +/* > + * mb2_cache_create - create cache > + * @bucket_bits: log2 of the hash table size > + * > + * Create cache for keys with 2^bucket_bits hash entries. > + */ > +struct mb2_cache *mb2_cache_create(int bucket_bits) > +{ > + struct mb2_cache *cache; > + int bucket_count = 1 << bucket_bits; > + int i; > + > + if (!try_module_get(THIS_MODULE)) > + return NULL; > + > + cache = kzalloc(sizeof(struct mb2_cache), GFP_KERNEL); > + if (!cache) > + goto err_out; > + cache->c_bucket_bits = bucket_bits; > + INIT_LIST_HEAD(&cache->c_lru_list); > + spin_lock_init(&cache->c_lru_list_lock); > + cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head), > + GFP_KERNEL); > + if (!cache->c_hash) { > + kfree(cache); > + goto err_out; > + } > + for (i = 0; i < bucket_count; i++) > + INIT_HLIST_BL_HEAD(&cache->c_hash[i]); > + > + cache->c_shrink.count_objects = mb2_cache_count; > + cache->c_shrink.scan_objects = mb2_cache_scan; > + cache->c_shrink.seeks = DEFAULT_SEEKS; > + register_shrinker(&cache->c_shrink); > + > + return cache; > + > +err_out: > + module_put(THIS_MODULE); > + return NULL; > +} > +EXPORT_SYMBOL(mb2_cache_create); > + > +/* > + * mb2_cache_destroy - destroy cache > + * @cache: the cache to destroy > + * > + * Free all entries in cache and cache itself. Caller must make sure nobody > + * (except shrinker) can reach @cache when calling this. > + */ > +void mb2_cache_destroy(struct mb2_cache *cache) > +{ > + struct mb2_cache_entry *entry, *next; > + > + unregister_shrinker(&cache->c_shrink); > + > + /* > + * We don't bother with any locking. Cache must not be used at this > + * point. 
> + */ > + list_for_each_entry_safe(entry, next, &cache->c_lru_list, e_lru_list) { > + if (!hlist_bl_unhashed(&entry->e_hash_list)) { > + hlist_bl_del_init(&entry->e_hash_list); > + atomic_dec(&entry->e_refcnt); > + } else > + WARN_ON(1); > + list_del(&entry->e_lru_list); > + WARN_ON(atomic_read(&entry->e_refcnt) != 1); > + mb2_cache_entry_put(cache, entry); > + } > + kfree(cache->c_hash); > + kfree(cache); > + module_put(THIS_MODULE); > +} > +EXPORT_SYMBOL(mb2_cache_destroy); > + > +static int __init mb2cache_init(void) > +{ > + mb2_entry_cache = kmem_cache_create("mbcache", > + sizeof(struct mb2_cache_entry), 0, > + SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); > + BUG_ON(!mb2_entry_cache); > + return 0; > +} > + > +static void __exit mb2cache_exit(void) > +{ > + kmem_cache_destroy(mb2_entry_cache); > +} > + > +module_init(mb2cache_init) > +module_exit(mb2cache_exit) > + > +MODULE_AUTHOR("Jan Kara "); > +MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); > +MODULE_LICENSE("GPL"); > diff --git a/include/linux/mbcache2.h b/include/linux/mbcache2.h > new file mode 100644 > index 000000000000..2a58c51c3a0a > --- /dev/null > +++ b/include/linux/mbcache2.h > @@ -0,0 +1,54 @@ > +#ifndef _LINUX_MB2CACHE_H > +#define _LINUX_MB2CACHE_H > + > +#include > +#include > +#include > +#include > +#include > + > +struct mb2_cache; > + > +struct mb2_cache_entry { > + /* LRU list - protected by cache->c_lru_list_lock */ > + struct list_head e_lru_list; > + /* Hash table list - protected by bitlock in e_hash_list_head */ > + struct hlist_bl_node e_hash_list; > + atomic_t e_refcnt; > + /* Key in hash - stable during lifetime of the entry */ > + unsigned int e_key; > + /* Block number of hashed block - stable during lifetime of the entry */ > + sector_t e_block; > + /* Head of hash list (for list bit lock) - stable */ > + struct hlist_bl_head *e_hash_list_head; > +}; > + > +struct mb2_cache *mb2_cache_create(int bucket_bits); > +void mb2_cache_destroy(struct mb2_cache 
*cache); > + > +struct mb2_cache_entry *mb2_cache_entry_create(struct mb2_cache *cache, > + gfp_t mask, > + unsigned int key, > + sector_t block); > +void mb2_cache_entry_delete(struct mb2_cache *cache, > + struct mb2_cache_entry *entry); > +void __mb2_cache_entry_free(struct mb2_cache_entry *entry); > +static inline int mb2_cache_entry_put(struct mb2_cache *cache, > + struct mb2_cache_entry *entry) > +{ > + if (!atomic_dec_and_test(&entry->e_refcnt)) > + return 0; > + __mb2_cache_entry_free(entry); > + return 1; > +} > + > +void mb2_cache_entry_delete_block(struct mb2_cache *cache, unsigned int key, > + sector_t block); > +struct mb2_cache_entry *mb2_cache_entry_find_first(struct mb2_cache *cache, > + unsigned int key); > +struct mb2_cache_entry *mb2_cache_entry_find_next(struct mb2_cache *cache, > + struct mb2_cache_entry *entry); > +void mb2_cache_entry_touch(struct mb2_cache *cache, > + struct mb2_cache_entry *entry); > + > +#endif /* _LINUX_MB2CACHE_H */ > -- > 2.1.4 Thanks, Andreas