Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754645AbbLaLmd (ORCPT ); Thu, 31 Dec 2015 06:42:33 -0500 Received: from mo4-p00-ob.smtp.rzone.de ([81.169.146.221]:54184 "EHLO mo4-p00-ob.smtp.rzone.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751558AbbLaLhW (ORCPT ); Thu, 31 Dec 2015 06:37:22 -0500 X-RZG-AUTH: :OH8QVVOrc/CP6za/qRmbF3BWedPGA1vjs2ejZCzW8NRdwTYefHi0LhjeQF0sTFwGWOFPJQ== X-RZG-CLASS-ID: mo00 From: Thomas Schoebel-Theuer To: linux-kernel@vger.kernel.org, tst@schoebel-theuer.de Subject: [RFC 15/31] mars: add new module lib_mapfree Date: Thu, 31 Dec 2015 12:36:10 +0100 Message-Id: <5eabb0bead3e0b59f973f04915d486c96e0cf931.1451558672.git.tst@schoebel-theuer.de> X-Mailer: git-send-email 2.6.4 In-Reply-To: References: In-Reply-To: References: Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 13031 Lines: 492 Signed-off-by: Thomas Schoebel-Theuer --- drivers/staging/mars/xio_bricks/lib_mapfree.c | 380 ++++++++++++++++++++++++++ include/linux/xio/lib_mapfree.h | 84 ++++++ 2 files changed, 464 insertions(+) create mode 100644 drivers/staging/mars/xio_bricks/lib_mapfree.c create mode 100644 include/linux/xio/lib_mapfree.h diff --git a/drivers/staging/mars/xio_bricks/lib_mapfree.c b/drivers/staging/mars/xio_bricks/lib_mapfree.c new file mode 100644 index 0000000..6b464d7 --- /dev/null +++ b/drivers/staging/mars/xio_bricks/lib_mapfree.c @@ -0,0 +1,380 @@ +/* + * MARS Long Distance Replication Software + * + * Copyright (C) 2010-2014 Thomas Schoebel-Theuer + * Copyright (C) 2011-2014 1&1 Internet AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* time to wait between background mapfree operations */ +int mapfree_period_sec = 10; + +/* some grace space where no regular cleanup should occur */ +int mapfree_grace_keep_mb = 16; + +/* taken (read or write) around every walk or change of mapfree_list below */ +static +DECLARE_RWSEM(mapfree_mutex); + +/* all shared (non-O_DIRECT) mapfree_info instances, linked via mf_head */ +static +LIST_HEAD(mapfree_list); + +/* + * mapfree_pages() - invalidate cached pages of the file held by @mf. + * + * grace_keep < 0 invalidates the whole mapping (start 0, end -1). + * Otherwise the range ends at the smaller of mf_min[0] and mf_min[1] + * minus grace_keep megabytes, and starts one page before the offset + * remembered in mf_last by the previous call (unless that is page 0). + * + * Nothing happens when @mf, its file or its mapping is missing. + */ +void mapfree_pages(struct mapfree_info *mf, int grace_keep) +{ + struct address_space *mapping; + pgoff_t start; + pgoff_t end; + + if (unlikely(!mf)) + goto done; + if (unlikely(!mf->mf_filp)) + goto done; + + mapping = mf->mf_filp->f_mapping; + if (unlikely(!mapping)) + goto done; + + if (grace_keep < 0) { /* force full flush */ + start = 0; + end = -1; + } else { + unsigned long flags; + loff_t tmp; + loff_t min; + + spin_lock_irqsave(&mf->mf_lock, flags); + + /* use the smaller of both minima, then move a non-zero mf_min[0] over to mf_min[1] */ + min = tmp = mf->mf_min[0]; + if (likely(mf->mf_min[1] < min)) + min = mf->mf_min[1]; + if (tmp) { + mf->mf_min[1] = tmp; + mf->mf_min[0] = 0; + } + + spin_unlock_irqrestore(&mf->mf_lock, flags); + + min -= (loff_t)grace_keep * (1024 * 1024); /* megabytes */ + end = 0; + + /* NOTE(review): if min is <= 0 here while mf_last is non-zero, end is derived from a non-positive offset - confirm the resulting range is intended */ + if (min > 0 || mf->mf_last) { + start = mf->mf_last / PAGE_SIZE; + /* add some grace overlapping */ + if (likely(start > 0)) + start--; + mf->mf_last = min; + end = min / PAGE_SIZE; + } else { /* there was no progress for at least 2 rounds */ + start = 0; + if (!grace_keep) /* also flush thoroughly */ + end = -1; + } + + XIO_DBG("file = '%s' start = %lu end = %lu\n", mf->mf_name, start, end); + } + + if (end > start || end == -1) + invalidate_mapping_pages(mapping, start, end); + +done:; +} + +/* + * Drop one reference on @mf. When the count reaches zero the entry is + * unlinked, and if a file is attached all of its pages are invalidated + * (mapfree_pages(mf, -1)) and the file is closed; finally the name and + * the structure are freed. + * mapfree_put() calls this with mapfree_mutex held for writing. + */ +static +void _mapfree_put(struct mapfree_info *mf) +{ + if (atomic_dec_and_test(&mf->mf_count)) { + XIO_DBG("closing file '%s' 
filp = %p\n", mf->mf_name, mf->mf_filp); + list_del_init(&mf->mf_head); + CHECK_HEAD_EMPTY(&mf->mf_dirty_anchor); + if (likely(mf->mf_filp)) { + mapfree_pages(mf, -1); + filp_close(mf->mf_filp, NULL); + } + brick_string_free(mf->mf_name); + brick_mem_free(mf); + } +} + +/* Drop one reference under the write side of mapfree_mutex; a NULL @mf is ignored. */ +void mapfree_put(struct mapfree_info *mf) +{ + if (likely(mf)) { + down_write(&mapfree_mutex); + _mapfree_put(mf); + up_write(&mapfree_mutex); + } +} + +/* + * mapfree_get() - get a referenced mapfree_info for the file @name. + * + * Without O_DIRECT in @flags, an entry on mapfree_list with the same + * name and flags is reused and its reference count incremented. + * Otherwise the file is opened via filp_open() with mode 0600 and a + * new entry is set up; entries opened with O_DIRECT are not put on + * the list. + * + * Returns NULL when the file cannot be opened or has no mapping. + */ +struct mapfree_info *mapfree_get(const char *name, int flags) +{ + struct mapfree_info *mf = NULL; + struct list_head *tmp; + + if (!(flags & O_DIRECT)) { + down_read(&mapfree_mutex); + for (tmp = mapfree_list.next; tmp != &mapfree_list; tmp = tmp->next) { + struct mapfree_info *_mf = container_of(tmp, struct mapfree_info, mf_head); + + if (_mf->mf_flags == flags && !strcmp(_mf->mf_name, name)) { + mf = _mf; + atomic_inc(&mf->mf_count); + break; + } + } + up_read(&mapfree_mutex); + + if (mf) + goto done; + } + + /* not a real loop: every path through the body ends in a break */ + for (;;) { + struct address_space *mapping; + struct inode *inode = NULL; + int ra = 1; + int prot = 0600; + + mm_segment_t oldfs; + + /* NOTE(review): both allocation results are used unchecked - confirm brick_zmem_alloc() and brick_strdup() cannot return NULL */ + mf = brick_zmem_alloc(sizeof(struct mapfree_info)); + + mf->mf_name = brick_strdup(name); + + mf->mf_flags = flags; + INIT_LIST_HEAD(&mf->mf_head); + INIT_LIST_HEAD(&mf->mf_dirty_anchor); + atomic_set(&mf->mf_count, 1); + spin_lock_init(&mf->mf_lock); + mf->mf_max = -1; + + oldfs = get_fs(); + set_fs(get_ds()); + mf->mf_filp = filp_open(name, flags, prot); + set_fs(oldfs); + + XIO_DBG("file '%s' flags = %d prot = %d filp = %p\n", name, flags, prot, mf->mf_filp); + + if (unlikely(!mf->mf_filp || IS_ERR(mf->mf_filp))) { + int err = PTR_ERR(mf->mf_filp); + + XIO_ERR("can't open file '%s' status=%d\n", name, err); + mf->mf_filp = NULL; + _mapfree_put(mf); + mf = NULL; + break; + } + + mapping = mf->mf_filp->f_mapping; + if (likely(mapping)) + inode = mapping->host; + if (unlikely(!mapping || !inode)) { + XIO_ERR("file '%s' has no mapping\n", name); + /* NOTE(review): clearing mf_filp makes _mapfree_put() skip filp_close(), so the file opened above seems to stay open - confirm */ + mf->mf_filp = NULL; + _mapfree_put(mf); + mf = NULL; + break; + 
} + + /* remove __GFP_IO and __GFP_FS from the gfp mask of this mapping */ + mapping_set_gfp_mask(mapping, mapping_gfp_mask(mapping) & ~(__GFP_IO | __GFP_FS)); + + mf->mf_max = i_size_read(inode); + + /* block devices get their readahead set to ra (1) pages */ + if (S_ISBLK(inode->i_mode)) { + XIO_INF("changing blkdev readahead from %lu to %d\n", + inode->i_bdev->bd_disk->queue->backing_dev_info.ra_pages, + ra); + inode->i_bdev->bd_disk->queue->backing_dev_info.ra_pages = ra; + } + + if (flags & O_DIRECT) { /* never share them */ + break; + } + + /* maintain global list of all open files */ + down_write(&mapfree_mutex); + /* the list was unlocked since the lookup at the top, so search again before adding */ + for (tmp = mapfree_list.next; tmp != &mapfree_list; tmp = tmp->next) { + struct mapfree_info *_mf = container_of(tmp, struct mapfree_info, mf_head); + + if (unlikely(_mf->mf_flags == flags && !strcmp(_mf->mf_name, name))) { + XIO_WRN("race on creation of '%s' detected\n", name); + _mapfree_put(mf); + mf = _mf; + atomic_inc(&mf->mf_count); + goto leave; + } + } + list_add_tail(&mf->mf_head, &mapfree_list); +leave: + up_write(&mapfree_mutex); + break; + } +done: + return mf; +} + +/* + * Report the offsets [@min, @max] as in use: mf_min[0] is lowered to @min + * (or set when it is still 0) and mf_max is raised to @max when @max >= 0. + * A NULL @mf is ignored. + */ +void mapfree_set(struct mapfree_info *mf, loff_t min, loff_t max) +{ + unsigned long flags; + + if (likely(mf)) { + spin_lock_irqsave(&mf->mf_lock, flags); + if (!mf->mf_min[0] || mf->mf_min[0] > min) + mf->mf_min[0] = min; + if (max >= 0 && mf->mf_max < max) + mf->mf_max = max; + spin_unlock_irqrestore(&mf->mf_lock, flags); + } +} + +/* + * Background thread. After each brick_msleep(500) it walks mapfree_list, + * picks the entry with the oldest mf_jiffies stamp that is more than + * mapfree_period_sec seconds old and calls mapfree_pages() on it with + * mapfree_grace_keep_mb. Entries whose stamp is still 0 are only stamped. + * The walk is skipped while mapfree_period_sec <= 0. + */ +static +int mapfree_thread(void *data) +{ + while (!brick_thread_should_stop()) { + struct mapfree_info *mf = NULL; + struct list_head *tmp; + long long eldest = 0; + + brick_msleep(500); + + if (mapfree_period_sec <= 0) + continue; + + down_read(&mapfree_mutex); + + for (tmp = mapfree_list.next; tmp != &mapfree_list; tmp = tmp->next) { + struct mapfree_info *_mf = container_of(tmp, struct mapfree_info, mf_head); + + if (unlikely(!_mf->mf_jiffies)) { + _mf->mf_jiffies = jiffies; + continue; + } + if ((long long)jiffies - _mf->mf_jiffies > mapfree_period_sec * HZ && + (!mf || _mf->mf_jiffies < eldest)) { + mf = _mf; + eldest = _mf->mf_jiffies; + } + } + if (mf) + 
atomic_inc(&mf->mf_count); + + up_read(&mapfree_mutex); + + if (!mf) + continue; + + /* the reference taken above is dropped again by mapfree_put() below */ + mapfree_pages(mf, mapfree_grace_keep_mb); + + mf->mf_jiffies = jiffies; + mapfree_put(mf); + } + return 0; +} + +/***************** dirty IOs on the fly *****************/ + +/* + * Put @di at the front of the dirty list of @mf, under mf_lock. + * Does nothing unless both di->dirty_aio and @mf are set. + * NOTE(review): list_del() runs first without a list_empty() check - + * presumably callers always initialize dirty_head; verify. + */ +void mf_insert_dirty(struct mapfree_info *mf, struct dirty_info *di) +{ + unsigned long flags; + + if (likely(di->dirty_aio && mf)) { + spin_lock_irqsave(&mf->mf_lock, flags); + list_del(&di->dirty_head); + list_add(&di->dirty_head, &mf->mf_dirty_anchor); + spin_unlock_irqrestore(&mf->mf_lock, flags); + } +} + +/* Unlink @di from the dirty list under mf_lock; the list_empty() test itself is done before the lock is taken. */ +void mf_remove_dirty(struct mapfree_info *mf, struct dirty_info *di) +{ + unsigned long flags; + + if (!list_empty(&di->dirty_head) && mf) { + spin_lock_irqsave(&mf->mf_lock, flags); + list_del_init(&di->dirty_head); + spin_unlock_irqrestore(&mf->mf_lock, flags); + } +} + +/* + * Extend [*min, *max] over all entries on the dirty list of @mf whose + * dirty_stage lies within [@min_stage, @max_stage]: *min is lowered to + * io_pos and *max is raised to io_pos + io_len where needed. Both + * values are only compared and updated, never initialized here. + */ +void mf_get_dirty(struct mapfree_info *mf, loff_t *min, loff_t *max, int min_stage, int max_stage) +{ + unsigned long flags; + + struct list_head *tmp; + + if (unlikely(!mf)) + goto done; + + spin_lock_irqsave(&mf->mf_lock, flags); + for (tmp = mf->mf_dirty_anchor.next; tmp != &mf->mf_dirty_anchor; tmp = tmp->next) { + struct dirty_info *di = container_of(tmp, struct dirty_info, dirty_head); + struct aio_object *aio = di->dirty_aio; + + if (unlikely(!aio)) + continue; + if (di->dirty_stage < min_stage || di->dirty_stage > max_stage) + continue; + if (aio->io_pos < *min) + *min = aio->io_pos; + if (aio->io_pos + aio->io_len > *max) + *max = aio->io_pos + aio->io_len; + } + spin_unlock_irqrestore(&mf->mf_lock, flags); +done:; +} + +/* Run mf_get_dirty() on every mapfree_list entry named @filename, regardless of its flags. */ +void mf_get_any_dirty(const char *filename, loff_t *min, loff_t *max, int min_stage, int max_stage) +{ + struct list_head *tmp; + + down_read(&mapfree_mutex); + for (tmp = mapfree_list.next; tmp != &mapfree_list; tmp = tmp->next) { + struct mapfree_info *mf = container_of(tmp, struct mapfree_info, mf_head); + + if (!strcmp(mf->mf_name, filename)) + mf_get_dirty(mf, min, max, min_stage, max_stage); + } + 
up_read(&mapfree_mutex); +} + +/***************** module init stuff ************************/ + +/* the thread running mapfree_thread(); reset to NULL by exit_xio_mapfree() */ +static +struct task_struct *mf_thread; + +/* Start the background thread; returns 0, or -ENOMEM when it could not be created. */ +int __init init_xio_mapfree(void) +{ + XIO_DBG("init_mapfree()\n"); + mf_thread = brick_thread_create(mapfree_thread, NULL, "xio_mapfree"); + if (unlikely(!mf_thread)) { + XIO_ERR("could not create mapfree thread\n"); + return -ENOMEM; + } + return 0; +} + +/* Stop the background thread if it was started. */ +void exit_xio_mapfree(void) +{ + XIO_DBG("exit_mapfree()\n"); + if (likely(mf_thread)) { + brick_thread_stop(mf_thread); + mf_thread = NULL; + } +} diff --git a/include/linux/xio/lib_mapfree.h b/include/linux/xio/lib_mapfree.h new file mode 100644 index 0000000..e7594e12 --- /dev/null +++ b/include/linux/xio/lib_mapfree.h @@ -0,0 +1,84 @@ +/* + * MARS Long Distance Replication Software + * + * Copyright (C) 2010-2014 Thomas Schoebel-Theuer + * Copyright (C) 2011-2014 1&1 Internet AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef XIO_LIB_MAPFREE_H +#define XIO_LIB_MAPFREE_H + +/* Mapfree infrastructure. + * + * Purposes: + * + * 1) Open files only once when possible, do ref-counting on struct mapfree_info + * + * 2) Automatically call invalidate_mapping_pages() in the background on + * "unused" areas to free resources. + * Used areas can be indicated by calling mapfree_set() frequently. + * Usage model: tailored to sequential logfiles. + * + * 3) Do it all in a completely decoupled manner, in order to prevent resource deadlocks. 
+ * + * 4) Also to prevent deadlocks: always set mapping_set_gfp_mask() accordingly. + */ + +#include + +extern int mapfree_period_sec; +extern int mapfree_grace_keep_mb; + +/* one open file, shared by name and flags unless it was opened with O_DIRECT */ +struct mapfree_info { + struct list_head mf_head; /* link in the global list kept by lib_mapfree.c */ + struct list_head mf_dirty_anchor; /* head of the list of struct dirty_info */ + char *mf_name; /* copy of the name given to mapfree_get() */ + struct file *mf_filp; /* file opened by mapfree_get() */ + int mf_flags; /* open flags given to mapfree_get() */ + int mf_mode; /* not referenced by lib_mapfree.c in this patch */ + atomic_t mf_count; /* reference count */ + spinlock_t mf_lock; /* held while updating mf_min[], mf_max and the dirty list */ + loff_t mf_min[2]; /* [0]: minimum set via mapfree_set(), [1]: value moved over by mapfree_pages() */ + loff_t mf_last; /* offset where the previous mapfree_pages() range ended */ + loff_t mf_max; /* inode size at open time, raised via mapfree_set() */ + long long mf_jiffies; /* jiffies stamp written by the background thread */ +}; + +/* one in-flight IO, linked into mapfree_info.mf_dirty_anchor */ +struct dirty_info { + struct list_head dirty_head; /* link in the dirty list */ + struct aio_object *dirty_aio; /* io_pos and io_len of this object are read by mf_get_dirty() */ + int dirty_stage; /* compared against min_stage and max_stage in mf_get_dirty() */ +}; + +struct mapfree_info *mapfree_get(const char *filename, int flags); + +void mapfree_put(struct mapfree_info *mf); + +void mapfree_set(struct mapfree_info *mf, loff_t min, loff_t max); + +void mapfree_pages(struct mapfree_info *mf, int grace_keep); + +/***************** dirty IOs on the fly *****************/ + +void mf_insert_dirty(struct mapfree_info *mf, struct dirty_info *di); +void mf_remove_dirty(struct mapfree_info *mf, struct dirty_info *di); +void mf_get_dirty(struct mapfree_info *mf, loff_t *min, loff_t *max, int min_stage, int max_stage); +void mf_get_any_dirty(const char *filename, loff_t *min, loff_t *max, int min_stage, int max_stage); + +/***************** module init stuff ************************/ + +int __init init_xio_mapfree(void); + +void exit_xio_mapfree(void); + +#endif -- 2.6.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/