Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754231AbbLaLl6 (ORCPT ); Thu, 31 Dec 2015 06:41:58 -0500 Received: from mo4-p00-ob.smtp.rzone.de ([81.169.146.220]:62060 "EHLO mo4-p00-ob.smtp.rzone.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751829AbbLaLhY (ORCPT ); Thu, 31 Dec 2015 06:37:24 -0500 X-RZG-AUTH: :OH8QVVOrc/CP6za/qRmbF3BWedPGA1vjs2ejZCzW8NRdwTYefHi0LhjeQF0sTFwGWOFPJQ== X-RZG-CLASS-ID: mo00 From: Thomas Schoebel-Theuer To: linux-kernel@vger.kernel.org, tst@schoebel-theuer.de Subject: [RFC 17/31] mars: add new module xio_bio Date: Thu, 31 Dec 2015 12:36:12 +0100 Message-Id: <1ad9cee560223010a88061c10e8d6d05aa6acb7f.1451558672.git.tst@schoebel-theuer.de> X-Mailer: git-send-email 2.6.4 In-Reply-To: References: In-Reply-To: References: Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 24827 Lines: 958 Signed-off-by: Thomas Schoebel-Theuer --- drivers/staging/mars/xio_bricks/xio_bio.c | 845 ++++++++++++++++++++++++++++++ include/linux/xio/xio_bio.h | 85 +++ 2 files changed, 930 insertions(+) create mode 100644 drivers/staging/mars/xio_bricks/xio_bio.c create mode 100644 include/linux/xio/xio_bio.h diff --git a/drivers/staging/mars/xio_bricks/xio_bio.c b/drivers/staging/mars/xio_bricks/xio_bio.c new file mode 100644 index 0000000..ef18325 --- /dev/null +++ b/drivers/staging/mars/xio_bricks/xio_bio.c @@ -0,0 +1,845 @@ +/* + * MARS Long Distance Replication Software + * + * Copyright (C) 2010-2014 Thomas Schoebel-Theuer + * Copyright (C) 2011-2014 1&1 Internet AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Bio brick (interface to blkdev IO via kernel bios) */ + +#include +#include +#include +#include + +#include +#include +#include + +#include +static struct timing_stats timings[2]; + +struct threshold bio_submit_threshold = { + .thr_ban = &xio_global_ban, + .thr_parent = &global_io_threshold, + .thr_limit = BIO_SUBMIT_MAX_LATENCY, + .thr_factor = 100, + .thr_plus = 0, +}; + +struct threshold bio_io_threshold[2] = { + [0] = { + .thr_ban = &xio_global_ban, + .thr_parent = &global_io_threshold, + .thr_limit = BIO_IO_R_MAX_LATENCY, + .thr_factor = 10, + .thr_plus = 10000, + }, + [1] = { + .thr_ban = &xio_global_ban, + .thr_parent = &global_io_threshold, + .thr_limit = BIO_IO_W_MAX_LATENCY, + .thr_factor = 10, + .thr_plus = 10000, + }, +}; + +/************************ own type definitions ***********************/ + +/************************ own helper functions ***********************/ + +/* This is called from the kernel bio layer. + */ +static +void bio_callback(struct bio *bio) +{ + struct bio_aio_aspect *aio_a = bio->bi_private; + struct bio_brick *brick; + unsigned long flags; + + CHECK_PTR(aio_a, err); + CHECK_PTR(aio_a->output, err); + brick = aio_a->output->brick; + CHECK_PTR(brick, err); + + aio_a->status_code = bio->bi_error; + + spin_lock_irqsave(&brick->lock, flags); + list_del(&aio_a->io_head); + list_add_tail(&aio_a->io_head, &brick->completed_list); + atomic_inc(&brick->completed_count); + spin_unlock_irqrestore(&brick->lock, flags); + + wake_up_interruptible(&brick->response_event); + goto out_return; +err: + XIO_FAT("cannot handle bio callback\n"); +out_return:; +} + +/* Map from kernel address/length to struct page (if not already known), + * check alignment constraints, create bio from it. 
+ * Return the length (may be smaller than requested). + */ +static +int make_bio(struct bio_brick *brick, + void *data, + int len, + loff_t pos, + struct bio_aio_aspect *private, + struct bio **_bio) +{ + unsigned long long sector; + int sector_offset; + int data_offset; + int page_offset; + int page_len; + int bvec_count; + int rest_len = len; + int result_len = 0; + int status; + int i; + struct bio *bio = NULL; + struct block_device *bdev; + + status = -EINVAL; + CHECK_PTR(brick, out); + bdev = brick->bdev; + CHECK_PTR(bdev, out); + + if (unlikely(rest_len <= 0)) { + XIO_ERR("bad bio len %d\n", rest_len); + goto out; + } + + sector = pos >> 9; /* TODO: make dynamic */ + sector_offset = pos & ((1 << 9) - 1); /* TODO: make dynamic */ + data_offset = ((unsigned long)data) & ((1 << 9) - 1); /* TODO: make dynamic */ + + if (unlikely(sector_offset > 0)) { + XIO_ERR("odd sector offset %d\n", sector_offset); + goto out; + } + if (unlikely(sector_offset != data_offset)) { + XIO_ERR("bad alignment: sector_offset %d != data_offset %d\n", sector_offset, data_offset); + goto out; + } + if (unlikely(rest_len & ((1 << 9) - 1))) { + XIO_ERR("odd length %d\n", rest_len); + goto out; + } + + page_offset = ((unsigned long)data) & (PAGE_SIZE-1); + page_len = rest_len + page_offset; + bvec_count = (page_len - 1) / PAGE_SIZE + 1; + if (bvec_count > brick->bvec_max) { + bvec_count = brick->bvec_max; + } else if (unlikely(bvec_count <= 0)) { + XIO_WRN("bvec_count=%d\n", bvec_count); + bvec_count = 1; + } + + bio = bio_alloc(GFP_BRICK, bvec_count); + status = -ENOMEM; + + for (i = 0; i < bvec_count && rest_len > 0; i++) { + struct page *page; + int this_rest = PAGE_SIZE - page_offset; + int this_len = rest_len; + + if (this_len > this_rest) + this_len = this_rest; + + page = brick_iomap(data, &page_offset, &this_len); + if (unlikely(!page)) { + XIO_ERR("cannot iomap() kernel address %p\n", data); + status = -EINVAL; + goto out; + } + + bio->bi_io_vec[i].bv_page = page; + 
bio->bi_io_vec[i].bv_len = this_len; + bio->bi_io_vec[i].bv_offset = page_offset; + + data += this_len; + rest_len -= this_len; + result_len += this_len; + page_offset = 0; + } + + if (unlikely(rest_len != 0)) { + XIO_ERR("computation of bvec_count %d was wrong, diff=%d\n", bvec_count, rest_len); + status = -EINVAL; + goto out; + } + + bio->bi_vcnt = i; + bio->bi_iter.bi_idx = 0; + bio->bi_iter.bi_size = result_len; + bio->bi_iter.bi_sector = sector; + bio->bi_bdev = bdev; + bio->bi_private = private; + bio->bi_end_io = bio_callback; + bio->bi_rw = 0; /* must be filled in later */ + status = result_len; + +out: + if (unlikely(status < 0)) { + XIO_ERR("error %d\n", status); + if (bio) { + bio_put(bio); + bio = NULL; + } + } + *_bio = bio; + return status; +} + +/***************** own brick * input * output operations *****************/ + +#define PRIO_INDEX(aio) ((aio)->io_prio + 1) + +static int bio_get_info(struct bio_output *output, struct xio_info *info) +{ + struct bio_brick *brick = output->brick; + struct inode *inode; + int status = -ENOENT; + + if (unlikely(!brick->mf || + !brick->mf->mf_filp || + !brick->mf->mf_filp->f_mapping)) { + goto done; + } + inode = brick->mf->mf_filp->f_mapping->host; + if (unlikely(!inode)) + goto done; + + info->tf_align = 512; + info->tf_min_size = 512; + brick->total_size = i_size_read(inode); + info->current_size = brick->total_size; + XIO_DBG("determined device size = %lld\n", info->current_size); + status = 0; + +done: + return status; +} + +static int bio_io_get(struct bio_output *output, struct aio_object *aio) +{ + struct bio_aio_aspect *aio_a; + int status = -EINVAL; + + CHECK_PTR(output, done); + CHECK_PTR(output->brick, done); + + if (aio->obj_initialized) { + obj_get(aio); + return aio->io_len; + } + + aio_a = bio_aio_get_aspect(output->brick, aio); + CHECK_PTR(aio_a, done); + aio_a->output = output; + aio_a->bio = NULL; + + if (!aio->io_data) { /* buffered IO. 
*/ + if (unlikely(aio->io_len <= 0)) + goto done; + status = -ENOMEM; + aio->io_data = brick_block_alloc(aio->io_pos, (aio_a->alloc_len = aio->io_len)); + aio_a->do_dealloc = true; + } + + status = make_bio(output->brick, aio->io_data, aio->io_len, aio->io_pos, aio_a, &aio_a->bio); + if (unlikely(status < 0 || !aio_a->bio)) { + XIO_ERR("could not create bio, status = %d\n", status); + goto done; + } + + if (unlikely(aio->io_prio < XIO_PRIO_HIGH)) + aio->io_prio = XIO_PRIO_HIGH; + else if (unlikely(aio->io_prio > XIO_PRIO_LOW)) + aio->io_prio = XIO_PRIO_LOW; + + aio->io_len = status; + obj_get_first(aio); + status = 0; + +done: + return status; +} + +static +void _bio_io_put(struct bio_output *output, struct aio_object *aio) +{ + struct bio_aio_aspect *aio_a; + + aio->io_total_size = output->brick->total_size; + + aio_a = bio_aio_get_aspect(output->brick, aio); + CHECK_PTR(aio_a, err); + + if (likely(aio_a->bio)) { + bio_put(aio_a->bio); + aio_a->bio = NULL; + } + if (aio_a->do_dealloc) { + brick_block_free(aio->io_data, aio_a->alloc_len); + aio->io_data = NULL; + } + obj_free(aio); + + goto out_return; +err: + XIO_FAT("cannot work\n"); +out_return:; +} + +#define BIO_AIO_PUT(output, aio) \ + ({ \ + if (obj_put(aio)) { \ + _bio_io_put(output, aio); \ + } \ + }) + +static +void bio_io_put(struct bio_output *output, struct aio_object *aio) +{ + BIO_AIO_PUT(output, aio); +} + +static +void _bio_io_io(struct bio_output *output, struct aio_object *aio, bool cork) +{ + struct bio_brick *brick = output->brick; + struct bio_aio_aspect *aio_a = bio_aio_get_aspect(output->brick, aio); + struct bio *bio; + unsigned long long latency; + unsigned long flags; + int rw; + int status = -EINVAL; + + CHECK_PTR(aio_a, err); + bio = aio_a->bio; + CHECK_PTR(bio, err); + + obj_get(aio); + atomic_inc(&brick->fly_count[PRIO_INDEX(aio)]); + + bio_get(bio); + + rw = aio->io_rw & 1; + if (brick->do_noidle && !cork) + rw |= REQ_NOIDLE; + if (!aio->io_skip_sync) { + if (brick->do_sync) + rw |= 
REQ_SYNC; + } + + aio_a->start_stamp = cpu_clock(raw_smp_processor_id()); + spin_lock_irqsave(&brick->lock, flags); + list_add_tail(&aio_a->io_head, &brick->submitted_list[rw & 1]); + spin_unlock_irqrestore(&brick->lock, flags); + + bio->bi_rw = rw; + latency = TIME_STATS( + &timings[rw & 1], + submit_bio(rw, bio) + ); + + threshold_check(&bio_submit_threshold, latency); + + status = 0; +#ifdef BIO_EOPNOTSUPP /* missing since b25de9d6da49b1a8760a89672283128aa8c78345 */ + if (unlikely(bio_flagged(bio, BIO_EOPNOTSUPP))) + status = -EOPNOTSUPP; +#endif + + if (likely(status >= 0)) + goto done; + + bio_put(bio); + atomic_dec(&brick->fly_count[PRIO_INDEX(aio)]); + +err: + XIO_ERR("IO error %d\n", status); + CHECKED_CALLBACK(aio, status, done); + atomic_dec(&xio_global_io_flying); + +done:; +} + +static +void bio_io_io(struct bio_output *output, struct aio_object *aio) +{ + CHECK_PTR(aio, fatal); + + obj_get(aio); + atomic_inc(&xio_global_io_flying); + + if (aio->io_prio == XIO_PRIO_LOW || + (aio->io_prio == XIO_PRIO_NORMAL && aio->io_rw)) { + struct bio_aio_aspect *aio_a = bio_aio_get_aspect(output->brick, aio); + struct bio_brick *brick = output->brick; + unsigned long flags; + + spin_lock_irqsave(&brick->lock, flags); + list_add_tail(&aio_a->io_head, &brick->queue_list[PRIO_INDEX(aio)]); + atomic_inc(&brick->queue_count[PRIO_INDEX(aio)]); + spin_unlock_irqrestore(&brick->lock, flags); + brick->submitted = true; + + wake_up_interruptible(&brick->submit_event); + goto out_return; + } + + /* realtime IO: start immediately */ + _bio_io_io(output, aio, false); + BIO_AIO_PUT(output, aio); + goto out_return; +fatal: + XIO_FAT("cannot handle aio %p on output %p\n", aio, output); +out_return:; +} + +static +int bio_response_thread(void *data) +{ + struct bio_brick *brick = data; + + XIO_INF("bio response thread has started on '%s'.\n", brick->brick_path); + + for (;;) { + LIST_HEAD(tmp_list); + unsigned long flags; + int thr_limit; + int sleeptime; + int count; + int i; + + 
thr_limit = bio_io_threshold[0].thr_limit; + if (bio_io_threshold[1].thr_limit < thr_limit) + thr_limit = bio_io_threshold[1].thr_limit; + + sleeptime = HZ / 10; + if (thr_limit > 0) { + sleeptime = thr_limit / (1000000 * 2 / HZ); + if (unlikely(sleeptime < 2)) + sleeptime = 2; + } + + wait_event_interruptible_timeout( + brick->response_event, + atomic_read(&brick->completed_count) > 0, + sleeptime); + + spin_lock_irqsave(&brick->lock, flags); + list_replace_init(&brick->completed_list, &tmp_list); + spin_unlock_irqrestore(&brick->lock, flags); + + count = 0; + for (;;) { + struct list_head *tmp; + struct bio_aio_aspect *aio_a; + struct aio_object *aio; + unsigned long long latency; + int code; + + if (list_empty(&tmp_list)) { + if (brick_thread_should_stop() && + atomic_read(&brick->fly_count[0]) + + atomic_read(&brick->fly_count[1]) + + atomic_read(&brick->fly_count[2]) <= 0) + goto done; + break; + } + + tmp = tmp_list.next; + list_del_init(tmp); + atomic_dec(&brick->completed_count); + + aio_a = container_of(tmp, struct bio_aio_aspect, io_head); + aio = aio_a->object; + + latency = cpu_clock(raw_smp_processor_id()) - aio_a->start_stamp; + threshold_check(&bio_io_threshold[aio->io_rw & 1], latency); + + code = aio_a->status_code; + + if (code < 0) { + XIO_ERR("IO error %d\n", code); + } else { + aio_checksum(aio); + aio->io_flags |= AIO_UPTODATE; + } + + SIMPLE_CALLBACK(aio, code); + + atomic_dec(&brick->fly_count[PRIO_INDEX(aio)]); + atomic_inc(&brick->total_completed_count[PRIO_INDEX(aio)]); + count++; + + if (likely(aio_a->bio)) + bio_put(aio_a->bio); + BIO_AIO_PUT(aio_a->output, aio); + + atomic_dec(&xio_global_io_flying); + } + + /* Try to detect slow requests as early as possible, + * even before they have completed. 
+ */ + for (i = 0; i < 2; i++) { + unsigned long long eldest = 0; + + spin_lock_irqsave(&brick->lock, flags); + if (!list_empty(&brick->submitted_list[i])) { + struct bio_aio_aspect *aio_a; + + aio_a = container_of(brick->submitted_list[i].next, struct bio_aio_aspect, io_head); + eldest = aio_a->start_stamp; + } + spin_unlock_irqrestore(&brick->lock, flags); + + if (eldest) + threshold_check(&bio_io_threshold[i], cpu_clock(raw_smp_processor_id()) - eldest); + } + + if (count) { + brick->submitted = true; + wake_up_interruptible(&brick->submit_event); + } + } +done: + XIO_INF("bio response thread has stopped.\n"); + return 0; +} + +static +bool _bg_should_run(struct bio_brick *brick) +{ + return (atomic_read(&brick->queue_count[2]) > 0 && + atomic_read(&brick->fly_count[0]) + atomic_read(&brick->fly_count[1]) <= brick->bg_threshold && + (brick->bg_maxfly <= 0 || atomic_read(&brick->fly_count[2]) < brick->bg_maxfly)); +} + +static +int bio_submit_thread(void *data) +{ + struct bio_brick *brick = data; + + XIO_INF("bio submit thread has started on '%s'.\n", brick->brick_path); + + while (!brick_thread_should_stop()) { + int prio; + + wait_event_interruptible_timeout( + brick->submit_event, + brick->submitted, + HZ / 2); + + brick->submitted = false; + + for (prio = 0; prio < XIO_PRIO_NR; prio++) { + LIST_HEAD(tmp_list); + unsigned long flags; + + if (prio == XIO_PRIO_NR-1 && !_bg_should_run(brick)) + break; + + spin_lock_irqsave(&brick->lock, flags); + list_replace_init(&brick->queue_list[prio], &tmp_list); + spin_unlock_irqrestore(&brick->lock, flags); + + while (!list_empty(&tmp_list)) { + struct list_head *tmp = tmp_list.next; + struct bio_aio_aspect *aio_a; + struct aio_object *aio; + bool cork; + + list_del_init(tmp); + + aio_a = container_of(tmp, struct bio_aio_aspect, io_head); + aio = aio_a->object; + if (unlikely(!aio)) { + XIO_ERR("invalid aio\n"); + continue; + } + + atomic_dec(&brick->queue_count[PRIO_INDEX(aio)]); + cork = 
atomic_read(&brick->queue_count[PRIO_INDEX(aio)]) > 0; + + _bio_io_io(aio_a->output, aio, cork); + + BIO_AIO_PUT(aio_a->output, aio); + } + } + } + + XIO_INF("bio submit thread has stopped.\n"); + return 0; +} + +static int bio_switch(struct bio_brick *brick) +{ + int status = 0; + + if (brick->power.button) { + if (brick->power.on_led) + goto done; + + xio_set_power_off_led((void *)brick, false); + + if (!brick->bdev) { + static int index; + const char *path = brick->brick_path; + int flags = O_RDWR | O_EXCL | O_LARGEFILE; + struct address_space *mapping; + struct inode *inode = NULL; + struct request_queue *q; + + brick->mf = mapfree_get(path, flags); + if (unlikely(!brick->mf || !brick->mf->mf_filp)) { + status = -ENOENT; + XIO_ERR("cannot open file '%s'\n", path); + goto done; + } + mapfree_pages(brick->mf, -1); + mapping = brick->mf->mf_filp->f_mapping; + if (likely(mapping)) + inode = mapping->host; + if (unlikely(!mapping || !inode)) { + XIO_ERR("internal problem with '%s'\n", path); + status = -EINVAL; + goto done; + } + if (unlikely(!S_ISBLK(inode->i_mode) || !inode->i_bdev)) { + XIO_ERR("sorry, '%s' is not a block device\n", path); + status = -ENODEV; + goto done; + } + + mapping_set_gfp_mask(mapping, mapping_gfp_mask(mapping) & ~(__GFP_IO | __GFP_FS)); + + q = bdev_get_queue(inode->i_bdev); + if (unlikely(!q)) { + XIO_ERR("internal queue '%s' does not exist\n", path); + status = -EINVAL; + goto done; + } + + XIO_INF("'%s' ra_pages OLD=%lu NEW=%d\n", + path, + q->backing_dev_info.ra_pages, + brick->ra_pages); + q->backing_dev_info.ra_pages = brick->ra_pages; + + brick->bvec_max = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); + if (brick->bvec_max > BIO_MAX_PAGES) + brick->bvec_max = BIO_MAX_PAGES; + else if (brick->bvec_max <= 1) + brick->bvec_max = 1; + brick->total_size = i_size_read(inode); + XIO_INF("'%s' size=%lld bvec_max=%d\n", + path, brick->total_size, brick->bvec_max); + + brick->response_thread = brick_thread_create(bio_response_thread, + brick, 
+ "xio_bio_r%d", + index); + brick->submit_thread = brick_thread_create(bio_submit_thread, brick, "xio_bio_s%d", index); + status = -ENOMEM; + if (likely(brick->submit_thread && brick->response_thread)) { + brick->bdev = inode->i_bdev; + brick->mode_ptr = &brick->mf->mf_mode; + index++; + status = 0; + } + } + } + + xio_set_power_on_led((void *)brick, brick->power.button && brick->bdev != NULL); + +done: + if (status < 0 || !brick->power.button) { + if (brick->submit_thread) { + brick_thread_stop(brick->submit_thread); + brick->submit_thread = NULL; + } + if (brick->response_thread) { + brick_thread_stop(brick->response_thread); + brick->response_thread = NULL; + } + if (brick->mf) { + mapfree_put(brick->mf); + brick->mf = NULL; + } + brick->mode_ptr = NULL; + brick->bdev = NULL; + if (!brick->power.button) { + xio_set_power_off_led((void *)brick, true); + brick->total_size = 0; + } + } + return status; +} + +/*************** informational * statistics **************/ + +/* Render a statistics text for this brick into a 4096 byte buffer + * obtained from brick_string_alloc(): first the two submit timings + * (timings[0] and timings[1]), then one line with the completed, + * queued and flying counters per priority index plus the number of + * requests waiting on the completed list. + * The parameter verbose is currently unused. + * NOTE(review): the allocation result is not checked and the buffer is + * returned to the caller, who presumably has to free it -- confirm + * against brick_string_alloc() and the callers. + */ +static noinline +char *bio_statistics(struct bio_brick *brick, int verbose) +{ + char *res = brick_string_alloc(4096); + int pos = 0; + + pos += report_timing(&timings[0], res + pos, 4096 - pos); + pos += report_timing(&timings[1], res + pos, 4096 - pos); + + /* Keep the arguments in the order of the labels in the format + * string: completed[0..2], queued[0..2], flying[0..2], completing. + */ + snprintf(res + pos, 4096 - pos, + "total completed[0] = %d completed[1] = %d completed[2] = %d | queued[0] = %d queued[1] = %d queued[2] = %d flying[0] = %d flying[1] = %d flying[2] = %d completing = %d\n", + atomic_read(&brick->total_completed_count[0]), + atomic_read(&brick->total_completed_count[1]), + atomic_read(&brick->total_completed_count[2]), + atomic_read(&brick->queue_count[0]), + atomic_read(&brick->queue_count[1]), + atomic_read(&brick->queue_count[2]), + atomic_read(&brick->fly_count[0]), + atomic_read(&brick->fly_count[1]), + atomic_read(&brick->fly_count[2]), + atomic_read(&brick->completed_count)); + + return res; +} + +static noinline +void bio_reset_statistics(struct bio_brick *brick) +{ + 
atomic_set(&brick->total_completed_count[0], 0); + atomic_set(&brick->total_completed_count[1], 0); + atomic_set(&brick->total_completed_count[2], 0); +} + +/*************** object * aspect constructors * destructors **************/ + +static int bio_aio_aspect_init_fn(struct generic_aspect *_ini) +{ + struct bio_aio_aspect *ini = (void *)_ini; + + INIT_LIST_HEAD(&ini->io_head); + return 0; +} + +static void bio_aio_aspect_exit_fn(struct generic_aspect *_ini) +{ + struct bio_aio_aspect *ini = (void *)_ini; + + (void)ini; +} + +XIO_MAKE_STATICS(bio); + +/********************* brick constructors * destructors *******************/ + +static int bio_brick_construct(struct bio_brick *brick) +{ + spin_lock_init(&brick->lock); + INIT_LIST_HEAD(&brick->queue_list[0]); + INIT_LIST_HEAD(&brick->queue_list[1]); + INIT_LIST_HEAD(&brick->queue_list[2]); + INIT_LIST_HEAD(&brick->submitted_list[0]); + INIT_LIST_HEAD(&brick->submitted_list[1]); + INIT_LIST_HEAD(&brick->completed_list); + init_waitqueue_head(&brick->submit_event); + init_waitqueue_head(&brick->response_event); + return 0; +} + +static int bio_brick_destruct(struct bio_brick *brick) +{ + return 0; +} + +static int bio_output_construct(struct bio_output *output) +{ + return 0; +} + +static int bio_output_destruct(struct bio_output *output) +{ + return 0; +} + +/************************ static structs ***********************/ + +static struct bio_brick_ops bio_brick_ops = { + .brick_switch = bio_switch, + .brick_statistics = bio_statistics, + .reset_statistics = bio_reset_statistics, +}; + +static struct bio_output_ops bio_output_ops = { + .xio_get_info = bio_get_info, + .aio_get = bio_io_get, + .aio_put = bio_io_put, + .aio_io = bio_io_io, +}; + +const struct bio_input_type bio_input_type = { + .type_name = "bio_input", + .input_size = sizeof(struct bio_input), +}; + +static const struct bio_input_type *bio_input_types[] = { + &bio_input_type, +}; + +const struct bio_output_type bio_output_type = { + .type_name = 
"bio_output", + .output_size = sizeof(struct bio_output), + .master_ops = &bio_output_ops, + .output_construct = &bio_output_construct, + .output_destruct = &bio_output_destruct, +}; + +static const struct bio_output_type *bio_output_types[] = { + &bio_output_type, +}; + +const struct bio_brick_type bio_brick_type = { + .type_name = "bio_brick", + .brick_size = sizeof(struct bio_brick), + .max_inputs = 0, + .max_outputs = 1, + .master_ops = &bio_brick_ops, + .aspect_types = bio_aspect_types, + .default_input_types = bio_input_types, + .default_output_types = bio_output_types, + .brick_construct = &bio_brick_construct, + .brick_destruct = &bio_brick_destruct, +}; + +/***************** module init stuff ************************/ + +int __init init_xio_bio(void) +{ + XIO_INF("init_bio()\n"); + _bio_brick_type = (void *)&bio_brick_type; + return bio_register_brick_type(); +} + +void exit_xio_bio(void) +{ + XIO_INF("exit_bio()\n"); + bio_unregister_brick_type(); +} diff --git a/include/linux/xio/xio_bio.h b/include/linux/xio/xio_bio.h new file mode 100644 index 0000000..a0d98be --- /dev/null +++ b/include/linux/xio/xio_bio.h @@ -0,0 +1,85 @@ +/* + * MARS Long Distance Replication Software + * + * Copyright (C) 2010-2014 Thomas Schoebel-Theuer + * Copyright (C) 2011-2014 1&1 Internet AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef XIO_BIO_H +#define XIO_BIO_H + +#define BIO_SUBMIT_MAX_LATENCY 250 /* 250 us */ +#define BIO_IO_R_MAX_LATENCY 40000 /* 40 ms */ +#define BIO_IO_W_MAX_LATENCY 100000 /* 100 ms */ + +extern struct threshold bio_submit_threshold; +extern struct threshold bio_io_threshold[2]; + +#include + +struct bio_aio_aspect { + GENERIC_ASPECT(aio); + struct list_head io_head; + struct bio *bio; + struct bio_output *output; + unsigned long long start_stamp; + int status_code; + int hash_pos; + int alloc_len; + bool do_dealloc; +}; + +struct bio_brick { + XIO_BRICK(bio); + /* tunables */ + int ra_pages; + int bg_threshold; + int bg_maxfly; + bool do_noidle; + bool do_sync; + bool do_unplug; + + /* readonly */ + loff_t total_size; + atomic_t fly_count[XIO_PRIO_NR]; + atomic_t queue_count[XIO_PRIO_NR]; + atomic_t completed_count; + atomic_t total_completed_count[XIO_PRIO_NR]; + + /* private */ + spinlock_t lock; + struct list_head queue_list[XIO_PRIO_NR]; + struct list_head submitted_list[2]; + struct list_head completed_list; + + wait_queue_head_t submit_event; + wait_queue_head_t response_event; + struct mapfree_info *mf; + struct block_device *bdev; + struct task_struct *submit_thread; + struct task_struct *response_thread; + int bvec_max; + bool submitted; +}; + +struct bio_input { + XIO_INPUT(bio); +}; + +struct bio_output { + XIO_OUTPUT(bio); +}; + +XIO_TYPES(bio); + +#endif -- 2.6.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/