Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751367AbVJKDSA (ORCPT ); Mon, 10 Oct 2005 23:18:00 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1751368AbVJKDSA (ORCPT ); Mon, 10 Oct 2005 23:18:00 -0400 Received: from lakshmi.addtoit.com ([198.99.130.6]:12560 "EHLO lakshmi.solana.com") by vger.kernel.org with ESMTP id S1751367AbVJKDR7 (ORCPT ); Mon, 10 Oct 2005 23:17:59 -0400 Message-Id: <200510110310.j9B3AWJ2013823@ccure.user-mode-linux.org> X-Mailer: exmh version 2.7.2 01/07/2005 with nmh-1.0.4 To: torvalds@osdl.org cc: linux-kernel@vger.kernel.org, user-mode-linux-devel@lists.sourceforge.net, blaisorblade@yahoo.it Subject: [PATCH] UML - revert block driver use of host AIO Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Date: Mon, 10 Oct 2005 23:10:32 -0400 From: Jeff Dike Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 36765 Lines: 1263 The patch to use host AIO support that I submitted early after 2.6.13 exposed some problems in the block driver. I have fixes for these, but am not comfortable putting them into 2.6.14 at this late date. So, this patch reverts the use of host AIO. I will resubmit the original patch, plus fixes to the driver after 2.6.14 in order to get a reasonable amount of testing before they're exposed to the general public. Signed-off-by: Jeff Dike Index: linux-2.6.14-rc3/arch/um/drivers/Makefile =================================================================== --- linux-2.6.14-rc3.orig/arch/um/drivers/Makefile 2005-10-10 12:02:08.000000000 -0400 +++ linux-2.6.14-rc3/arch/um/drivers/Makefile 2005-10-10 23:04:53.000000000 -0400 @@ -13,7 +13,7 @@ net-objs := net_kern.o net_user.o mconsole-objs := mconsole_kern.o mconsole_user.o hostaudio-objs := hostaudio_kern.o -ubd-objs := ubd_kern.o +ubd-objs := ubd_kern.o ubd_user.o port-objs := port_kern.o port_user.o harddog-objs := harddog_kern.o harddog_user.o Index: linux-2.6.14-rc3/arch/um/drivers/ubd_kern.c =================================================================== --- linux-2.6.14-rc3.orig/arch/um/drivers/ubd_kern.c 2005-10-10 12:02:08.000000000 -0400 +++ linux-2.6.14-rc3/arch/um/drivers/ubd_kern.c 2005-10-10 23:04:53.000000000 -0400 @@ -35,7 +35,6 @@ #include "linux/blkpg.h" #include "linux/genhd.h" #include "linux/spinlock.h" -#include "asm/atomic.h" #include "asm/segment.h" #include "asm/uaccess.h" #include "asm/irq.h" @@ -54,21 +53,20 @@ #include "mem.h" #include "mem_kern.h" #include "cow.h" -#include "aio.h" enum ubd_req { UBD_READ, UBD_WRITE }; struct io_thread_req { - enum aio_type op; + enum ubd_req op; int fds[2]; unsigned long offsets[2]; unsigned long long offset; unsigned long length; char *buffer; int sectorsize; - int bitmap_offset; - long bitmap_start; - long bitmap_end; + unsigned long sector_mask; + unsigned long long cow_offset; + unsigned long bitmap_words[2]; int error; }; @@ -82,31 +80,28 @@ unsigned long *bitmap_len_out, int *data_offset_out); extern int read_cow_bitmap(int fd, void *buf, int offset, int len); -extern void do_io(struct io_thread_req *req, struct request *r, - unsigned long *bitmap); +extern void do_io(struct io_thread_req *req); -static inline int ubd_test_bit(__u64 bit, void *data) +static inline int ubd_test_bit(__u64 bit, unsigned char *data) { - unsigned char *buffer = data; __u64 n; int bits, off; - bits = sizeof(buffer[0]) * 8; + bits = sizeof(data[0]) * 8; n = bit / bits; off = bit % bits; - return((buffer[n] & (1 << off)) != 0); + return((data[n] & (1 << off)) != 0); } -static inline void ubd_set_bit(__u64 bit, void *data) +static inline void ubd_set_bit(__u64 bit, unsigned char *data) { - unsigned char *buffer = data; __u64 n; int bits, off; - bits = sizeof(buffer[0]) * 8; + bits = sizeof(data[0]) * 8; n = bit / bits; off = bit % bits; - buffer[n] |= (1 << off); + data[n] |= (1 << off); } /*End stuff from ubd_user.h*/ @@ -115,6 +110,8 @@ static DEFINE_SPINLOCK(ubd_io_lock); static DEFINE_SPINLOCK(ubd_lock); +static void (*do_ubd)(void); + static int ubd_open(struct inode * inode, struct file * filp); static int ubd_release(struct inode * inode, struct file * file); static int ubd_ioctl(struct inode * inode, struct file * file, @@ -161,8 +158,6 @@ int data_offset; }; -#define MAX_SG 64 - struct ubd { char *file; int count; @@ -173,7 +168,6 @@ int no_cow; struct cow cow; struct platform_device pdev; - struct scatterlist sg[MAX_SG]; }; #define DEFAULT_COW { \ @@ -466,114 +460,81 @@ ); static void do_ubd_request(request_queue_t * q); -static int in_ubd; + +/* Only changed by ubd_init, which is an initcall. */ +int thread_fd = -1; /* Changed by ubd_handler, which is serialized because interrupts only * happen on CPU 0. */ int intr_count = 0; -static void ubd_end_request(struct request *req, int bytes, int uptodate) -{ - if (!end_that_request_first(req, uptodate, bytes >> 9)) { - add_disk_randomness(req->rq_disk); - end_that_request_last(req); - } -} - /* call ubd_finish if you need to serialize */ -static void __ubd_finish(struct request *req, int bytes) +static void __ubd_finish(struct request *req, int error) { - if(bytes < 0){ - ubd_end_request(req, 0, 0); - return; - } + int nsect; - ubd_end_request(req, bytes, 1); + if(error){ + end_request(req, 0); + return; + } + nsect = req->current_nr_sectors; + req->sector += nsect; + req->buffer += nsect << 9; + req->errors = 0; + req->nr_sectors -= nsect; + req->current_nr_sectors = 0; + end_request(req, 1); +} + +static inline void ubd_finish(struct request *req, int error) +{ + spin_lock(&ubd_io_lock); + __ubd_finish(req, error); + spin_unlock(&ubd_io_lock); +} + +/* Called without ubd_io_lock held */ +static void ubd_handler(void) +{ + struct io_thread_req req; + struct request *rq = elv_next_request(ubd_queue); + int n; + + do_ubd = NULL; + intr_count++; + n = os_read_file(thread_fd, &req, sizeof(req)); + if(n != sizeof(req)){ + printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, " + "err = %d\n", os_getpid(), -n); + spin_lock(&ubd_io_lock); + end_request(rq, 0); + spin_unlock(&ubd_io_lock); + return; + } + + ubd_finish(rq, req.error); + reactivate_fd(thread_fd, UBD_IRQ); + do_ubd_request(ubd_queue); } -static inline void ubd_finish(struct request *req, int bytes) +static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused) { - spin_lock(&ubd_io_lock); - __ubd_finish(req, bytes); - spin_unlock(&ubd_io_lock); + ubd_handler(); + return(IRQ_HANDLED); } -struct bitmap_io { - atomic_t count; - struct aio_context aio; -}; - -struct ubd_aio { - struct aio_context aio; - struct request *req; - int len; - struct bitmap_io *bitmap; - void *bitmap_buf; -}; - -static int ubd_reply_fd = -1; +/* Only changed by ubd_init, which is an initcall. */ +static int io_pid = -1; -static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused) +void kill_io_thread(void) { - struct aio_thread_reply reply; - struct ubd_aio *aio; - struct request *req; - int err, n, fd = (int) (long) dev; - - while(1){ - err = os_read_file(fd, &reply, sizeof(reply)); - if(err == -EAGAIN) - break; - if(err < 0){ - printk("ubd_aio_handler - read returned err %d\n", - -err); - break; - } - - aio = container_of(reply.data, struct ubd_aio, aio); - n = reply.err; - - if(n == 0){ - req = aio->req; - req->nr_sectors -= aio->len >> 9; - - if((aio->bitmap != NULL) && - (atomic_dec_and_test(&aio->bitmap->count))){ - aio->aio = aio->bitmap->aio; - aio->len = 0; - kfree(aio->bitmap); - aio->bitmap = NULL; - submit_aio(&aio->aio); - } - else { - if((req->nr_sectors == 0) && - (aio->bitmap == NULL)){ - int len = req->hard_nr_sectors << 9; - ubd_finish(req, len); - } - - if(aio->bitmap_buf != NULL) - kfree(aio->bitmap_buf); - kfree(aio); - } - } - else if(n < 0){ - ubd_finish(aio->req, n); - if(aio->bitmap != NULL) - kfree(aio->bitmap); - if(aio->bitmap_buf != NULL) - kfree(aio->bitmap_buf); - kfree(aio); - } - } - reactivate_fd(fd, UBD_IRQ); - - do_ubd_request(ubd_queue); - - return(IRQ_HANDLED); + if(io_pid != -1) + os_kill_process(io_pid, 1); } +__uml_exitcall(kill_io_thread); + static int ubd_file_size(struct ubd *dev, __u64 *size_out) { char *file; @@ -608,7 +569,7 @@ &dev->cow.data_offset, create_ptr); if((dev->fd == -ENOENT) && create_cow){ - dev->fd = create_cow_file(dev->file, dev->cow.file, + dev->fd = create_cow_file(dev->file, dev->cow.file, dev->openflags, 1 << 9, PAGE_SIZE, &dev->cow.bitmap_offset, &dev->cow.bitmap_len, @@ -870,10 +831,6 @@ { int i; - ubd_reply_fd = init_aio_irq(UBD_IRQ, "ubd", ubd_intr); - if(ubd_reply_fd < 0) - printk("Setting up ubd AIO failed, err = %d\n", ubd_reply_fd); - devfs_mk_dir("ubd"); if (register_blkdev(MAJOR_NR, "ubd")) return -1; @@ -884,7 +841,6 @@ return -1; } - blk_queue_max_hw_segments(ubd_queue, MAX_SG); if (fake_major != MAJOR_NR) { char name[sizeof("ubd_nnn\0")]; @@ -896,12 +852,40 @@ driver_register(&ubd_driver); for (i = 0; i < MAX_DEV; i++) ubd_add(i); - return 0; } late_initcall(ubd_init); +int ubd_driver_init(void){ + unsigned long stack; + int err; + + /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ + if(global_openflags.s){ + printk(KERN_INFO "ubd: Synchronous mode\n"); + /* Letting ubd=sync be like using ubd#s= instead of ubd#= is + * enough. So use anyway the io thread. */ + } + stack = alloc_stack(0, 0); + io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), + &thread_fd); + if(io_pid < 0){ + printk(KERN_ERR + "ubd : Failed to start I/O thread (errno = %d) - " + "falling back to synchronous I/O\n", -io_pid); + io_pid = -1; + return(0); + } + err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, + SA_INTERRUPT, "ubd", ubd_dev); + if(err != 0) + printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); + return(err); +} + +device_initcall(ubd_driver_init); + static int ubd_open(struct inode *inode, struct file *filp) { struct gendisk *disk = inode->i_bdev->bd_disk; @@ -939,55 +923,105 @@ return(0); } -static void cowify_bitmap(struct io_thread_req *req, unsigned long *bitmap) +static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, + __u64 *cow_offset, unsigned long *bitmap, + __u64 bitmap_offset, unsigned long *bitmap_words, + __u64 bitmap_len) { - __u64 sector = req->offset / req->sectorsize; - int i; + __u64 sector = io_offset >> 9; + int i, update_bitmap = 0; - for(i = 0; i < req->length / req->sectorsize; i++){ - if(ubd_test_bit(sector + i, bitmap)) - continue; - - if(req->bitmap_start == -1) - req->bitmap_start = sector + i; - req->bitmap_end = sector + i + 1; + for(i = 0; i < length >> 9; i++){ + if(cow_mask != NULL) + ubd_set_bit(i, (unsigned char *) cow_mask); + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + continue; - ubd_set_bit(sector + i, bitmap); - } + update_bitmap = 1; + ubd_set_bit(sector + i, (unsigned char *) bitmap); + } + + if(!update_bitmap) + return; + + *cow_offset = sector / (sizeof(unsigned long) * 8); + + /* This takes care of the case where we're exactly at the end of the + * device, and *cow_offset + 1 is off the end. So, just back it up + * by one word. Thanks to Lynn Kerby for the fix and James McMechan + * for the original diagnosis. + */ + if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) / + sizeof(unsigned long) - 1)) + (*cow_offset)--; + + bitmap_words[0] = bitmap[*cow_offset]; + bitmap_words[1] = bitmap[*cow_offset + 1]; + + *cow_offset *= sizeof(unsigned long); + *cow_offset += bitmap_offset; +} + +static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, + __u64 bitmap_offset, __u64 bitmap_len) +{ + __u64 sector = req->offset >> 9; + int i; + + if(req->length > (sizeof(req->sector_mask) * 8) << 9) + panic("Operation too long"); + + if(req->op == UBD_READ) { + for(i = 0; i < req->length >> 9; i++){ + if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + ubd_set_bit(i, (unsigned char *) + &req->sector_mask); + } + } + else cowify_bitmap(req->offset, req->length, &req->sector_mask, + &req->cow_offset, bitmap, bitmap_offset, + req->bitmap_words, bitmap_len); } /* Called with ubd_io_lock held */ -static int prepare_request(struct request *req, struct io_thread_req *io_req, - unsigned long long offset, int page_offset, - int len, struct page *page) +static int prepare_request(struct request *req, struct io_thread_req *io_req) { struct gendisk *disk = req->rq_disk; struct ubd *dev = disk->private_data; + __u64 offset; + int len; + + if(req->rq_status == RQ_INACTIVE) return(1); /* This should be impossible now */ if((rq_data_dir(req) == WRITE) && !dev->openflags.w){ printk("Write attempted on readonly ubd device %s\n", disk->disk_name); - ubd_end_request(req, 0, 0); + end_request(req, 0); return(1); } + offset = ((__u64) req->sector) << 9; + len = req->current_nr_sectors << 9; + io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd; io_req->fds[1] = dev->fd; + io_req->cow_offset = -1; io_req->offset = offset; io_req->length = len; io_req->error = 0; - io_req->op = (rq_data_dir(req) == READ) ? AIO_READ : AIO_WRITE; + io_req->sector_mask = 0; + + io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; io_req->offsets[0] = 0; io_req->offsets[1] = dev->cow.data_offset; - io_req->buffer = page_address(page) + page_offset; + io_req->buffer = req->buffer; io_req->sectorsize = 1 << 9; - io_req->bitmap_offset = dev->cow.bitmap_offset; - io_req->bitmap_start = -1; - io_req->bitmap_end = -1; - if((dev->cow.file != NULL) && (io_req->op == UBD_WRITE)) - cowify_bitmap(io_req, dev->cow.bitmap); + if(dev->cow.file != NULL) + cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset, + dev->cow.bitmap_len); + return(0); } @@ -996,36 +1030,30 @@ { struct io_thread_req io_req; struct request *req; - __u64 sector; - int err; + int err, n; - if(in_ubd) - return; - in_ubd = 1; - while((req = elv_next_request(q)) != NULL){ - struct gendisk *disk = req->rq_disk; - struct ubd *dev = disk->private_data; - int n, i; - - blkdev_dequeue_request(req); - - sector = req->sector; - n = blk_rq_map_sg(q, req, dev->sg); - - for(i = 0; i < n; i++){ - struct scatterlist *sg = &dev->sg[i]; - - err = prepare_request(req, &io_req, sector << 9, - sg->offset, sg->length, - sg->page); - if(err) - continue; - - sector += sg->length >> 9; - do_io(&io_req, req, dev->cow.bitmap); + if(thread_fd == -1){ + while((req = elv_next_request(q)) != NULL){ + err = prepare_request(req, &io_req); + if(!err){ + do_io(&io_req); + __ubd_finish(req, io_req.error); + } + } + } + else { + if(do_ubd || (req = elv_next_request(q)) == NULL) + return; + err = prepare_request(req, &io_req); + if(!err){ + do_ubd = ubd_handler; + n = os_write_file(thread_fd, (char *) &io_req, + sizeof(io_req)); + if(n != sizeof(io_req)) + printk("write to io thread failed, " + "errno = %d\n", -n); } } - in_ubd = 0; } static int ubd_ioctl(struct inode * inode, struct file * file, @@ -1241,95 +1269,131 @@ return(err); } -void do_io(struct io_thread_req *req, struct request *r, unsigned long *bitmap) +static int update_bitmap(struct io_thread_req *req) { - struct ubd_aio *aio; - struct bitmap_io *bitmap_io = NULL; - char *buf; - void *bitmap_buf = NULL; - unsigned long len, sector; - int nsectors, start, end, bit, err; - __u64 off; - - if(req->bitmap_start != -1){ - /* Round up to the nearest word */ - int round = sizeof(unsigned long); - len = (req->bitmap_end - req->bitmap_start + - round * 8 - 1) / (round * 8); - len *= round; - - off = req->bitmap_start / (8 * round); - off *= round; - - bitmap_io = kmalloc(sizeof(*bitmap_io), GFP_KERNEL); - if(bitmap_io == NULL){ - printk("Failed to kmalloc bitmap IO\n"); - req->error = 1; - return; - } + int n; - bitmap_buf = kmalloc(len, GFP_KERNEL); - if(bitmap_buf == NULL){ - printk("do_io : kmalloc of bitmap chunk " - "failed\n"); - kfree(bitmap_io); - req->error = 1; - return; - } - memcpy(bitmap_buf, &bitmap[off / sizeof(bitmap[0])], len); + if(req->cow_offset == -1) + return(0); - *bitmap_io = ((struct bitmap_io) - { .count = ATOMIC_INIT(0), - .aio = INIT_AIO(AIO_WRITE, req->fds[1], - bitmap_buf, len, - req->bitmap_offset + off, - ubd_reply_fd) } ); - } + n = os_seek_file(req->fds[1], req->cow_offset); + if(n < 0){ + printk("do_io - bitmap lseek failed : err = %d\n", -n); + return(1); + } - nsectors = req->length / req->sectorsize; - start = 0; - end = nsectors; - bit = 0; - do { - if(bitmap != NULL){ - sector = req->offset / req->sectorsize; - bit = ubd_test_bit(sector + start, bitmap); - end = start; - while((end < nsectors) && - (ubd_test_bit(sector + end, bitmap) == bit)) - end++; - } + n = os_write_file(req->fds[1], &req->bitmap_words, + sizeof(req->bitmap_words)); + if(n != sizeof(req->bitmap_words)){ + printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, + req->fds[1]); + return(1); + } - off = req->offsets[bit] + req->offset + - start * req->sectorsize; - len = (end - start) * req->sectorsize; - buf = &req->buffer[start * req->sectorsize]; - - aio = kmalloc(sizeof(*aio), GFP_KERNEL); - if(aio == NULL){ - req->error = 1; - return; - } + return(0); +} - *aio = ((struct ubd_aio) - { .aio = INIT_AIO(req->op, req->fds[bit], buf, - len, off, ubd_reply_fd), - .len = len, - .req = r, - .bitmap = bitmap_io, - .bitmap_buf = bitmap_buf }); - - if(aio->bitmap != NULL) - atomic_inc(&aio->bitmap->count); - - err = submit_aio(&aio->aio); - if(err){ - printk("do_io - submit_aio failed, " - "err = %d\n", err); - req->error = 1; - return; - } +void do_io(struct io_thread_req *req) +{ + char *buf; + unsigned long len; + int n, nsectors, start, end, bit; + int err; + __u64 off; + + nsectors = req->length / req->sectorsize; + start = 0; + do { + bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask); + end = start; + while((end < nsectors) && + (ubd_test_bit(end, (unsigned char *) + &req->sector_mask) == bit)) + end++; + + off = req->offset + req->offsets[bit] + + start * req->sectorsize; + len = (end - start) * req->sectorsize; + buf = &req->buffer[start * req->sectorsize]; + + err = os_seek_file(req->fds[bit], off); + if(err < 0){ + printk("do_io - lseek failed : err = %d\n", -err); + req->error = 1; + return; + } + if(req->op == UBD_READ){ + n = 0; + do { + buf = &buf[n]; + len -= n; + n = os_read_file(req->fds[bit], buf, len); + if (n < 0) { + printk("do_io - read failed, err = %d " + "fd = %d\n", -n, req->fds[bit]); + req->error = 1; + return; + } + } while((n < len) && (n != 0)); + if (n < len) memset(&buf[n], 0, len - n); + } else { + n = os_write_file(req->fds[bit], buf, len); + if(n != len){ + printk("do_io - write failed err = %d " + "fd = %d\n", -n, req->fds[bit]); + req->error = 1; + return; + } + } + + start = end; + } while(start < nsectors); - start = end; - } while(start < nsectors); + req->error = update_bitmap(req); } + +/* Changed in start_io_thread, which is serialized by being called only + * from ubd_init, which is an initcall. + */ +int kernel_fd = -1; + +/* Only changed by the io thread */ +int io_count = 0; + +int io_thread(void *arg) +{ + struct io_thread_req req; + int n; + + ignore_sigwinch_sig(); + while(1){ + n = os_read_file(kernel_fd, &req, sizeof(req)); + if(n != sizeof(req)){ + if(n < 0) + printk("io_thread - read failed, fd = %d, " + "err = %d\n", kernel_fd, -n); + else { + printk("io_thread - short read, fd = %d, " + "length = %d\n", kernel_fd, n); + } + continue; + } + io_count++; + do_io(&req); + n = os_write_file(kernel_fd, &req, sizeof(req)); + if(n != sizeof(req)) + printk("io_thread - write failed, fd = %d, err = %d\n", + kernel_fd, -n); + } +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ Index: linux-2.6.14-rc3/arch/um/drivers/ubd_user.c =================================================================== --- linux-2.6.14-rc3.orig/arch/um/drivers/ubd_user.c 2005-10-01 18:44:20.996936304 -0400 +++ linux-2.6.14-rc3/arch/um/drivers/ubd_user.c 2005-10-10 23:04:53.000000000 -0400 @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Copyright (C) 2001 Ridgerun,Inc (glonnon@ridgerun.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "asm/types.h" +#include "user_util.h" +#include "kern_util.h" +#include "user.h" +#include "ubd_user.h" +#include "os.h" +#include "cow.h" + +#include +#include + +void ignore_sigwinch_sig(void) +{ + signal(SIGWINCH, SIG_IGN); +} + +int start_io_thread(unsigned long sp, int *fd_out) +{ + int pid, fds[2], err; + + err = os_pipe(fds, 1, 1); + if(err < 0){ + printk("start_io_thread - os_pipe failed, err = %d\n", -err); + goto out; + } + + kernel_fd = fds[0]; + *fd_out = fds[1]; + + pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD, + NULL); + if(pid < 0){ + printk("start_io_thread - clone failed : errno = %d\n", errno); + err = -errno; + goto out_close; + } + + return(pid); + + out_close: + os_close_file(fds[0]); + os_close_file(fds[1]); + kernel_fd = -1; + *fd_out = -1; + out: + return(err); +} + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. + * --------------------------------------------------------------------------- + * Local variables: + * c-file-style: "linux" + * End: + */ Index: linux-2.6.14-rc3/arch/um/include/aio.h =================================================================== --- linux-2.6.14-rc3.orig/arch/um/include/aio.h 2005-10-10 12:02:08.000000000 -0400 +++ linux-2.6.14-rc3/arch/um/include/aio.h 2005-10-10 23:04:53.000000000 -0400 @@ -14,27 +14,15 @@ }; struct aio_context { - enum aio_type type; - int fd; - void *data; - int len; - unsigned long long offset; int reply_fd; struct aio_context *next; }; -#define INIT_AIO(aio_type, aio_fd, aio_data, aio_len, aio_offset, \ - aio_reply_fd) \ - { .type = aio_type, \ - .fd = aio_fd, \ - .data = aio_data, \ - .len = aio_len, \ - .offset = aio_offset, \ - .reply_fd = aio_reply_fd } - #define INIT_AIO_CONTEXT { .reply_fd = -1, \ .next = NULL } -extern int submit_aio(struct aio_context *aio); +extern int submit_aio(enum aio_type type, int fd, char *buf, int len, + unsigned long long offset, int reply_fd, + struct aio_context *aio); #endif Index: linux-2.6.14-rc3/arch/um/os-Linux/aio.c =================================================================== --- linux-2.6.14-rc3.orig/arch/um/os-Linux/aio.c 2005-10-10 12:02:08.000000000 -0400 +++ linux-2.6.14-rc3/arch/um/os-Linux/aio.c 2005-10-10 23:04:54.000000000 -0400 @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -17,31 +16,18 @@ #include "user.h" #include "mode.h" +struct aio_thread_req { + enum aio_type type; + int io_fd; + unsigned long long offset; + char *buf; + int len; + struct aio_context *aio; +}; + static int aio_req_fd_r = -1; static int aio_req_fd_w = -1; -static int update_aio(struct aio_context *aio, int res) -{ - if(res < 0) - aio->len = res; - else if((res == 0) && (aio->type == AIO_READ)){ - /* This is the EOF case - we have hit the end of the file - * and it ends in a partial block, so we fill the end of - * the block with zeros and claim success. - */ - memset(aio->data, 0, aio->len); - aio->len = 0; - } - else if(res > 0){ - aio->len -= res; - aio->data += res; - aio->offset += res; - return aio->len; - } - - return 0; -} - #if defined(HAVE_AIO_ABI) #include @@ -80,7 +66,8 @@ * that it now backs the mmapped area. */ -static int do_aio(aio_context_t ctx, struct aio_context *aio) +static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf, + int len, unsigned long long offset, struct aio_context *aio) { struct iocb iocb, *iocbp = &iocb; char c; @@ -88,39 +75,40 @@ iocb = ((struct iocb) { .aio_data = (unsigned long) aio, .aio_reqprio = 0, - .aio_fildes = aio->fd, - .aio_buf = (unsigned long) aio->data, - .aio_nbytes = aio->len, - .aio_offset = aio->offset, + .aio_fildes = fd, + .aio_buf = (unsigned long) buf, + .aio_nbytes = len, + .aio_offset = offset, .aio_reserved1 = 0, .aio_reserved2 = 0, .aio_reserved3 = 0 }); - switch(aio->type){ + switch(type){ case AIO_READ: iocb.aio_lio_opcode = IOCB_CMD_PREAD; + err = io_submit(ctx, 1, &iocbp); break; case AIO_WRITE: iocb.aio_lio_opcode = IOCB_CMD_PWRITE; + err = io_submit(ctx, 1, &iocbp); break; case AIO_MMAP: iocb.aio_lio_opcode = IOCB_CMD_PREAD; iocb.aio_buf = (unsigned long) &c; iocb.aio_nbytes = sizeof(c); + err = io_submit(ctx, 1, &iocbp); break; default: - printk("Bogus op in do_aio - %d\n", aio->type); + printk("Bogus op in do_aio - %d\n", type); err = -EINVAL; - goto out; + break; } - err = io_submit(ctx, 1, &iocbp); if(err > 0) err = 0; else err = -errno; - out: return err; } @@ -129,9 +117,8 @@ static int aio_thread(void *arg) { struct aio_thread_reply reply; - struct aio_context *aio; struct io_event event; - int err, n; + int err, n, reply_fd; signal(SIGWINCH, SIG_IGN); @@ -144,22 +131,14 @@ "errno = %d\n", errno); } else { - /* This is safe as we've just a pointer here. */ - aio = (struct aio_context *) (long) event.data; - if(update_aio(aio, event.res)){ - do_aio(ctx, aio); - continue; - } - reply = ((struct aio_thread_reply) - { .data = aio, - .err = aio->len }); - err = os_write_file(aio->reply_fd, &reply, - sizeof(reply)); + { .data = (void *) (long) event.data, + .err = event.res }); + reply_fd = ((struct aio_context *) reply.data)->reply_fd; + err = os_write_file(reply_fd, &reply, sizeof(reply)); if(err != sizeof(reply)) - printk("aio_thread - write failed, " - "fd = %d, err = %d\n", aio->reply_fd, - -err); + printk("aio_thread - write failed, fd = %d, " + "err = %d\n", aio_req_fd_r, -err); } } return 0; @@ -167,35 +146,35 @@ #endif -static int do_not_aio(struct aio_context *aio) +static int do_not_aio(struct aio_thread_req *req) { char c; int err; - switch(aio->type){ + switch(req->type){ case AIO_READ: - err = os_seek_file(aio->fd, aio->offset); + err = os_seek_file(req->io_fd, req->offset); if(err) goto out; - err = os_read_file(aio->fd, aio->data, aio->len); + err = os_read_file(req->io_fd, req->buf, req->len); break; case AIO_WRITE: - err = os_seek_file(aio->fd, aio->offset); + err = os_seek_file(req->io_fd, req->offset); if(err) goto out; - err = os_write_file(aio->fd, aio->data, aio->len); + err = os_write_file(req->io_fd, req->buf, req->len); break; case AIO_MMAP: - err = os_seek_file(aio->fd, aio->offset); + err = os_seek_file(req->io_fd, req->offset); if(err) goto out; - err = os_read_file(aio->fd, &c, sizeof(c)); + err = os_read_file(req->io_fd, &c, sizeof(c)); break; default: - printk("do_not_aio - bad request type : %d\n", aio->type); + printk("do_not_aio - bad request type : %d\n", req->type); err = -EINVAL; break; } @@ -206,14 +185,14 @@ static int not_aio_thread(void *arg) { - struct aio_context *aio; + struct aio_thread_req req; struct aio_thread_reply reply; int err; signal(SIGWINCH, SIG_IGN); while(1){ - err = os_read_file(aio_req_fd_r, &aio, sizeof(aio)); - if(err != sizeof(aio)){ + err = os_read_file(aio_req_fd_r, &req, sizeof(req)); + if(err != sizeof(req)){ if(err < 0) printk("not_aio_thread - read failed, " "fd = %d, err = %d\n", aio_req_fd_r, @@ -224,34 +203,17 @@ } continue; } - again: - err = do_not_aio(aio); - - if(update_aio(aio, err)) - goto again; - - reply = ((struct aio_thread_reply) { .data = aio, - .err = aio->len }); - err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); + err = do_not_aio(&req); + reply = ((struct aio_thread_reply) { .data = req.aio, + .err = err }); + err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply)); if(err != sizeof(reply)) printk("not_aio_thread - write failed, fd = %d, " "err = %d\n", aio_req_fd_r, -err); } } -static int submit_aio_24(struct aio_context *aio) -{ - int err; - - err = os_write_file(aio_req_fd_w, &aio, sizeof(aio)); - if(err == sizeof(aio)) - err = 0; - - return err; -} - static int aio_pid = -1; -static int (*submit_proc)(struct aio_context *aio); static int init_aio_24(void) { @@ -283,33 +245,11 @@ #endif printk("2.6 host AIO support not used - falling back to I/O " "thread\n"); - - submit_proc = submit_aio_24; - return 0; } #ifdef HAVE_AIO_ABI #define DEFAULT_24_AIO 0 -static int submit_aio_26(struct aio_context *aio) -{ - struct aio_thread_reply reply; - int err; - - err = do_aio(ctx, aio); - if(err){ - reply = ((struct aio_thread_reply) { .data = aio, - .err = err }); - err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); - if(err != sizeof(reply)) - printk("submit_aio_26 - write failed, " - "fd = %d, err = %d\n", aio->reply_fd, -err); - else err = 0; - } - - return err; -} - static int init_aio_26(void) { unsigned long stack; @@ -330,22 +270,39 @@ aio_pid = err; printk("Using 2.6 host AIO\n"); + return 0; +} - submit_proc = submit_aio_26; +static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) +{ + struct aio_thread_reply reply; + int err; - return 0; + err = do_aio(ctx, type, io_fd, buf, len, offset, aio); + if(err){ + reply = ((struct aio_thread_reply) { .data = aio, + .err = err }); + err = os_write_file(aio->reply_fd, &reply, sizeof(reply)); + if(err != sizeof(reply)) + printk("submit_aio_26 - write failed, " + "fd = %d, err = %d\n", aio->reply_fd, -err); + else err = 0; + } + + return err; } #else #define DEFAULT_24_AIO 1 -static int submit_aio_26(struct aio_context *aio) +static int init_aio_26(void) { return -ENOSYS; } -static int init_aio_26(void) +static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) { - submit_proc = submit_aio_26; return -ENOSYS; } #endif @@ -412,7 +369,33 @@ __uml_exitcall(exit_aio); -int submit_aio(struct aio_context *aio) +static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, struct aio_context *aio) +{ + struct aio_thread_req req = { .type = type, + .io_fd = io_fd, + .offset = offset, + .buf = buf, + .len = len, + .aio = aio, + }; + int err; + + err = os_write_file(aio_req_fd_w, &req, sizeof(req)); + if(err == sizeof(req)) + err = 0; + + return err; +} + +int submit_aio(enum aio_type type, int io_fd, char *buf, int len, + unsigned long long offset, int reply_fd, + struct aio_context *aio) { - return (*submit_proc)(aio); + aio->reply_fd = reply_fd; + if(aio_24) + return submit_aio_24(type, io_fd, buf, len, offset, aio); + else { + return submit_aio_26(type, io_fd, buf, len, offset, aio); + } } - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/