2002-10-16 15:23:26

by David Howells

[permalink] [raw]
Subject: [PATCH] do_generic_file_read / readahead adjustments


The attached patch does the following three things:

(1) Makes the functions in mm/readahead.c only use struct file* to pass to
readpage(). address_space* and file_ra_state* are used instead to keep
track of readahead stuff.

(2) Adds a new function do_generic_mapping_read() that is similar to
do_generic_file_read(), except that it uses a mapping pointer and a
readahead state pointer to access a file. The file* is only used to pass
to readpage().

(3) Turns do_generic_file_read() into an inline function in linux/fs.h that
simply wraps do_generic_mapping_read().

This should mean that it is no longer necessary to have a struct file to
access a file in this manner. Just an inode or address space should be
sufficient.

It also means alternate read-ahead structures can be maintained.

It may be worth adding a read-ahead struct init function too.

David

diff -u -x '*.o' -x '*.cmd' -x '*~' linux-2.5.43/include/linux/fs.h linux-readahead-2543/include/linux/fs.h
--- linux-2.5.43/include/linux/fs.h 2002-10-16 09:26:02.000000000 +0100
+++ linux-readahead-2543/include/linux/fs.h 2002-10-16 16:16:11.000000000 +0100
@@ -1251,7 +1251,8 @@
ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos);
extern ssize_t generic_file_sendfile(struct file *, struct file *, loff_t *, size_t);
-extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t);
+extern void do_generic_mapping_read(struct address_space *, struct file_ra_state *, struct file *,
+ loff_t *, read_descriptor_t *, read_actor_t);
extern ssize_t generic_file_direct_IO(int rw, struct file *file,
const struct iovec *iov, loff_t offset, unsigned long nr_segs);
extern int generic_direct_IO(int rw, struct inode *inode, const struct iovec
@@ -1268,6 +1269,18 @@
extern int generic_vm_writeback(struct page *page,
struct writeback_control *wbc);

+static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
+ read_descriptor_t * desc,
+ read_actor_t actor)
+{
+ do_generic_mapping_read(filp->f_dentry->d_inode->i_mapping,
+ &filp->f_ra,
+ filp,
+ ppos,
+ desc,
+ actor);
+}
+
extern struct file_operations generic_ro_fops;

extern int vfs_readlink(struct dentry *, char *, int, const char *);
diff -u -x '*.o' -x '*.cmd' -x '*~' linux-2.5.43/include/linux/mm.h linux-readahead-2543/include/linux/mm.h
--- linux-2.5.43/include/linux/mm.h 2002-10-16 09:26:02.000000000 +0100
+++ linux-readahead-2543/include/linux/mm.h 2002-10-16 15:58:20.000000000 +0100
@@ -513,11 +513,18 @@
/* readahead.c */
#define VM_MAX_READAHEAD 128 /* kbytes */
#define VM_MIN_READAHEAD 16 /* kbytes (includes current page) */
-int do_page_cache_readahead(struct file *file,
+int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
unsigned long offset, unsigned long nr_to_read);
-void page_cache_readahead(struct file *file, unsigned long offset);
-void page_cache_readaround(struct file *file, unsigned long offset);
-void handle_ra_miss(struct file *file);
+void page_cache_readahead(struct address_space *mapping,
+ struct file_ra_state *ra,
+ struct file *filp,
+ unsigned long offset);
+void page_cache_readaround(struct address_space *mapping,
+ struct file_ra_state *ra,
+ struct file *filp,
+ unsigned long offset);
+void handle_ra_miss(struct address_space *mapping,
+ struct file_ra_state *ra);

/* Do stack extension */
extern int expand_stack(struct vm_area_struct * vma, unsigned long address);
diff -ur -x '*.o' -x '*.cmd' -x '*~' linux-2.5.43/kernel/ksyms.c linux-readahead-2543/kernel/ksyms.c
--- linux-2.5.43/kernel/ksyms.c 2002-10-16 09:26:03.000000000 +0100
+++ linux-readahead-2543/kernel/ksyms.c 2002-10-16 16:17:30.000000000 +0100
@@ -229,7 +229,7 @@
EXPORT_SYMBOL(generic_block_bmap);
EXPORT_SYMBOL(generic_file_read);
EXPORT_SYMBOL(generic_file_sendfile);
-EXPORT_SYMBOL(do_generic_file_read);
+EXPORT_SYMBOL(do_generic_mapping_read);
EXPORT_SYMBOL(generic_file_write);
EXPORT_SYMBOL(generic_file_write_nolock);
EXPORT_SYMBOL(generic_file_mmap);
diff -ur -x '*.o' -x '*.cmd' -x '*~' linux-2.5.43/mm/filemap.c linux-readahead-2543/mm/filemap.c
--- linux-2.5.43/mm/filemap.c 2002-10-16 09:26:03.000000000 +0100
+++ linux-readahead-2543/mm/filemap.c 2002-10-16 16:18:17.000000000 +0100
@@ -572,10 +572,14 @@
* This is really ugly. But the goto's actually try to clarify some
* of the logic when it comes to error handling etc.
* - note the struct file * is only passed for the use of readpage
*/
-void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor)
+void do_generic_mapping_read(struct address_space *mapping,
+ struct file_ra_state *ra,
+ struct file * filp,
+ loff_t *ppos,
+ read_descriptor_t * desc,
+ read_actor_t actor)
{
- struct address_space *mapping = filp->f_dentry->d_inode->i_mapping;
struct inode *inode = mapping->host;
unsigned long index, offset;
struct page *cached_page;
@@ -599,7 +603,7 @@
break;
}

- page_cache_readahead(filp, index);
+ page_cache_readahead(mapping, ra, filp, index);

nr = nr - offset;

@@ -611,7 +615,7 @@
page = radix_tree_lookup(&mapping->page_tree, index);
if (!page) {
read_unlock(&mapping->page_lock);
- handle_ra_miss(filp);
+ handle_ra_miss(mapping,ra);
goto no_cached_page;
}
page_cache_get(page);
@@ -947,9 +951,9 @@
}

static ssize_t
-do_readahead(struct file *file, unsigned long index, unsigned long nr)
+do_readahead(struct address_space *mapping, struct file *filp,
+ unsigned long index, unsigned long nr)
{
- struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
unsigned long max;
unsigned long active;
unsigned long inactive;
@@ -963,7 +967,7 @@
if (nr > max)
nr = max;

- do_page_cache_readahead(file, index, nr);
+ do_page_cache_readahead(mapping, filp, index, nr);
return 0;
}

@@ -976,10 +980,11 @@
file = fget(fd);
if (file) {
if (file->f_mode & FMODE_READ) {
+ struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
unsigned long start = offset >> PAGE_CACHE_SHIFT;
unsigned long end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
unsigned long len = end - start + 1;
- ret = do_readahead(file, start, len);
+ ret = do_readahead(mapping, file, start, len);
}
fput(file);
}
@@ -1000,6 +1005,7 @@
int error;
struct file *file = area->vm_file;
struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
+ struct file_ra_state *ra = &file->f_ra;
struct inode *inode = mapping->host;
struct page *page;
unsigned long size, pgoff, endoff;
@@ -1032,7 +1038,7 @@
*/
if (VM_SequentialReadHint(area)) {
did_readahead = 1;
- page_cache_readahead(area->vm_file, pgoff);
+ page_cache_readahead(mapping, ra, file, pgoff);
}

/*
@@ -1041,7 +1047,7 @@
*/
if ((pgoff < size) && !VM_RandomReadHint(area)) {
did_readahead = 1;
- page_cache_readaround(file, pgoff);
+ page_cache_readaround(mapping, ra, file, pgoff);
}

/*
@@ -1051,7 +1057,7 @@
page = find_get_page(mapping, pgoff);
if (!page) {
if (did_readahead) {
- handle_ra_miss(file);
+ handle_ra_miss(mapping,ra);
did_readahead = 0;
}
goto no_cached_page;
diff -ur -x '*.o' -x '*.cmd' -x '*~' linux-2.5.43/mm/madvise.c linux-readahead-2543/mm/madvise.c
--- linux-2.5.43/mm/madvise.c 2002-09-27 22:49:16.000000000 +0100
+++ linux-readahead-2543/mm/madvise.c 2002-10-16 16:02:53.000000000 +0100
@@ -80,7 +80,7 @@
if ((vma->vm_mm->rss + (end - start)) > rlim_rss)
return error;

- do_page_cache_readahead(file, start, end - start);
+ do_page_cache_readahead(file->f_dentry->d_inode->i_mapping, file, start, end - start);
return 0;
}

diff -ur -x '*.o' -x '*.cmd' -x '*~' linux-2.5.43/mm/readahead.c linux-readahead-2543/mm/readahead.c
--- linux-2.5.43/mm/readahead.c 2002-09-27 22:49:04.000000000 +0100
+++ linux-readahead-2543/mm/readahead.c 2002-10-16 16:02:23.000000000 +0100
@@ -22,18 +22,18 @@
/*
* Return max readahead size for this inode in number-of-pages.
*/
-static inline unsigned long get_max_readahead(struct file *file)
+static inline unsigned long get_max_readahead(struct file_ra_state *ra)
{
- return file->f_ra.ra_pages;
+ return ra->ra_pages;
}

-static inline unsigned long get_min_readahead(struct file *file)
+static inline unsigned long get_min_readahead(struct file_ra_state *ra)
{
return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE;
}

static int
-read_pages(struct file *file, struct address_space *mapping,
+read_pages(struct address_space *mapping, struct file *filp,
struct list_head *pages, unsigned nr_pages)
{
unsigned page_idx;
@@ -48,7 +48,7 @@
struct page *page = list_entry(pages->prev, struct page, list);
list_del(&page->list);
if (!add_to_page_cache(page, mapping, page->index)) {
- mapping->a_ops->readpage(file, page);
+ mapping->a_ops->readpage(filp, page);
if (!pagevec_add(&lru_pvec, page))
__pagevec_lru_add(&lru_pvec);
} else {
@@ -134,10 +134,11 @@
*
* Returns the number of pages which actually had IO started against them.
*/
-int do_page_cache_readahead(struct file *file,
- unsigned long offset, unsigned long nr_to_read)
+int do_page_cache_readahead(struct address_space *mapping,
+ struct file *filp,
+ unsigned long offset,
+ unsigned long nr_to_read)
{
- struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
struct inode *inode = mapping->host;
struct page *page;
unsigned long end_index; /* The last page we want to read */
@@ -181,7 +182,7 @@
* will then handle the error.
*/
if (ret) {
- read_pages(file, mapping, &page_pool, ret);
+ read_pages(mapping, filp, &page_pool, ret);
blk_run_queues();
}
BUG_ON(!list_empty(&page_pool));
@@ -216,9 +217,9 @@
* page_cache_readahead is the main function. If performs the adaptive
* readahead window size management and submits the readahead I/O.
*/
-void page_cache_readahead(struct file *file, unsigned long offset)
+void page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
+ struct file *filp, unsigned long offset)
{
- struct file_ra_state *ra = &file->f_ra;
unsigned max;
unsigned min;
unsigned orig_next_size;
@@ -239,11 +240,11 @@
if (ra->next_size == -1UL)
goto out; /* Maximally shrunk */

- max = get_max_readahead(file);
+ max = get_max_readahead(ra);
if (max == 0)
goto out; /* No readahead */

- min = get_min_readahead(file);
+ min = get_min_readahead(ra);
orig_next_size = ra->next_size;

if (ra->next_size == 0 && offset == 0) {
@@ -316,7 +317,8 @@
ra->ahead_start = 0; /* Invalidate these */
ra->ahead_size = 0;

- actual = do_page_cache_readahead(file, offset, ra->size);
+ actual = do_page_cache_readahead(mapping, filp, offset,
+ ra->size);
check_ra_success(ra, ra->size, actual, orig_next_size);
} else {
/*
@@ -327,7 +329,7 @@
if (ra->ahead_start == 0) {
ra->ahead_start = ra->start + ra->size;
ra->ahead_size = ra->next_size;
- actual = do_page_cache_readahead(file,
+ actual = do_page_cache_readahead(mapping, filp,
ra->ahead_start, ra->ahead_size);
check_ra_success(ra, ra->ahead_size,
actual, orig_next_size);
@@ -342,12 +344,11 @@
* but somewhat ascending. So readaround favours pages beyond the target one.
* We also boost the window size, as it can easily shrink due to misses.
*/
-void page_cache_readaround(struct file *file, unsigned long offset)
+void page_cache_readaround(struct address_space *mapping, struct file_ra_state *ra,
+ struct file *filp, unsigned long offset)
{
- struct file_ra_state *ra = &file->f_ra;
-
if (ra->next_size != -1UL) {
- const unsigned long min = get_min_readahead(file) * 2;
+ const unsigned long min = get_min_readahead(ra) * 2;
unsigned long target;
unsigned long backward;

@@ -365,7 +366,7 @@
target = 0;
else
target -= backward;
- page_cache_readahead(file, target);
+ page_cache_readahead(mapping, ra, filp, target);
}
}

@@ -383,10 +384,9 @@
* that the readahead window size will stabilise around the maximum level at
* which there is no thrashing.
*/
-void handle_ra_miss(struct file *file)
+void handle_ra_miss(struct address_space *mapping, struct file_ra_state *ra)
{
- struct file_ra_state *ra = &file->f_ra;
- const unsigned long min = get_min_readahead(file);
+ const unsigned long min = get_min_readahead(ra);

if (ra->next_size == -1UL) {
ra->next_size = min;


2002-10-16 16:13:56

by Andrew Morton

[permalink] [raw]
Subject: Re: [PATCH] do_generic_file_read / readahead adjustments

David Howells wrote:
>
> The attached patch does the following three things:
>
> (1) Makes the functions in mm/readahead.c only use struct file* to pass to
> readpage(). address_mapping* and file_ra_state* are used instead to keep
> track of readahead stuff.
>
> (2) Adds a new function do_generic_mapping_read() that is similar to
> do_generic_file_read(), except that it uses a mapping pointer and a
> readahead state pointer to access a file. The file* is only used to pass
> to readpage().
>
> (3) Turns do_generic_file_read() into an inline function in linux/fs.h that
> simply wraps do_generic_mapping_read().
>

Seems sensible. Is there something out there which actually uses this?

2002-10-16 16:30:32

by Rik van Riel

[permalink] [raw]
Subject: Re: [PATCH] do_generic_file_read / readahead adjustments

On Wed, 16 Oct 2002, Andrew Morton wrote:
> David Howells wrote:
> >
> > The attached patch does the following three things:
>
> Seems sensible. Is there something out there which actually uses this?

I think David has a few things up his sleeve. The patch looks
sensible.

Rik
--
A: No.
Q: Should I include quotations after my reply?

http://www.surriel.com/ http://distro.conectiva.com/

2002-10-16 17:49:58

by David Howells

[permalink] [raw]
Subject: Re: [PATCH] do_generic_file_read / readahead adjustments


> On Wed, 16 Oct 2002, Andrew Morton wrote:
> > David Howells wrote:
> > >
> > > The attached patch does the following three things:
> >
> > Seems sensible. Is there something out there which actually uses this?
>
> I think David has a few things up his sleeve. The patch looks
> sensible.

I'm writing a general cache for filesystems such as AFS, NFSv4, and
Lustre. Block devices are made available to the "cache manager" by means of a
filesystem that can be mounted. I'm storing metadata in an inode in the
cache, but to scan this at the moment I need to obtain a "struct file" to use
with do_generic_file_read().

This involves either creating a dummy dentry and struct file (which will cause
Al Viro to come looking for me with a shotgun), or using an extra auxiliary
filesystem mounted with do_kern_mount(), neither of which is particularly
appealing.

This patch is the alternative: make a function (do_generic_mapping_read())
that I can pass an inode or an address_space to, and make
do_generic_file_read() call that. This allows me to make use of readahead
semantics without having to reinvent them for myself.

David