Extend struct file_ra_state to support the on-demand readahead logic.
Also define some helpers for it.
Signed-off-by: Fengguang Wu <[email protected]>
---
include/linux/fs.h | 64 +++++++++++++++++++++++++++++++++++++++++++
mm/readahead.c | 19 ++++++++++++
2 files changed, 83 insertions(+)
--- linux-2.6.22-rc1-mm1.orig/include/linux/fs.h
+++ linux-2.6.22-rc1-mm1/include/linux/fs.h
@@ -702,6 +702,10 @@ struct fown_struct {
/*
* Track a single file's readahead state
+ *
+ * ================#============|==================#==================|
+ *                 ^            ^                  ^                  ^
+ *   file_ra_state.la_index  .ra_index   .lookahead_index    .readahead_index
*/
struct file_ra_state {
unsigned long start; /* Current window */
@@ -711,6 +715,12 @@ struct file_ra_state {
unsigned long prev_index; /* Cache last read() position */
unsigned long ahead_start; /* Ahead window */
unsigned long ahead_size;
+
+ pgoff_t la_index; /* enqueue time */
+ pgoff_t ra_index; /* begin offset */
+ pgoff_t lookahead_index; /* time to do next readahead */
+ pgoff_t readahead_index; /* end offset */
+
unsigned long ra_pages; /* Maximum readahead window */
unsigned long mmap_hit; /* Cache hit stat for mmap accesses */
unsigned long mmap_miss; /* Cache miss stat for mmap accesses */
@@ -719,6 +729,60 @@ struct file_ra_state {
#define RA_FLAG_MISS 0x01 /* a cache miss occured against this file */
#define RA_FLAG_INCACHE 0x02 /* file is already in cache */
+/*
+ * Measuring read-ahead sizes.
+ *
+ *                 |----------- readahead size ------------>|
+ * ===#============|==================#=====================|
+ *    |------- invoke interval ------>|-- lookahead size -->|
+ */
+static inline unsigned long ra_readahead_size(struct file_ra_state *ra)
+{
+ return ra->readahead_index - ra->ra_index;
+}
+
+static inline unsigned long ra_lookahead_size(struct file_ra_state *ra)
+{
+ return ra->readahead_index - ra->lookahead_index;
+}
+
+static inline unsigned long ra_invoke_interval(struct file_ra_state *ra)
+{
+ return ra->lookahead_index - ra->la_index;
+}
+
+/*
+ * Check if @index falls in the readahead windows.
+ */
+static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
+{
+ return (index >= ra->la_index &&
+ index < ra->readahead_index);
+}
+
+/*
+ * Where is the old read-ahead and look-ahead?
+ */
+static inline void ra_set_index(struct file_ra_state *ra,
+ pgoff_t la_index, pgoff_t ra_index)
+{
+ ra->la_index = la_index;
+ ra->ra_index = ra_index;
+}
+
+/*
+ * Where is the new read-ahead and look-ahead?
+ */
+static inline void ra_set_size(struct file_ra_state *ra,
+ unsigned long ra_size, unsigned long la_size)
+{
+ ra->readahead_index = ra->ra_index + ra_size;
+ ra->lookahead_index = ra->ra_index + ra_size - la_size;
+}
+
+unsigned long ra_submit(struct file_ra_state *ra,
+ struct address_space *mapping, struct file *filp);
+
struct file {
/*
* fu_list becomes invalid after file_free is called and queued via
--- linux-2.6.22-rc1-mm1.orig/mm/readahead.c
+++ linux-2.6.22-rc1-mm1/mm/readahead.c
@@ -592,3 +592,22 @@ unsigned long max_sane_readahead(unsigne
return min(nr, (node_page_state(numa_node_id(), NR_INACTIVE)
+ node_page_state(numa_node_id(), NR_FREE_PAGES)) / 2);
}
+
+/*
+ * Submit IO for the read-ahead request in file_ra_state.
+ */
+unsigned long ra_submit(struct file_ra_state *ra,
+ struct address_space *mapping, struct file *filp)
+{
+ unsigned long ra_size;
+ unsigned long la_size;
+ int actual;
+
+ ra_size = ra_readahead_size(ra);
+ la_size = ra_lookahead_size(ra);
+ actual = __do_page_cache_readahead(mapping, filp,
+ ra->ra_index, ra_size, la_size);
+
+ return actual;
+}
+EXPORT_SYMBOL_GPL(ra_submit);
--
On Thu, 2007-05-17 at 06:47 +0800, Fengguang Wu wrote:
> /*
> * Track a single file's readahead state
> + *
> + * ================#============|==================#==================|
> + * ^ ^ ^ ^
> + * file_ra_state.la_index .ra_index .lookahead_index .readahead_index
> */
> struct file_ra_state {
> unsigned long start; /* Current window */
> @@ -711,6 +715,12 @@ struct file_ra_state {
> unsigned long prev_index; /* Cache last read() position */
> unsigned long ahead_start; /* Ahead window */
> unsigned long ahead_size;
> +
> + pgoff_t la_index; /* enqueue time */
> + pgoff_t ra_index; /* begin offset */
> + pgoff_t lookahead_index; /* time to do next readahead */
> + pgoff_t readahead_index; /* end offset */
> +
Hi Fengguang,
I found these variables a little confusing. la_index is the last offset
passed to ondemand_readahead, so perhaps "last_request_start" is a
better name? The comment "enqueue time" seems strange, too.
ra_index seems ok, although "readahead_start" might be better. Perhaps
readahead_index should be expressed as readahead_size, which is how it
seems to be used. Perhaps "lookahead_index" is best expressed as a
buffer at the end of the readahead zone (readahead_min?).
ie:
pgoff_t last_request_start; /* start of req which triggered readahead */
pgoff_t readahead_start; /* Where readahead started */
pgoff_t readahead_size; /* PAGE_CACHE_SIZE units of readahead */
pgoff_t readahead_min; /* readahead_size left before we recalc */
This gets rid of many of the accessors, I think, and avoids introducing
a new term to understand (lookahead).
> +/*
> + * Where is the old read-ahead and look-ahead?
> + */
> +static inline void ra_set_index(struct file_ra_state *ra,
> + pgoff_t la_index, pgoff_t ra_index)
> +{
> + ra->la_index = la_index;
> + ra->ra_index = ra_index;
> +}
> +
> +/*
> + * Where is the new read-ahead and look-ahead?
> + */
> +static inline void ra_set_size(struct file_ra_state *ra,
> + unsigned long ra_size, unsigned long la_size)
> +{
> + ra->readahead_index = ra->ra_index + ra_size;
> + ra->lookahead_index = ra->ra_index + ra_size - la_size;
> +}
These are only called in one place, so I think it's clearer to do this
there directly. But I see you exported ra_submit, too, even though it's
only used in the same file. Are there plans for other users?
Thanks,
Rusty.
Hi Rusty,
On Tue, Jun 12, 2007 at 01:30:50PM +1000, Rusty Russell wrote:
> On Thu, 2007-05-17 at 06:47 +0800, Fengguang Wu wrote:
> > /*
> > * Track a single file's readahead state
> > + *
> > + * ================#============|==================#==================|
> > + * ^ ^ ^ ^
> > + * file_ra_state.la_index .ra_index .lookahead_index .readahead_index
> > */
> > struct file_ra_state {
> > unsigned long start; /* Current window */
> > @@ -711,6 +715,12 @@ struct file_ra_state {
> > unsigned long prev_index; /* Cache last read() position */
> > unsigned long ahead_start; /* Ahead window */
> > unsigned long ahead_size;
> > +
> > + pgoff_t la_index; /* enqueue time */
> > + pgoff_t ra_index; /* begin offset */
> > + pgoff_t lookahead_index; /* time to do next readahead */
> > + pgoff_t readahead_index; /* end offset */
> > +
>
> I found these variables a little confusing. la_index is the last offset
> passed to ondemand_readahead, so perhaps "last_request_start" is a
> better name? The comment "enqueue time" seems strange, too.
Yes, they are a bit confusing. Sorry for the bad naming and comments!
The precise meanings can be:
la_index - the time (where we are reading) when the readahead window is established
ra_index - where the readahead window starts
lookahead_index - the time (on reading of which) to push forward the readahead window
readahead_index - where the readahead window ends, or equivalently
                  where the next readahead should start
In the normal case, when the readahead window is pushed forward, the
following holds:
la_index = lookahead_index; lookahead_index = new-value;
ra_index = readahead_index; readahead_index = new-value;
> ra_index seems ok, although "readahead_start" might be better. Perhaps
> readahead_index should be expressed as readahead_size, which is how it
> seems to be used. Perhaps "lookahead_index" is best expressed as a
> buffer at the end of the readahead zone (readahead_min?).
>
> ie:
> pgoff_t last_request_start; /* start of req which triggered readahead */
> pgoff_t readahead_start; /* Where readahead started */
> pgoff_t readahead_size; /* PAGE_CACHE_SIZE units of readahead */
> pgoff_t readahead_min; /* readahead_size left before we recalc */
>
> This gets rid of many of the accessors, I think, and avoids introducing
> a new term to understand (lookahead).
Both indexes and sizes will be used in the code, so some calculation
will always be necessary somewhere. If there's a good naming scheme,
either form (index-based or size-based) is OK with me :)
In fact, there are two kinds of windows and one buffer:
                |---------- readahead window ----------->|
===#============|==================#=====================|
   |--- reader walking window ---->|--- async buffer --->|
'lookahead' is not a standard term, while 'readahead_min' may be
confusing for some people? Anyway, I'd like to propose two more
possible schemes:
pgoff_t ahead_start; /* readahead window */
pgoff_t ahead_end;
pgoff_t reader_start; /* on read of which the ahead window was established */
pgoff_t reader_end; /* on read of which the ahead window will be pushed forward */
or preferably:
pgoff_t start; /* where readahead started */
unsigned long size; /* # of readahead pages */
unsigned long async_size; /* do asynchronous readahead when there are only # of pages ahead */
unsigned long async_size_old; /* TODO: this one is not needed for now */
Any opinions? Thanks.
> > +/*
> > + * Where is the old read-ahead and look-ahead?
> > + */
> > +static inline void ra_set_index(struct file_ra_state *ra,
> > + pgoff_t la_index, pgoff_t ra_index)
> > +{
> > + ra->la_index = la_index;
> > + ra->ra_index = ra_index;
> > +}
> > +
> > +/*
> > + * Where is the new read-ahead and look-ahead?
> > + */
> > +static inline void ra_set_size(struct file_ra_state *ra,
> > + unsigned long ra_size, unsigned long la_size)
> > +{
> > + ra->readahead_index = ra->ra_index + ra_size;
> > + ra->lookahead_index = ra->ra_index + ra_size - la_size;
> > +}
>
> These are only called in one place, so I think it's clearer to do this
> there directly. But I see you exported ra_submit, too, even though it's
> only used in the same file. Are there plans for other users?
Yes, if we are to re-introduce the adaptive readahead, the functions
will be reused.
Thank you,
Fengguang
On Tue, 2007-06-12 at 20:07 +0800, Fengguang Wu wrote:
> Hi Rusty,
Hi Fengguang,
> or preferably:
>
> pgoff_t start; /* where readahead started */
> unsigned long size; /* # of readahead pages */
> unsigned long async_size; /* do asynchronous readahead when there are only # of pages ahead */
>
> unsigned long async_size_old; /* TODO: this one is not needed for now */
>
> Any opinions? Thanks.
These names and comments are really nice. I think the code will become
more readable after this, too.
Did you want me to try to make this patch, or did you want to do it?
Thanks,
Rusty.
Hi Rusty,
On Wed, Jun 13, 2007 at 10:27:19AM +1000, Rusty Russell wrote:
> On Tue, 2007-06-12 at 20:07 +0800, Fengguang Wu wrote:
> > or preferably:
> >
> > pgoff_t start; /* where readahead started */
> > unsigned long size; /* # of readahead pages */
> > unsigned long async_size; /* do asynchronous readahead when there are only # of pages ahead */
> >
> > unsigned long async_size_old; /* TODO: this one is not needed for now */
> >
> > Any opinions? Thanks.
>
> These names and comments are really nice. I think the code will become
> more readable after this, too.
Thank you!
> Did you want me to try to make this patch, or did you want to do it?
I'll make it, after your patch, hehe.
Fengguang