Define some helpers on struct file_ra_state.
Signed-off-by: Wu Fengguang <[email protected]>
---
mm/readahead.c | 188 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 186 insertions(+), 2 deletions(-)
--- linux-2.6.17-rc4-mm3.orig/mm/readahead.c
+++ linux-2.6.17-rc4-mm3/mm/readahead.c
@@ -854,6 +854,190 @@ static unsigned long node_readahead_agin
}
/*
+ * Some helpers for querying/building a read-ahead request.
+ *
+ * Diagram for some variable names used frequently:
+ *
+ * |<------- la_size ------>|
+ * +-----------------------------------------+
+ * | # |
+ * +-----------------------------------------+
+ * ra_index -->|<---------------- ra_size -------------->|
+ *
+ */
+
+static enum ra_class ra_class_new(struct file_ra_state *ra)
+{
+ return ra->flags & RA_CLASS_MASK;
+}
+
+static inline enum ra_class ra_class_old(struct file_ra_state *ra)
+{
+ return (ra->flags >> RA_CLASS_SHIFT) & RA_CLASS_MASK;
+}
+
+static unsigned long ra_readahead_size(struct file_ra_state *ra)
+{
+ return ra->readahead_index - ra->ra_index;
+}
+
+static unsigned long ra_lookahead_size(struct file_ra_state *ra)
+{
+ return ra->readahead_index - ra->lookahead_index;
+}
+
+static unsigned long ra_invoke_interval(struct file_ra_state *ra)
+{
+ return ra->lookahead_index - ra->la_index;
+}
+
+/*
+ * The 64bit cache_hits stores three accumulated values and a counter value.
+ * MSB LSB
+ * 3333333333333333 : 2222222222222222 : 1111111111111111 : 0000000000000000
+ */
+static int ra_cache_hit(struct file_ra_state *ra, int nr)
+{
+ return (ra->cache_hits >> (nr * 16)) & 0xFFFF;
+}
+
+/*
+ * Conceptual code:
+ * ra_cache_hit(ra, 1) += ra_cache_hit(ra, 0);
+ * ra_cache_hit(ra, 0) = 0;
+ */
+static void ra_addup_cache_hit(struct file_ra_state *ra)
+{
+ int n;
+
+ n = ra_cache_hit(ra, 0);
+ ra->cache_hits -= n;
+ n <<= 16;
+ ra->cache_hits += n;
+}
+
+/*
+ * The read-ahead is deemed success if cache-hit-rate >= 1/readahead_hit_rate.
+ */
+static int ra_cache_hit_ok(struct file_ra_state *ra)
+{
+ return ra_cache_hit(ra, 0) * readahead_hit_rate >=
+ (ra->lookahead_index - ra->la_index);
+}
+
+/*
+ * Check if @index falls in the @ra request.
+ */
+static int ra_has_index(struct file_ra_state *ra, pgoff_t index)
+{
+ if (index < ra->la_index || index >= ra->readahead_index)
+ return 0;
+
+ if (index >= ra->ra_index)
+ return 1;
+ else
+ return -1;
+}
+
+/*
+ * Which method is issuing this read-ahead?
+ */
+static void ra_set_class(struct file_ra_state *ra,
+ enum ra_class ra_class)
+{
+ unsigned long flags_mask;
+ unsigned long flags;
+ unsigned long old_ra_class;
+
+ flags_mask = ~(RA_CLASS_MASK | (RA_CLASS_MASK << RA_CLASS_SHIFT));
+ flags = ra->flags & flags_mask;
+
+ old_ra_class = ra_class_new(ra) << RA_CLASS_SHIFT;
+
+ ra->flags = flags | old_ra_class | ra_class;
+
+ ra_addup_cache_hit(ra);
+ if (ra_class != RA_CLASS_STATE)
+ ra->cache_hits <<= 16;
+
+ ra->age = node_readahead_aging();
+}
+
+/*
+ * Where is the old read-ahead and look-ahead?
+ */
+static void ra_set_index(struct file_ra_state *ra,
+ pgoff_t la_index, pgoff_t ra_index)
+{
+ ra->la_index = la_index;
+ ra->ra_index = ra_index;
+}
+
+/*
+ * Where is the new read-ahead and look-ahead?
+ */
+static void ra_set_size(struct file_ra_state *ra,
+ unsigned long ra_size, unsigned long la_size)
+{
+ /* Disable look-ahead for loopback file. */
+ if (unlikely(ra->flags & RA_FLAG_NO_LOOKAHEAD))
+ la_size = 0;
+
+ ra->readahead_index = ra->ra_index + ra_size;
+ ra->lookahead_index = ra->readahead_index - la_size;
+}
+
+/*
+ * Submit IO for the read-ahead request in file_ra_state.
+ */
+static int ra_dispatch(struct file_ra_state *ra,
+ struct address_space *mapping, struct file *filp)
+{
+ enum ra_class ra_class = ra_class_new(ra);
+ unsigned long ra_size = ra_readahead_size(ra);
+ unsigned long la_size = ra_lookahead_size(ra);
+ pgoff_t eof_index = PAGES_BYTE(i_size_read(mapping->host)) + 1;
+ int actual;
+
+ if (unlikely(ra->ra_index >= eof_index))
+ return 0;
+
+ /* Snap to EOF. */
+ if (ra->readahead_index + ra_size / 2 > eof_index) {
+ if (ra_class == RA_CLASS_CONTEXT_AGGRESSIVE &&
+ eof_index > ra->lookahead_index + 1)
+ la_size = eof_index - ra->lookahead_index;
+ else
+ la_size = 0;
+ ra_size = eof_index - ra->ra_index;
+ ra_set_size(ra, ra_size, la_size);
+ ra->flags |= RA_FLAG_EOF;
+ }
+
+ actual = __do_page_cache_readahead(mapping, filp,
+ ra->ra_index, ra_size, la_size);
+
+#ifdef CONFIG_DEBUG_READAHEAD
+ if (ra->flags & RA_FLAG_MMAP)
+ ra_account(ra, RA_EVENT_READAHEAD_MMAP, actual);
+ if (ra->readahead_index == eof_index)
+ ra_account(ra, RA_EVENT_READAHEAD_EOF, actual);
+ if (la_size)
+ ra_account(ra, RA_EVENT_LOOKAHEAD, la_size);
+ if (ra_size > actual)
+ ra_account(ra, RA_EVENT_IO_CACHE_HIT, ra_size - actual);
+ ra_account(ra, RA_EVENT_READAHEAD, actual);
+
+ dprintk("readahead-%s(ino=%lu, index=%lu, ra=%lu+%lu-%lu) = %d\n",
+ ra_class_name[ra_class],
+ mapping->host->i_ino, ra->la_index,
+ ra->ra_index, ra_size, la_size, actual);
+#endif /* CONFIG_DEBUG_READAHEAD */
+
+ return actual;
+}
+
+/*
* ra_min is mainly determined by the size of cache memory. Reasonable?
*
* Table of concrete numbers for 4KB page size:
@@ -925,10 +1109,10 @@ static void ra_account(struct file_ra_st
return;
if (e == RA_EVENT_READAHEAD_HIT && pages < 0) {
- c = (ra->flags >> RA_CLASS_SHIFT) & RA_CLASS_MASK;
+ c = ra_class_old(ra);
pages = -pages;
} else if (ra)
- c = ra->flags & RA_CLASS_MASK;
+ c = ra_class_new(ra);
else
c = RA_CLASS_NONE;
--
Wu Fengguang <[email protected]> wrote:
>
> Define some helpers on struct file_ra_state.
>
> +/*
> + * The 64bit cache_hits stores three accumulated values and a counter value.
> + * MSB LSB
> + * 3333333333333333 : 2222222222222222 : 1111111111111111 : 0000000000000000
> + */
> +static int ra_cache_hit(struct file_ra_state *ra, int nr)
> +{
> + return (ra->cache_hits >> (nr * 16)) & 0xFFFF;
> +}
So... why not use four u16s?
> +/*
> + * Submit IO for the read-ahead request in file_ra_state.
> + */
> +static int ra_dispatch(struct file_ra_state *ra,
> + struct address_space *mapping, struct file *filp)
> +{
> + enum ra_class ra_class = ra_class_new(ra);
> + unsigned long ra_size = ra_readahead_size(ra);
> + unsigned long la_size = ra_lookahead_size(ra);
> + pgoff_t eof_index = PAGES_BYTE(i_size_read(mapping->host)) + 1;
Sigh. I guess one gets used to that PAGES_BYTE thing after a while. If
you're not familiar with it, it obfuscates things.
<hunts around for its definition>
So in fact it's converting a loff_t to a pgoff_t. Why not call it
convert_loff_t_to_pgoff_t()? ;)
Something better, anyway. Something lower-case and an inline-not-a-macro, too.
> + int actual;
> +
> + if (unlikely(ra->ra_index >= eof_index))
> + return 0;
> +
> + /* Snap to EOF. */
> + if (ra->readahead_index + ra_size / 2 > eof_index) {
You've had a bit of a think and you've arrived at a design decision
surrounding the arithmetic in here. It's very very hard to look at this line
of code and to work out why you decided to implement it in this fashion.
The only way to make such code comprehensible (and hence maintainable) is
to fully comment such things.
On Fri, May 26, 2006 at 10:15:36AM -0700, Andrew Morton wrote:
> Wu Fengguang <[email protected]> wrote:
> >
> > Define some helpers on struct file_ra_state.
> >
> > +/*
> > + * The 64bit cache_hits stores three accumulated values and a counter value.
> > + * MSB LSB
> > + * 3333333333333333 : 2222222222222222 : 1111111111111111 : 0000000000000000
> > + */
> > +static int ra_cache_hit(struct file_ra_state *ra, int nr)
> > +{
> > + return (ra->cache_hits >> (nr * 16)) & 0xFFFF;
> > +}
>
> So... why not use four u16s?
Sure, me too, have been thinking about it ;-)
> > +/*
> > + * Submit IO for the read-ahead request in file_ra_state.
> > + */
> > +static int ra_dispatch(struct file_ra_state *ra,
> > + struct address_space *mapping, struct file *filp)
> > +{
> > + enum ra_class ra_class = ra_class_new(ra);
> > + unsigned long ra_size = ra_readahead_size(ra);
> > + unsigned long la_size = ra_lookahead_size(ra);
> > + pgoff_t eof_index = PAGES_BYTE(i_size_read(mapping->host)) + 1;
>
> Sigh. I guess one gets used to that PAGES_BYTE thing after a while. If
> you're not familiar with it, it obfuscates things.
>
> <hunts around for its definition>
>
> So in fact it's converting a loff_t to a pgoff_t. Why not call it
> convert_loff_t_to_pgoff_t()? ;)
>
> Something better, anyway. Something lower-case and an inline-not-a-macro, too.
I'm now using DIV_ROUND_UP(), maybe we can settle with that.
> > + int actual;
> > +
> > + if (unlikely(ra->ra_index >= eof_index))
> > + return 0;
> > +
> > + /* Snap to EOF. */
> > + if (ra->readahead_index + ra_size / 2 > eof_index) {
>
> You've had a bit of a think and you've arrived at a design decision
> surrounding the arithmetic in here. It's very very hard to look at this line
> of code and to work out why you decided to implement it in this fashion.
> The only way to make such code comprehensible (and hence maintainable) is
> to fully comment such things.
Sorry for being a bit lazy.
It is true that some situations are rather tricky, and some
if()/numbers are carefully chosen. I'll continue expanding/detailing
the documentation with future releases. Or would you prefer to add
them as small and distinct patches?
Comments for this one (also rationalized code):
/*
* Snap to EOF, if the request
* - crossed the EOF boundary;
 * - is close to EOF (explained below).
*
 * Imagine a file sized 18 pages, and we decided to read-ahead the
* first 16 pages. It is highly possible that in the near future we
* will have to do another read-ahead for the remaining 2 pages,
* which is an unfavorable small I/O.
*
* So we prefer to take a bit risk to enlarge the current read-ahead,
* to eliminate possible future small I/O.
*/
if (ra->readahead_index + ra_readahead_size(ra)/4 > eof_index) {
ra->readahead_index = eof_index;
if (ra->lookahead_index > eof_index)
ra->lookahead_index = eof_index;
ra->flags |= RA_FLAG_EOF;
}
Wu