2015-10-15 06:31:25

by NeilBrown

[permalink] [raw]
Subject: [PATCH/RFC] sunrpc/cache: make cache flushing more reliable.


The caches used to store sunrpc authentication information can be
flushed by writing a timestamp to a file in /proc.

This timestamp has a one-second resolution and any entry in cache that
was last_refreshed *before* that time is treated as expired.

This is problematic as it is not possible to reliably flush the cache
without interrupting NFS service.
If the current time is written to the "flush" file, any entry that was
added since the current second started will still be treated as valid.
If one second beyond than the current time is written to the file
then no entries can be valid until the second ticks over. This will
mean that no NFS request will be handled for up to 1 second.

To resolve this issue we make two changes:

1/ treat an entry as expired if the timestamp when it was last_refreshed
is before *or the same as* the expiry time. This means that current
code which writes out the current time will now flush the cache
reliably.

2/ when a new entry in added to the cache - set the last_refresh timestamp
to 1 second *beyond* the current flush time, when that not in the
past.
This ensures that newly added entries will always be valid.


Now that we have a very reliable way to flush the cache, and also
since we are using "since-boot" timestamps which are monotonic,
change cache_purge() to set the smallest future flush_time which
will work, and leave it there: don't revert to '1'.

Also disable the setting of the 'flush_time' far into the future.
That has never been useful and is now awkward as it would cause
last_refresh times to be strange.
Finally: if a request is made to set the 'flush_time' to the current
second, assume the intent is to flush the cache and advance it, if
necessary, to 1 second beyond the current 'flush_time' so that all
active entries will be deemed to be expired.

As part of this we need to add a 'cache_detail' arg to cache_init()
and cache_fresh_locked() so they can find the current ->flush_time.

Signed-off-by: NeilBrown <[email protected]>
Reported-by: Olaf Kirch <[email protected]>

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 03d3b4c92d9f..ed03c9f7f908 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -48,8 +48,10 @@
struct cache_head {
struct hlist_node cache_list;
time_t expiry_time; /* After time time, don't use the data */
- time_t last_refresh; /* If CACHE_PENDING, this is when upcall
- * was sent, else this is when update was received
+ time_t last_refresh; /* If CACHE_PENDING, this is when upcall was
+ * sent, else this is when update was
+ * received, though it is alway set to
+ * be *after* ->flush_time.
*/
struct kref ref;
unsigned long flags;
@@ -105,8 +107,12 @@ struct cache_detail {
/* fields below this comment are for internal use
* and should not be touched by cache owners
*/
- time_t flush_time; /* flush all cache items with last_refresh
- * earlier than this */
+ time_t flush_time; /* flush all cache items with
+ * last_refresh at or earlier
+ * than this. last_refresh
+ * is never set at or earlier
+ * than this.
+ */
struct list_head others;
time_t nextcheck;
int entries;
@@ -203,7 +209,7 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
{
return (h->expiry_time < seconds_since_boot()) ||
- (detail->flush_time > h->last_refresh);
+ (detail->flush_time >= h->last_refresh);
}

extern int cache_check(struct cache_detail *detail,
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 4a2340a54401..d220cc36ed2f 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -41,13 +41,16 @@
static bool cache_defer_req(struct cache_req *req, struct cache_head *item);
static void cache_revisit_request(struct cache_head *item);

-static void cache_init(struct cache_head *h)
+static void cache_init(struct cache_head *h, struct cache_detail *detail)
{
time_t now = seconds_since_boot();
INIT_HLIST_NODE(&h->cache_list);
h->flags = 0;
kref_init(&h->ref);
h->expiry_time = now + CACHE_NEW_EXPIRY;
+ if (now <= detail->flush_time)
+ /* ensure it isn't already expired */
+ now = detail->flush_time + 1;
h->last_refresh = now;
}

@@ -81,7 +84,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
* we might get lose if we need to
* cache_put it soon.
*/
- cache_init(new);
+ cache_init(new, detail);
detail->init(new, key);

write_lock(&detail->hash_lock);
@@ -116,10 +119,15 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);

static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);

-static void cache_fresh_locked(struct cache_head *head, time_t expiry)
+static void cache_fresh_locked(struct cache_head *head, time_t expiry,
+ struct cache_detail *detail)
{
+ time_t now = seconds_since_boot();
+ if (now <= detail->flush_time)
+ /* ensure it isn't immediately treated as expired */
+ now = detail->flush_time + 1;
head->expiry_time = expiry;
- head->last_refresh = seconds_since_boot();
+ head->last_refresh = now;
smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */
set_bit(CACHE_VALID, &head->flags);
}
@@ -149,7 +157,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
set_bit(CACHE_NEGATIVE, &old->flags);
else
detail->update(old, new);
- cache_fresh_locked(old, new->expiry_time);
+ cache_fresh_locked(old, new->expiry_time, detail);
write_unlock(&detail->hash_lock);
cache_fresh_unlocked(old, detail);
return old;
@@ -162,7 +170,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
cache_put(old, detail);
return NULL;
}
- cache_init(tmp);
+ cache_init(tmp, detail);
detail->init(tmp, old);

write_lock(&detail->hash_lock);
@@ -173,8 +181,8 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]);
detail->entries++;
cache_get(tmp);
- cache_fresh_locked(tmp, new->expiry_time);
- cache_fresh_locked(old, 0);
+ cache_fresh_locked(tmp, new->expiry_time, detail);
+ cache_fresh_locked(old, 0, detail);
write_unlock(&detail->hash_lock);
cache_fresh_unlocked(tmp, detail);
cache_fresh_unlocked(old, detail);
@@ -219,7 +227,8 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
rv = cache_is_valid(h);
if (rv == -EAGAIN) {
set_bit(CACHE_NEGATIVE, &h->flags);
- cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
+ cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY,
+ detail);
rv = -ENOENT;
}
write_unlock(&detail->hash_lock);
@@ -487,10 +496,13 @@ EXPORT_SYMBOL_GPL(cache_flush);

void cache_purge(struct cache_detail *detail)
{
- detail->flush_time = LONG_MAX;
+ time_t now = seconds_since_boot();
+ if (detail->flush_time >= now)
+ now = detail->flush_time + 1;
+ /* 'now' is the maximum value any 'last_refresh' can have */
+ detail->flush_time = now + 1;
detail->nextcheck = seconds_since_boot();
cache_flush();
- detail->flush_time = 1;
}
EXPORT_SYMBOL_GPL(cache_purge);

@@ -1436,6 +1448,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
{
char tbuf[20];
char *bp, *ep;
+ time_t then, now;

if (*ppos || count > sizeof(tbuf)-1)
return -EINVAL;
@@ -1447,8 +1460,22 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
return -EINVAL;

bp = tbuf;
- cd->flush_time = get_expiry(&bp);
- cd->nextcheck = seconds_since_boot();
+ then = get_expiry(&bp);
+ now = seconds_since_boot();
+ cd->nextcheck = now;
+ /* Can only set flush_time to 1 second beyond "now", or
+ * possibly 1 second beyond flushtime. This is because
+ * flush_time never goes backwards so it mustn't get too far
+ * ahead of time.
+ */
+ if (then >= now) {
+ /* Want to flush everything, so behave like cache_purge() */
+ if (cd->flush_time >= now)
+ now = cd->flush_time + 1;
+ then = now + 1;
+ }
+
+ cd->flush_time = then;
cache_flush();

*ppos += count;


Attachments:
signature.asc (818.00 B)

2015-10-15 06:39:54

by NeilBrown

[permalink] [raw]
Subject: Re: [PATCH/RFC] sunrpc/cache: make cache flushing more reliable.


Sorry.. I didn't refresh before posting....

> @@ -487,10 +496,13 @@ EXPORT_SYMBOL_GPL(cache_flush);
>
> void cache_purge(struct cache_detail *detail)
> {
> - detail->flush_time = LONG_MAX;
> + time_t now = seconds_since_boot();
> + if (detail->flush_time >= now)
> + now = detail->flush_time + 1;
> + /* 'now' is the maximum value any 'last_refresh' can have */
> + detail->flush_time = now + 1;

This should not have the '+ 1'.

> detail->nextcheck = seconds_since_boot();
> cache_flush();
> - detail->flush_time = 1;
> }
> EXPORT_SYMBOL_GPL(cache_purge);
>
> @@ -1447,8 +1460,22 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
> return -EINVAL;
>
> bp = tbuf;
> - cd->flush_time = get_expiry(&bp);
> - cd->nextcheck = seconds_since_boot();
> + then = get_expiry(&bp);
> + now = seconds_since_boot();
> + cd->nextcheck = now;
> + /* Can only set flush_time to 1 second beyond "now", or
> + * possibly 1 second beyond flushtime. This is because
> + * flush_time never goes backwards so it mustn't get too far
> + * ahead of time.
> + */
> + if (then >= now) {
> + /* Want to flush everything, so behave like cache_purge() */
> + if (cd->flush_time >= now)
> + now = cd->flush_time + 1;
> + then = now + 1;

Nor should.

I'll resend it people are otherwise happy.

NeilBrown


Attachments:
signature.asc (818.00 B)

2015-10-15 13:50:39

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH/RFC] sunrpc/cache: make cache flushing more reliable.

On Thu, Oct 15, 2015 at 05:39:47PM +1100, Neil Brown wrote:
>
> Sorry.. I didn't refresh before posting....
>
> > @@ -487,10 +496,13 @@ EXPORT_SYMBOL_GPL(cache_flush);
> >
> > void cache_purge(struct cache_detail *detail)
> > {
> > - detail->flush_time = LONG_MAX;
> > + time_t now = seconds_since_boot();
> > + if (detail->flush_time >= now)
> > + now = detail->flush_time + 1;
> > + /* 'now' is the maximum value any 'last_refresh' can have */
> > + detail->flush_time = now + 1;
>
> This should not have the '+ 1'.
>
> > detail->nextcheck = seconds_since_boot();
> > cache_flush();
> > - detail->flush_time = 1;
> > }
> > EXPORT_SYMBOL_GPL(cache_purge);
> >
> > @@ -1447,8 +1460,22 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
> > return -EINVAL;
> >
> > bp = tbuf;
> > - cd->flush_time = get_expiry(&bp);
> > - cd->nextcheck = seconds_since_boot();
> > + then = get_expiry(&bp);
> > + now = seconds_since_boot();
> > + cd->nextcheck = now;
> > + /* Can only set flush_time to 1 second beyond "now", or
> > + * possibly 1 second beyond flushtime. This is because
> > + * flush_time never goes backwards so it mustn't get too far
> > + * ahead of time.
> > + */
> > + if (then >= now) {
> > + /* Want to flush everything, so behave like cache_purge() */
> > + if (cd->flush_time >= now)
> > + now = cd->flush_time + 1;
> > + then = now + 1;
>
> Nor should.
>
> I'll resend it people are otherwise happy.

I'm otherwise happy! Looks like a reasonable approach.

(The one thing I wonder is whether it would be clearer to outright fail
writes that attempt to create already-expired cache entries.

But I think that's an unimportant corner case really. And if there are
existing cases where that's happening then perhaps it's less disruptive
just to let them expire a second later rathern than to introduce a new
error.)

--b.

2015-10-15 21:57:31

by NeilBrown

[permalink] [raw]
Subject: Re: [PATCH/RFC] sunrpc/cache: make cache flushing more reliable.

"J. Bruce Fields" <[email protected]> writes:

>>
>> I'll resend it people are otherwise happy.
>
> I'm otherwise happy! Looks like a reasonable approach.
>
> (The one thing I wonder is whether it would be clearer to outright fail
> writes that attempt to create already-expired cache entries.
>
> But I think that's an unimportant corner case really. And if there are
> existing cases where that's happening then perhaps it's less disruptive
> just to let them expire a second later rathern than to introduce a new
> error.)

I hadn't thought about. There is often a good case for failing a
meaningless value rather than silently doing something different.
Were I creating a new interface, I would probably do that.
But revising an old interface that doesn't currently return an error...
I agree with you that it is likely less disruptive to not introduce a
new error. Thanks for asking the question.

I'll post the proper patch separately.

Thanks,
NeilBrown


Attachments:
signature.asc (818.00 B)

2015-10-15 21:59:16

by NeilBrown

[permalink] [raw]
Subject: [PATCH v2] sunrpc/cache: make cache flushing more reliable.


The caches used to store sunrpc authentication information can be
flushed by writing a timestamp to a file in /proc.

This timestamp has a one-second resolution and any entry in cache that
was last_refreshed *before* that time is treated as expired.

This is problematic as it is not possible to reliably flush the cache
without interrupting NFS service.
If the current time is written to the "flush" file, any entry that was
added since the current second started will still be treated as valid.
If one second beyond than the current time is written to the file
then no entries can be valid until the second ticks over. This will
mean that no NFS request will be handled for up to 1 second.

To resolve this issue we make two changes:

1/ treat an entry as expired if the timestamp when it was last_refreshed
is before *or the same as* the expiry time. This means that current
code which writes out the current time will now flush the cache
reliably.

2/ when a new entry in added to the cache - set the last_refresh timestamp
to 1 second *beyond* the current flush time, when that not in the
past.
This ensures that newly added entries will always be valid.


Now that we have a very reliable way to flush the cache, and also
since we are using "since-boot" timestamps which are monotonic,
change cache_purge() to set the smallest future flush_time which
will work, and leave it there: don't revert to '1'.

Also disable the setting of the 'flush_time' far into the future.
That has never been useful and is now awkward as it would cause
last_refresh times to be strange.
Finally: if a request is made to set the 'flush_time' to the current
second, assume the intent is to flush the cache and advance it, if
necessary, to 1 second beyond the current 'flush_time' so that all
active entries will be deemed to be expired.

As part of this we need to add a 'cache_detail' arg to cache_init()
and cache_fresh_locked() so they can find the current ->flush_time.

Signed-off-by: NeilBrown <[email protected]>
Reported-by: Olaf Kirch <[email protected]>

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 03d3b4c92d9f..ed03c9f7f908 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -48,8 +48,10 @@
struct cache_head {
struct hlist_node cache_list;
time_t expiry_time; /* After time time, don't use the data */
- time_t last_refresh; /* If CACHE_PENDING, this is when upcall
- * was sent, else this is when update was received
+ time_t last_refresh; /* If CACHE_PENDING, this is when upcall was
+ * sent, else this is when update was
+ * received, though it is alway set to
+ * be *after* ->flush_time.
*/
struct kref ref;
unsigned long flags;
@@ -105,8 +107,12 @@ struct cache_detail {
/* fields below this comment are for internal use
* and should not be touched by cache owners
*/
- time_t flush_time; /* flush all cache items with last_refresh
- * earlier than this */
+ time_t flush_time; /* flush all cache items with
+ * last_refresh at or earlier
+ * than this. last_refresh
+ * is never set at or earlier
+ * than this.
+ */
struct list_head others;
time_t nextcheck;
int entries;
@@ -203,7 +209,7 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
{
return (h->expiry_time < seconds_since_boot()) ||
- (detail->flush_time > h->last_refresh);
+ (detail->flush_time >= h->last_refresh);
}

extern int cache_check(struct cache_detail *detail,
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 4a2340a54401..5e4f815c2b34 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -41,13 +41,16 @@
static bool cache_defer_req(struct cache_req *req, struct cache_head *item);
static void cache_revisit_request(struct cache_head *item);

-static void cache_init(struct cache_head *h)
+static void cache_init(struct cache_head *h, struct cache_detail *detail)
{
time_t now = seconds_since_boot();
INIT_HLIST_NODE(&h->cache_list);
h->flags = 0;
kref_init(&h->ref);
h->expiry_time = now + CACHE_NEW_EXPIRY;
+ if (now <= detail->flush_time)
+ /* ensure it isn't already expired */
+ now = detail->flush_time + 1;
h->last_refresh = now;
}

@@ -81,7 +84,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
* we might get lose if we need to
* cache_put it soon.
*/
- cache_init(new);
+ cache_init(new, detail);
detail->init(new, key);

write_lock(&detail->hash_lock);
@@ -116,10 +119,15 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);

static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);

-static void cache_fresh_locked(struct cache_head *head, time_t expiry)
+static void cache_fresh_locked(struct cache_head *head, time_t expiry,
+ struct cache_detail *detail)
{
+ time_t now = seconds_since_boot();
+ if (now <= detail->flush_time)
+ /* ensure it isn't immediately treated as expired */
+ now = detail->flush_time + 1;
head->expiry_time = expiry;
- head->last_refresh = seconds_since_boot();
+ head->last_refresh = now;
smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */
set_bit(CACHE_VALID, &head->flags);
}
@@ -149,7 +157,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
set_bit(CACHE_NEGATIVE, &old->flags);
else
detail->update(old, new);
- cache_fresh_locked(old, new->expiry_time);
+ cache_fresh_locked(old, new->expiry_time, detail);
write_unlock(&detail->hash_lock);
cache_fresh_unlocked(old, detail);
return old;
@@ -162,7 +170,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
cache_put(old, detail);
return NULL;
}
- cache_init(tmp);
+ cache_init(tmp, detail);
detail->init(tmp, old);

write_lock(&detail->hash_lock);
@@ -173,8 +181,8 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]);
detail->entries++;
cache_get(tmp);
- cache_fresh_locked(tmp, new->expiry_time);
- cache_fresh_locked(old, 0);
+ cache_fresh_locked(tmp, new->expiry_time, detail);
+ cache_fresh_locked(old, 0, detail);
write_unlock(&detail->hash_lock);
cache_fresh_unlocked(tmp, detail);
cache_fresh_unlocked(old, detail);
@@ -219,7 +227,8 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
rv = cache_is_valid(h);
if (rv == -EAGAIN) {
set_bit(CACHE_NEGATIVE, &h->flags);
- cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
+ cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY,
+ detail);
rv = -ENOENT;
}
write_unlock(&detail->hash_lock);
@@ -487,10 +496,13 @@ EXPORT_SYMBOL_GPL(cache_flush);

void cache_purge(struct cache_detail *detail)
{
- detail->flush_time = LONG_MAX;
+ time_t now = seconds_since_boot();
+ if (detail->flush_time >= now)
+ now = detail->flush_time + 1;
+ /* 'now' is the maximum value any 'last_refresh' can have */
+ detail->flush_time = now;
detail->nextcheck = seconds_since_boot();
cache_flush();
- detail->flush_time = 1;
}
EXPORT_SYMBOL_GPL(cache_purge);

@@ -1436,6 +1448,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
{
char tbuf[20];
char *bp, *ep;
+ time_t then, now;

if (*ppos || count > sizeof(tbuf)-1)
return -EINVAL;
@@ -1447,8 +1460,22 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
return -EINVAL;

bp = tbuf;
- cd->flush_time = get_expiry(&bp);
- cd->nextcheck = seconds_since_boot();
+ then = get_expiry(&bp);
+ now = seconds_since_boot();
+ cd->nextcheck = now;
+ /* Can only set flush_time to 1 second beyond "now", or
+ * possibly 1 second beyond flushtime. This is because
+ * flush_time never goes backwards so it mustn't get too far
+ * ahead of time.
+ */
+ if (then >= now) {
+ /* Want to flush everything, so behave like cache_purge() */
+ if (cd->flush_time >= now)
+ now = cd->flush_time + 1;
+ then = now;
+ }
+
+ cd->flush_time = then;
cache_flush();

*ppos += count;


Attachments:
signature.asc (818.00 B)

2015-10-22 19:14:35

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2] sunrpc/cache: make cache flushing more reliable.

Thanks, applying.--b.

On Fri, Oct 16, 2015 at 08:59:08AM +1100, Neil Brown wrote:
>
> The caches used to store sunrpc authentication information can be
> flushed by writing a timestamp to a file in /proc.
>
> This timestamp has a one-second resolution and any entry in cache that
> was last_refreshed *before* that time is treated as expired.
>
> This is problematic as it is not possible to reliably flush the cache
> without interrupting NFS service.
> If the current time is written to the "flush" file, any entry that was
> added since the current second started will still be treated as valid.
> If one second beyond than the current time is written to the file
> then no entries can be valid until the second ticks over. This will
> mean that no NFS request will be handled for up to 1 second.
>
> To resolve this issue we make two changes:
>
> 1/ treat an entry as expired if the timestamp when it was last_refreshed
> is before *or the same as* the expiry time. This means that current
> code which writes out the current time will now flush the cache
> reliably.
>
> 2/ when a new entry in added to the cache - set the last_refresh timestamp
> to 1 second *beyond* the current flush time, when that not in the
> past.
> This ensures that newly added entries will always be valid.
>
>
> Now that we have a very reliable way to flush the cache, and also
> since we are using "since-boot" timestamps which are monotonic,
> change cache_purge() to set the smallest future flush_time which
> will work, and leave it there: don't revert to '1'.
>
> Also disable the setting of the 'flush_time' far into the future.
> That has never been useful and is now awkward as it would cause
> last_refresh times to be strange.
> Finally: if a request is made to set the 'flush_time' to the current
> second, assume the intent is to flush the cache and advance it, if
> necessary, to 1 second beyond the current 'flush_time' so that all
> active entries will be deemed to be expired.
>
> As part of this we need to add a 'cache_detail' arg to cache_init()
> and cache_fresh_locked() so they can find the current ->flush_time.
>
> Signed-off-by: NeilBrown <[email protected]>
> Reported-by: Olaf Kirch <[email protected]>
>
> diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
> index 03d3b4c92d9f..ed03c9f7f908 100644
> --- a/include/linux/sunrpc/cache.h
> +++ b/include/linux/sunrpc/cache.h
> @@ -48,8 +48,10 @@
> struct cache_head {
> struct hlist_node cache_list;
> time_t expiry_time; /* After time time, don't use the data */
> - time_t last_refresh; /* If CACHE_PENDING, this is when upcall
> - * was sent, else this is when update was received
> + time_t last_refresh; /* If CACHE_PENDING, this is when upcall was
> + * sent, else this is when update was
> + * received, though it is alway set to
> + * be *after* ->flush_time.
> */
> struct kref ref;
> unsigned long flags;
> @@ -105,8 +107,12 @@ struct cache_detail {
> /* fields below this comment are for internal use
> * and should not be touched by cache owners
> */
> - time_t flush_time; /* flush all cache items with last_refresh
> - * earlier than this */
> + time_t flush_time; /* flush all cache items with
> + * last_refresh at or earlier
> + * than this. last_refresh
> + * is never set at or earlier
> + * than this.
> + */
> struct list_head others;
> time_t nextcheck;
> int entries;
> @@ -203,7 +209,7 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
> static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
> {
> return (h->expiry_time < seconds_since_boot()) ||
> - (detail->flush_time > h->last_refresh);
> + (detail->flush_time >= h->last_refresh);
> }
>
> extern int cache_check(struct cache_detail *detail,
> diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
> index 4a2340a54401..5e4f815c2b34 100644
> --- a/net/sunrpc/cache.c
> +++ b/net/sunrpc/cache.c
> @@ -41,13 +41,16 @@
> static bool cache_defer_req(struct cache_req *req, struct cache_head *item);
> static void cache_revisit_request(struct cache_head *item);
>
> -static void cache_init(struct cache_head *h)
> +static void cache_init(struct cache_head *h, struct cache_detail *detail)
> {
> time_t now = seconds_since_boot();
> INIT_HLIST_NODE(&h->cache_list);
> h->flags = 0;
> kref_init(&h->ref);
> h->expiry_time = now + CACHE_NEW_EXPIRY;
> + if (now <= detail->flush_time)
> + /* ensure it isn't already expired */
> + now = detail->flush_time + 1;
> h->last_refresh = now;
> }
>
> @@ -81,7 +84,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
> * we might get lose if we need to
> * cache_put it soon.
> */
> - cache_init(new);
> + cache_init(new, detail);
> detail->init(new, key);
>
> write_lock(&detail->hash_lock);
> @@ -116,10 +119,15 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
>
> static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
>
> -static void cache_fresh_locked(struct cache_head *head, time_t expiry)
> +static void cache_fresh_locked(struct cache_head *head, time_t expiry,
> + struct cache_detail *detail)
> {
> + time_t now = seconds_since_boot();
> + if (now <= detail->flush_time)
> + /* ensure it isn't immediately treated as expired */
> + now = detail->flush_time + 1;
> head->expiry_time = expiry;
> - head->last_refresh = seconds_since_boot();
> + head->last_refresh = now;
> smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */
> set_bit(CACHE_VALID, &head->flags);
> }
> @@ -149,7 +157,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
> set_bit(CACHE_NEGATIVE, &old->flags);
> else
> detail->update(old, new);
> - cache_fresh_locked(old, new->expiry_time);
> + cache_fresh_locked(old, new->expiry_time, detail);
> write_unlock(&detail->hash_lock);
> cache_fresh_unlocked(old, detail);
> return old;
> @@ -162,7 +170,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
> cache_put(old, detail);
> return NULL;
> }
> - cache_init(tmp);
> + cache_init(tmp, detail);
> detail->init(tmp, old);
>
> write_lock(&detail->hash_lock);
> @@ -173,8 +181,8 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
> hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]);
> detail->entries++;
> cache_get(tmp);
> - cache_fresh_locked(tmp, new->expiry_time);
> - cache_fresh_locked(old, 0);
> + cache_fresh_locked(tmp, new->expiry_time, detail);
> + cache_fresh_locked(old, 0, detail);
> write_unlock(&detail->hash_lock);
> cache_fresh_unlocked(tmp, detail);
> cache_fresh_unlocked(old, detail);
> @@ -219,7 +227,8 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
> rv = cache_is_valid(h);
> if (rv == -EAGAIN) {
> set_bit(CACHE_NEGATIVE, &h->flags);
> - cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
> + cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY,
> + detail);
> rv = -ENOENT;
> }
> write_unlock(&detail->hash_lock);
> @@ -487,10 +496,13 @@ EXPORT_SYMBOL_GPL(cache_flush);
>
> void cache_purge(struct cache_detail *detail)
> {
> - detail->flush_time = LONG_MAX;
> + time_t now = seconds_since_boot();
> + if (detail->flush_time >= now)
> + now = detail->flush_time + 1;
> + /* 'now' is the maximum value any 'last_refresh' can have */
> + detail->flush_time = now;
> detail->nextcheck = seconds_since_boot();
> cache_flush();
> - detail->flush_time = 1;
> }
> EXPORT_SYMBOL_GPL(cache_purge);
>
> @@ -1436,6 +1448,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
> {
> char tbuf[20];
> char *bp, *ep;
> + time_t then, now;
>
> if (*ppos || count > sizeof(tbuf)-1)
> return -EINVAL;
> @@ -1447,8 +1460,22 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
> return -EINVAL;
>
> bp = tbuf;
> - cd->flush_time = get_expiry(&bp);
> - cd->nextcheck = seconds_since_boot();
> + then = get_expiry(&bp);
> + now = seconds_since_boot();
> + cd->nextcheck = now;
> + /* Can only set flush_time to 1 second beyond "now", or
> + * possibly 1 second beyond flushtime. This is because
> + * flush_time never goes backwards so it mustn't get too far
> + * ahead of time.
> + */
> + if (then >= now) {
> + /* Want to flush everything, so behave like cache_purge() */
> + if (cd->flush_time >= now)
> + now = cd->flush_time + 1;
> + then = now;
> + }
> +
> + cd->flush_time = then;
> cache_flush();
>
> *ppos += count;