This should fix for soft lockup bugs triggered on slow systems.
Signed-off-by: Phil Sutter <[email protected]>
---
lib/test_rhashtable.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 8c1ad1c..63654e3 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -236,6 +236,8 @@ static int thread_lookup_test(struct thread_data *tdata)
obj->value, key);
err++;
}
+
+ cond_resched();
}
return err;
}
@@ -251,6 +253,7 @@ static int threadfunc(void *data)
for (i = 0; i < entries; i++) {
tdata->objs[i].value = (tdata->id << 16) | i;
+ cond_resched();
err = rhashtable_insert_fast(&ht, &tdata->objs[i].node,
test_rht_params);
if (err == -ENOMEM || err == -EBUSY) {
@@ -285,6 +288,8 @@ static int threadfunc(void *data)
goto out;
}
tdata->objs[i].value = TEST_INSERT_FAIL;
+
+ cond_resched();
}
err = thread_lookup_test(tdata);
if (err) {
--
2.1.2
After adding cond_resched() calls to threadfunc(), a surprisingly high
rate of insert failures occurred probably due to table resizes getting a
better chance to run in background. To not soften up the remaining
tests, retry inserts until they either succeed or fail permanently.
Signed-off-by: Phil Sutter <[email protected]>
---
lib/test_rhashtable.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 63654e3..093cf84 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -244,7 +244,7 @@ static int thread_lookup_test(struct thread_data *tdata)
static int threadfunc(void *data)
{
- int i, step, err = 0, insert_fails = 0;
+ int i, step, err = 0, retries = 0;
struct thread_data *tdata = data;
up(&prestart_sem);
@@ -253,21 +253,22 @@ static int threadfunc(void *data)
for (i = 0; i < entries; i++) {
tdata->objs[i].value = (tdata->id << 16) | i;
+insert_retry:
cond_resched();
err = rhashtable_insert_fast(&ht, &tdata->objs[i].node,
test_rht_params);
if (err == -ENOMEM || err == -EBUSY) {
- tdata->objs[i].value = TEST_INSERT_FAIL;
- insert_fails++;
+ retries++;
+ goto insert_retry;
} else if (err) {
pr_err(" thread[%d]: rhashtable_insert_fast failed\n",
tdata->id);
goto out;
}
}
- if (insert_fails)
- pr_info(" thread[%d]: %d insert failures\n",
- tdata->id, insert_fails);
+ if (retries)
+ pr_info(" thread[%d]: retried insert operation %d times\n",
+ tdata->id, retries);
err = thread_lookup_test(tdata);
if (err) {
--
2.1.2
A maximum table size of 64k entries is insufficient for the multiple
threads test even in default configuration (10 threads * 50000 objects =
500000 objects in total). Since we know how many objects will be
inserted, calculate the max size unless overridden by parameter.
Note that specifying the exact number of objects upon table init won't
suffice as that value is being rounded down to the next power of two -
anticipate this by rounding up to the next power of two in beforehand.
Signed-off-by: Phil Sutter <[email protected]>
---
lib/test_rhashtable.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 093cf84..73fcb8e 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -36,9 +36,9 @@ static int runs = 4;
module_param(runs, int, 0);
MODULE_PARM_DESC(runs, "Number of test runs per variant (default: 4)");
-static int max_size = 65536;
+static int max_size = 0;
module_param(max_size, int, 0);
-MODULE_PARM_DESC(runs, "Maximum table size (default: 65536)");
+MODULE_PARM_DESC(runs, "Maximum table size (default: calculated)");
static bool shrinking = false;
module_param(shrinking, bool, 0);
@@ -317,7 +317,7 @@ static int __init test_rht_init(void)
entries = min(entries, MAX_ENTRIES);
test_rht_params.automatic_shrinking = shrinking;
- test_rht_params.max_size = max_size;
+ test_rht_params.max_size = max_size ? : roundup_pow_of_two(entries);
test_rht_params.nelem_hint = size;
pr_info("Running rhashtable test nelem=%d, max_size=%d, shrinking=%d\n",
@@ -363,6 +363,8 @@ static int __init test_rht_init(void)
return -ENOMEM;
}
+ test_rht_params.max_size = max_size ? :
+ roundup_pow_of_two(tcount * entries);
err = rhashtable_init(&ht, &test_rht_params);
if (err < 0) {
pr_warn("Test failed: Unable to initialize hashtable: %d\n",
--
2.1.2
On 08/28/15 at 12:28pm, Phil Sutter wrote:
> This should fix for soft lockup bugs triggered on slow systems.
>
> Signed-off-by: Phil Sutter <[email protected]>
Acked-by: Thomas Graf <[email protected]>
On 08/28/15 at 12:28pm, Phil Sutter wrote:
> After adding cond_resched() calls to threadfunc(), a surprisingly high
> rate of insert failures occurred probably due to table resizes getting a
> better chance to run in background. To not soften up the remaining
> tests, retry inserts until they either succeed or fail permanently.
>
> Signed-off-by: Phil Sutter <[email protected]>
> ---
> lib/test_rhashtable.c | 13 +++++++------
> 1 file changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
> index 63654e3..093cf84 100644
> --- a/lib/test_rhashtable.c
> +++ b/lib/test_rhashtable.c
> @@ -244,7 +244,7 @@ static int thread_lookup_test(struct thread_data *tdata)
>
> static int threadfunc(void *data)
> {
> - int i, step, err = 0, insert_fails = 0;
> + int i, step, err = 0, retries = 0;
> struct thread_data *tdata = data;
>
> up(&prestart_sem);
> @@ -253,21 +253,22 @@ static int threadfunc(void *data)
>
> for (i = 0; i < entries; i++) {
> tdata->objs[i].value = (tdata->id << 16) | i;
> +insert_retry:
> cond_resched();
> err = rhashtable_insert_fast(&ht, &tdata->objs[i].node,
> test_rht_params);
> if (err == -ENOMEM || err == -EBUSY) {
> - tdata->objs[i].value = TEST_INSERT_FAIL;
> - insert_fails++;
> + retries++;
> + goto insert_retry;
Is it safe to retry indefinitely on ENOMEM? Retrying on EBUSY is
definitely an improvement and we should do the same in the non
threaded test as well.
On 08/28/15 at 12:28pm, Phil Sutter wrote:
> A maximum table size of 64k entries is insufficient for the multiple
> threads test even in default configuration (10 threads * 50000 objects =
> 500000 objects in total). Since we know how many objects will be
> inserted, calculate the max size unless overridden by parameter.
>
> Note that specifying the exact number of objects upon table init won't
> suffice as that value is being rounded down to the next power of two -
> anticipate this by rounding up to the next power of two in beforehand.
>
> Signed-off-by: Phil Sutter <[email protected]>
Acked-by: Thomas Graf <[email protected]>
Thanks for doing this work.
On Fri, Aug 28, 2015 at 01:09:29PM +0200, Thomas Graf wrote:
> On 08/28/15 at 12:28pm, Phil Sutter wrote:
> > After adding cond_resched() calls to threadfunc(), a surprisingly high
> > rate of insert failures occurred probably due to table resizes getting a
> > better chance to run in background. To not soften up the remaining
> > tests, retry inserts until they either succeed or fail permanently.
> >
> > Signed-off-by: Phil Sutter <[email protected]>
> > ---
> > lib/test_rhashtable.c | 13 +++++++------
> > 1 file changed, 7 insertions(+), 6 deletions(-)
> >
> > diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
> > index 63654e3..093cf84 100644
> > --- a/lib/test_rhashtable.c
> > +++ b/lib/test_rhashtable.c
> > @@ -244,7 +244,7 @@ static int thread_lookup_test(struct thread_data *tdata)
> >
> > static int threadfunc(void *data)
> > {
> > - int i, step, err = 0, insert_fails = 0;
> > + int i, step, err = 0, retries = 0;
> > struct thread_data *tdata = data;
> >
> > up(&prestart_sem);
> > @@ -253,21 +253,22 @@ static int threadfunc(void *data)
> >
> > for (i = 0; i < entries; i++) {
> > tdata->objs[i].value = (tdata->id << 16) | i;
> > +insert_retry:
> > cond_resched();
> > err = rhashtable_insert_fast(&ht, &tdata->objs[i].node,
> > test_rht_params);
> > if (err == -ENOMEM || err == -EBUSY) {
> > - tdata->objs[i].value = TEST_INSERT_FAIL;
> > - insert_fails++;
> > + retries++;
> > + goto insert_retry;
>
> Is it safe to retry indefinitely on ENOMEM? Retrying on EBUSY is
> definitely an improvement and we should do the same in the non
> threaded test as well.
Oh yes, that is definitely a bug. I will respin and add the same for the
normal test, too.
Thanks, Phil
On Fri, Aug 28, 2015 at 01:13:20PM +0200, Phil Sutter wrote:
> On Fri, Aug 28, 2015 at 01:09:29PM +0200, Thomas Graf wrote:
> > On 08/28/15 at 12:28pm, Phil Sutter wrote:
> > > After adding cond_resched() calls to threadfunc(), a surprisingly high
> > > rate of insert failures occurred probably due to table resizes getting a
> > > better chance to run in background. To not soften up the remaining
> > > tests, retry inserts until they either succeed or fail permanently.
> > >
> > > Signed-off-by: Phil Sutter <[email protected]>
> > > ---
> > > lib/test_rhashtable.c | 13 +++++++------
> > > 1 file changed, 7 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
> > > index 63654e3..093cf84 100644
> > > --- a/lib/test_rhashtable.c
> > > +++ b/lib/test_rhashtable.c
> > > @@ -244,7 +244,7 @@ static int thread_lookup_test(struct thread_data *tdata)
> > >
> > > static int threadfunc(void *data)
> > > {
> > > - int i, step, err = 0, insert_fails = 0;
> > > + int i, step, err = 0, retries = 0;
> > > struct thread_data *tdata = data;
> > >
> > > up(&prestart_sem);
> > > @@ -253,21 +253,22 @@ static int threadfunc(void *data)
> > >
> > > for (i = 0; i < entries; i++) {
> > > tdata->objs[i].value = (tdata->id << 16) | i;
> > > +insert_retry:
> > > cond_resched();
> > > err = rhashtable_insert_fast(&ht, &tdata->objs[i].node,
> > > test_rht_params);
> > > if (err == -ENOMEM || err == -EBUSY) {
> > > - tdata->objs[i].value = TEST_INSERT_FAIL;
> > > - insert_fails++;
> > > + retries++;
> > > + goto insert_retry;
> >
> > Is it safe to retry indefinitely on ENOMEM? Retrying on EBUSY is
> > definitely an improvement and we should do the same in the non
> > threaded test as well.
>
> Oh yes, that is definitely a bug. I will respin and add the same for the
> normal test, too.
Quite ugly, IMHO: rhashtable_insert_fast() may return -ENOMEM as
non-permanent error, if allocation in GFP_ATOMIC failed. In this case,
allocation in GFP_KERNEL is retried by rht_deferred_worker(). Sadly,
there is no way to determine if that has already been tried and failed.
The thread test triggers GFP_ATOMIC allocation failure quite easily, so
I can't really just ignore this issue. :)
Cheers, Phil
On 08/28/15 at 03:34pm, Phil Sutter wrote:
> Quite ugly, IMHO: rhashtable_insert_fast() may return -ENOMEM as
> non-permanent error, if allocation in GFP_ATOMIC failed. In this case,
> allocation in GFP_KERNEL is retried by rht_deferred_worker(). Sadly,
> there is no way to determine if that has already been tried and failed.
>
> The thread test triggers GFP_ATOMIC allocation failure quite easily, so
> I can't really just ignore this issue. :)
Return EBUSY or ENOBUFS in the non-permanent case? It is definitely
helpful if the API allows to differ between permanent and
non-permanent errors.
On Sat, Aug 29, 2015 at 12:43:03AM +0200, Thomas Graf wrote:
> On 08/28/15 at 03:34pm, Phil Sutter wrote:
> > Quite ugly, IMHO: rhashtable_insert_fast() may return -ENOMEM as
> > non-permanent error, if allocation in GFP_ATOMIC failed. In this case,
> > allocation in GFP_KERNEL is retried by rht_deferred_worker(). Sadly,
> > there is no way to determine if that has already been tried and failed.
> >
> > The thread test triggers GFP_ATOMIC allocation failure quite easily, so
> > I can't really just ignore this issue. :)
>
> Return EBUSY or ENOBUFS in the non-permanent case? It is definitely
> helpful if the API allows to differ between permanent and
> non-permanent errors.
Yes, indeed. Therefore rht_deferred_worker() needs to check the return
value of rhashtable_expand(). The question is how to propagate the error
condition, as the worker's return value is not being kept track of
(function returns void even).
Should we introduce a new field to struct rhashtable to track the
internal state? This might allow to clean up some rather obscure tests,
e.g. whether a table resize is in progress or not.
Cheers, Phil
Phil Sutter <[email protected]> wrote:
>
> Should we introduce a new field to struct rhashtable to track the
> internal state? This might allow to clean up some rather obscure tests,
> e.g. whether a table resize is in progress or not.
Why would we want to do that? The deferred expansion is done
on a best effort basis so its failure has nothing to do with
the failure of a subsequent insertion.
The insertion must have tried its own last-ditch synchronous
expansion and only fail if that fails.
Cheers,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
On Sun, Aug 30, 2015 at 03:47:17PM +0800, Herbert Xu wrote:
> Phil Sutter <[email protected]> wrote:
> >
> > Should we introduce a new field to struct rhashtable to track the
> > internal state? This might allow to clean up some rather obscure tests,
> > e.g. whether a table resize is in progress or not.
>
> Why would we want to do that? The deferred expansion is done
> on a best effort basis so its failure has nothing to do with
> the failure of a subsequent insertion.
The variable would be used to track if the worker has failed to allocate
memory in background.
Since the failing insertion will be retried, subsequent inserts are not
necessary unrelated.
> The insertion must have tried its own last-ditch synchronous
> expansion and only fail if that fails.
Who do you mean with "the insertion"? The user or the API?
Cheers, Phil