2019-08-27 19:03:37

by Joel Fernandes

[permalink] [raw]
Subject: [PATCH 5/5] rcu: Remove kfree_call_rcu_nobatch()

Now that kfree_rcu() special casing has been removed from tree RCU,
remove kfree_call_rcu_nobatch() since it is not needed.

Signed-off-by: Joel Fernandes (Google) <[email protected]>
---
.../admin-guide/kernel-parameters.txt | 4 ---
include/linux/rcutiny.h | 5 ---
include/linux/rcutree.h | 1 -
kernel/rcu/rcuperf.c | 10 +-----
kernel/rcu/tree.c | 33 ++++++++-----------
5 files changed, 14 insertions(+), 39 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 24fe8aefb12c..56be0e30100b 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3909,10 +3909,6 @@
Number of loops doing rcuperf.kfree_alloc_num number
of allocations and frees.

- rcuperf.kfree_no_batch= [KNL]
- Use the non-batching (less efficient) version of kfree_rcu().
- This is useful for comparing with the batched version.
-
rcuperf.nreaders= [KNL]
Set number of RCU readers. The value -1 selects
N, where N is the number of CPUs. A value
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 949841f52ec5..7aa93afa5d8d 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -39,11 +39,6 @@ static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
call_rcu(head, func);
}

-static inline void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
-{
- call_rcu(head, func);
-}
-
void rcu_qs(void);

static inline void rcu_softirq_qs(void)
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 961b7e05d141..0b68aa952f8b 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -34,7 +34,6 @@ static inline void rcu_virt_note_context_switch(int cpu)

void synchronize_rcu_expedited(void);
void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
-void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func);

void rcu_barrier(void);
bool rcu_eqs_special_set(int cpu);
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index c1e25fd10f2a..da94b89cd531 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -593,7 +593,6 @@ rcu_perf_shutdown(void *arg)
torture_param(int, kfree_nthreads, -1, "Number of threads running loops of kfree_rcu().");
torture_param(int, kfree_alloc_num, 8000, "Number of allocations and frees done in an iteration.");
torture_param(int, kfree_loops, 10, "Number of loops doing kfree_alloc_num allocations and frees.");
-torture_param(int, kfree_no_batch, 0, "Use the non-batching (slower) version of kfree_rcu().");

static struct task_struct **kfree_reader_tasks;
static int kfree_nrealthreads;
@@ -632,14 +631,7 @@ kfree_perf_thread(void *arg)
if (!alloc_ptr)
return -ENOMEM;

- if (!kfree_no_batch) {
- kfree_rcu(alloc_ptr, rh);
- } else {
- rcu_callback_t cb;
-
- cb = (rcu_callback_t)(unsigned long)offsetof(struct kfree_obj, rh);
- kfree_call_rcu_nobatch(&(alloc_ptr->rh), cb);
- }
+ kfree_rcu(alloc_ptr, rh);
}

cond_resched();
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 12c17e10f2b4..c767973d62ac 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2777,8 +2777,10 @@ static void kfree_rcu_work(struct work_struct *work)
rcu_lock_acquire(&rcu_callback_map);
trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset);

- /* Could be possible to optimize with kfree_bulk in future */
- kfree((void *)head - offset);
+ if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset))) {
+ /* Could be optimized with kfree_bulk() in future. */
+ kfree((void *)head - offset);
+ }

rcu_lock_release(&rcu_callback_map);
cond_resched_tasks_rcu_qs();
@@ -2856,16 +2858,6 @@ static void kfree_rcu_monitor(struct work_struct *work)
spin_unlock_irqrestore(&krcp->lock, flags);
}

-/*
- * This version of kfree_call_rcu does not do batching of kfree_rcu() requests.
- * Used only by rcuperf torture test for comparison with kfree_rcu_batch().
- */
-void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
-{
- __call_rcu(head, func);
-}
-EXPORT_SYMBOL_GPL(kfree_call_rcu_nobatch);
-
/*
* Queue a request for lazy invocation of kfree() after a grace period.
*
@@ -2885,12 +2877,6 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
unsigned long flags;
struct kfree_rcu_cpu *krcp;

- /* kfree_call_rcu() batching requires timers to be up. If the scheduler
- * is not yet up, just skip batching and do the non-batched version.
- */
- if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING)
- return kfree_call_rcu_nobatch(head, func);
-
if (debug_rcu_head_queue(head)) {
/* Probable double kfree_rcu() */
WARN_ONCE(1, "kfree_call_rcu(): Double-freed call. rcu_head %p\n",
@@ -2909,8 +2895,15 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
krcp->head = head;

/* Schedule monitor for timely drain after KFREE_DRAIN_JIFFIES. */
- if (!xchg(&krcp->monitor_todo, true))
- schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+ if (!xchg(&krcp->monitor_todo, true)) {
+ /* Scheduling the monitor requires scheduler/timers to be up,
+ * if it is not, just skip it. An eventual kfree_rcu() will
+ * kick it again.
+ */
+ if ((rcu_scheduler_active == RCU_SCHEDULER_RUNNING)) {
+ schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+ }
+ }

spin_unlock(&krcp->lock);
local_irq_restore(flags);
--
2.23.0.187.g17f5b7556c-goog


2019-08-28 21:58:16

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH 5/5] rcu: Remove kfree_call_rcu_nobatch()

On Tue, Aug 27, 2019 at 03:01:59PM -0400, Joel Fernandes (Google) wrote:
> Now that kfree_rcu() special casing has been removed from tree RCU,
> remove kfree_call_rcu_nobatch() since it is not needed.
>
> Signed-off-by: Joel Fernandes (Google) <[email protected]>

Now -this- one qualifies as a nice negative delta! ;-)

A few things below, please fix in next version.

Thanx, Paul

> ---
> .../admin-guide/kernel-parameters.txt | 4 ---
> include/linux/rcutiny.h | 5 ---
> include/linux/rcutree.h | 1 -
> kernel/rcu/rcuperf.c | 10 +-----
> kernel/rcu/tree.c | 33 ++++++++-----------
> 5 files changed, 14 insertions(+), 39 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 24fe8aefb12c..56be0e30100b 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -3909,10 +3909,6 @@
> Number of loops doing rcuperf.kfree_alloc_num number
> of allocations and frees.
>
> - rcuperf.kfree_no_batch= [KNL]
> - Use the non-batching (less efficient) version of kfree_rcu().
> - This is useful for comparing with the batched version.
> -
> rcuperf.nreaders= [KNL]
> Set number of RCU readers. The value -1 selects
> N, where N is the number of CPUs. A value
> diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
> index 949841f52ec5..7aa93afa5d8d 100644
> --- a/include/linux/rcutiny.h
> +++ b/include/linux/rcutiny.h
> @@ -39,11 +39,6 @@ static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
> call_rcu(head, func);
> }
>
> -static inline void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
> -{
> - call_rcu(head, func);
> -}
> -
> void rcu_qs(void);
>
> static inline void rcu_softirq_qs(void)
> diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
> index 961b7e05d141..0b68aa952f8b 100644
> --- a/include/linux/rcutree.h
> +++ b/include/linux/rcutree.h
> @@ -34,7 +34,6 @@ static inline void rcu_virt_note_context_switch(int cpu)
>
> void synchronize_rcu_expedited(void);
> void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
> -void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func);
>
> void rcu_barrier(void);
> bool rcu_eqs_special_set(int cpu);
> diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
> index c1e25fd10f2a..da94b89cd531 100644
> --- a/kernel/rcu/rcuperf.c
> +++ b/kernel/rcu/rcuperf.c
> @@ -593,7 +593,6 @@ rcu_perf_shutdown(void *arg)
> torture_param(int, kfree_nthreads, -1, "Number of threads running loops of kfree_rcu().");
> torture_param(int, kfree_alloc_num, 8000, "Number of allocations and frees done in an iteration.");
> torture_param(int, kfree_loops, 10, "Number of loops doing kfree_alloc_num allocations and frees.");
> -torture_param(int, kfree_no_batch, 0, "Use the non-batching (slower) version of kfree_rcu().");
>
> static struct task_struct **kfree_reader_tasks;
> static int kfree_nrealthreads;
> @@ -632,14 +631,7 @@ kfree_perf_thread(void *arg)
> if (!alloc_ptr)
> return -ENOMEM;
>
> - if (!kfree_no_batch) {
> - kfree_rcu(alloc_ptr, rh);
> - } else {
> - rcu_callback_t cb;
> -
> - cb = (rcu_callback_t)(unsigned long)offsetof(struct kfree_obj, rh);
> - kfree_call_rcu_nobatch(&(alloc_ptr->rh), cb);
> - }
> + kfree_rcu(alloc_ptr, rh);
> }
>
> cond_resched();
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 12c17e10f2b4..c767973d62ac 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -2777,8 +2777,10 @@ static void kfree_rcu_work(struct work_struct *work)
> rcu_lock_acquire(&rcu_callback_map);
> trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset);
>
> - /* Could be possible to optimize with kfree_bulk in future */
> - kfree((void *)head - offset);
> + if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset))) {
> + /* Could be optimized with kfree_bulk() in future. */
> + kfree((void *)head - offset);
> + }

This really needs to be in the previous patch until such time as Tiny RCU
no longer needs the restriction.

> rcu_lock_release(&rcu_callback_map);
> cond_resched_tasks_rcu_qs();
> @@ -2856,16 +2858,6 @@ static void kfree_rcu_monitor(struct work_struct *work)
> spin_unlock_irqrestore(&krcp->lock, flags);
> }
>
> -/*
> - * This version of kfree_call_rcu does not do batching of kfree_rcu() requests.
> - * Used only by rcuperf torture test for comparison with kfree_rcu_batch().
> - */
> -void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
> -{
> - __call_rcu(head, func);
> -}
> -EXPORT_SYMBOL_GPL(kfree_call_rcu_nobatch);
> -
> /*
> * Queue a request for lazy invocation of kfree() after a grace period.
> *
> @@ -2885,12 +2877,6 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
> unsigned long flags;
> struct kfree_rcu_cpu *krcp;
>
> - /* kfree_call_rcu() batching requires timers to be up. If the scheduler
> - * is not yet up, just skip batching and do the non-batched version.
> - */
> - if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING)
> - return kfree_call_rcu_nobatch(head, func);
> -
> if (debug_rcu_head_queue(head)) {
> /* Probable double kfree_rcu() */
> WARN_ONCE(1, "kfree_call_rcu(): Double-freed call. rcu_head %p\n",
> @@ -2909,8 +2895,15 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
> krcp->head = head;
>
> /* Schedule monitor for timely drain after KFREE_DRAIN_JIFFIES. */
> - if (!xchg(&krcp->monitor_todo, true))
> - schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
> + if (!xchg(&krcp->monitor_todo, true)) {
> + /* Scheduling the monitor requires scheduler/timers to be up,
> + * if it is not, just skip it. An eventual kfree_rcu() will
> + * kick it again.
> + */
> + if ((rcu_scheduler_active == RCU_SCHEDULER_RUNNING)) {
> + schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
> + }
> + }

And this also needs to be in an earlier patch. Bisectability and all that!

Are we really guaranteed that there will be an eventual kfree_rcu()?
More of a worry for Tiny RCU than for Tree RCU, but still could be
annoying for someone trying to debug a memory leak.

Thanx, Paul

> spin_unlock(&krcp->lock);
> local_irq_restore(flags);
> --
> 2.23.0.187.g17f5b7556c-goog
>

2019-08-29 22:24:42

by Joel Fernandes

[permalink] [raw]
Subject: Re: [PATCH 5/5] rcu: Remove kfree_call_rcu_nobatch()

Hi Paul,

I think this is the only contentious patch preventing my resend of the
series, let me know what you think, I replied below:

On Wed, Aug 28, 2019 at 02:56:36PM -0700, Paul E. McKenney wrote:
> On Tue, Aug 27, 2019 at 03:01:59PM -0400, Joel Fernandes (Google) wrote:
[snip]
> > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> > index 12c17e10f2b4..c767973d62ac 100644
> > --- a/kernel/rcu/tree.c
> > +++ b/kernel/rcu/tree.c
> > @@ -2777,8 +2777,10 @@ static void kfree_rcu_work(struct work_struct *work)
> > rcu_lock_acquire(&rcu_callback_map);
> > trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset);
> >
> > - /* Could be possible to optimize with kfree_bulk in future */
> > - kfree((void *)head - offset);
> > + if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset))) {
> > + /* Could be optimized with kfree_bulk() in future. */
> > + kfree((void *)head - offset);
> > + }
>
> This really needs to be in the previous patch until such time as Tiny RCU
> no longer needs the restriction.

I was only going by whatever is already committed to the -rcu dev branch. The
series is based on the -dev branch.

The original patch adding the kfree_rcu() batching is already merged into the
-rcu dev branch (that version just had 1 list, this series adds multiple
lists).

In the above diff, I just added the WARN_ON_ONCE() as extra checking for tree
RCU kfree batching. It has nothing to do with tiny RCU per se. Should I
submit the WARN_ON_ONCE() as a separate patch then?

To prevent confusion, could you let me know if I am supposed to be submitting
patches against a branch other than the dev branch?

> > rcu_lock_release(&rcu_callback_map);
> > cond_resched_tasks_rcu_qs();
> > @@ -2856,16 +2858,6 @@ static void kfree_rcu_monitor(struct work_struct *work)
> > spin_unlock_irqrestore(&krcp->lock, flags);
> > }
> >
> > -/*
> > - * This version of kfree_call_rcu does not do batching of kfree_rcu() requests.
> > - * Used only by rcuperf torture test for comparison with kfree_rcu_batch().
> > - */
> > -void kfree_call_rcu_nobatch(struct rcu_head *head, rcu_callback_t func)
> > -{
> > - __call_rcu(head, func);
> > -}
> > -EXPORT_SYMBOL_GPL(kfree_call_rcu_nobatch);
> > -
> > /*
> > * Queue a request for lazy invocation of kfree() after a grace period.
> > *
> > @@ -2885,12 +2877,6 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
> > unsigned long flags;
> > struct kfree_rcu_cpu *krcp;
> >
> > - /* kfree_call_rcu() batching requires timers to be up. If the scheduler
> > - * is not yet up, just skip batching and do the non-batched version.
> > - */
> > - if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING)
> > - return kfree_call_rcu_nobatch(head, func);
> > -
> > if (debug_rcu_head_queue(head)) {
> > /* Probable double kfree_rcu() */
> > WARN_ONCE(1, "kfree_call_rcu(): Double-freed call. rcu_head %p\n",
> > @@ -2909,8 +2895,15 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
> > krcp->head = head;
> >
> > /* Schedule monitor for timely drain after KFREE_DRAIN_JIFFIES. */
> > - if (!xchg(&krcp->monitor_todo, true))
> > - schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
> > + if (!xchg(&krcp->monitor_todo, true)) {
> > + /* Scheduling the monitor requires scheduler/timers to be up,
> > + * if it is not, just skip it. An eventual kfree_rcu() will
> > + * kick it again.
> > + */
> > + if ((rcu_scheduler_active == RCU_SCHEDULER_RUNNING)) {
> > + schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
> > + }
> > + }
>
> And this also needs to be in an earlier patch. Bisectability and all that!
>
> Are we really guaranteed that there will be an eventual kfree_rcu()?
> More of a worry for Tiny RCU than for Tree RCU, but still could be
> annoying for someone trying to debug a memory leak.

Same comment as above, the original patch adding the schedule_delayed_work()
is already merged into the -dev branch. This series is based on top of that.
The reason I had to rearrange the &krcp->monitor_todo code above is that we no
longer have kfree_call_rcu_nobatch(), which this patch removes.

thanks,

- Joel