2020-11-14 19:35:14

by Joel Fernandes

[permalink] [raw]
Subject: [PATCH rcu-dev] rcu/trace: Add tracing for how segcb list changes

Track how the segcb list changes before/after acceleration, during
queuing and during dequeuing.

This has proved useful to discover an optimization to avoid unwanted GP
requests when there are no callbacks accelerated. The overhead is minimal as
each segment's length is now stored in the respective segment.

Reviewed-by: Frederic Weisbecker <[email protected]>
Reviewed-by: Neeraj Upadhyay <[email protected]>
Signed-off-by: Joel Fernandes (Google) <[email protected]>

---
include/trace/events/rcu.h | 26 ++++++++++++++++++++++++++
kernel/rcu/tree.c | 9 +++++++++
2 files changed, 35 insertions(+)

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 155b5cb43cfd..5fc29400e1a2 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -505,6 +505,32 @@ TRACE_EVENT_RCU(rcu_callback,
__entry->qlen)
);

+TRACE_EVENT_RCU(rcu_segcb_stats,
+
+ TP_PROTO(struct rcu_segcblist *rs, const char *ctx),
+
+ TP_ARGS(rs, ctx),
+
+ TP_STRUCT__entry(
+ __field(const char *, ctx)
+ __array(unsigned long, gp_seq, RCU_CBLIST_NSEGS)
+ __array(long, seglen, RCU_CBLIST_NSEGS)
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ memcpy(__entry->seglen, rs->seglen, RCU_CBLIST_NSEGS * sizeof(long));
+ memcpy(__entry->gp_seq, rs->gp_seq, RCU_CBLIST_NSEGS * sizeof(unsigned long));
+
+ ),
+
+ TP_printk("%s seglen: (DONE=%ld, WAIT=%ld, NEXT_READY=%ld, NEXT=%ld) "
+ "gp_seq: (DONE=%lu, WAIT=%lu, NEXT_READY=%lu, NEXT=%lu)", __entry->ctx,
+ __entry->seglen[0], __entry->seglen[1], __entry->seglen[2], __entry->seglen[3],
+ __entry->gp_seq[0], __entry->gp_seq[1], __entry->gp_seq[2], __entry->gp_seq[3])
+
+);
+
/*
* Tracepoint for the registration of a single RCU callback of the special
* kvfree() form. The first argument is the RCU type, the second argument
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 413831b48648..b96d26d0d44a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1497,6 +1497,8 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
if (!rcu_segcblist_pend_cbs(&rdp->cblist))
return false;

+ trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbPreAcc"));
+
/*
* Callbacks are often registered with incomplete grace-period
* information. Something about the fact that getting exact
@@ -1517,6 +1519,8 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
else
trace_rcu_grace_period(rcu_state.name, gp_seq_req, TPS("AccReadyCB"));

+ trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbPostAcc"));
+
return ret;
}

@@ -2473,11 +2477,14 @@ static void rcu_do_batch(struct rcu_data *rdp)
rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
if (offloaded)
rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
+
+ trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbDequeued"));
rcu_nocb_unlock_irqrestore(rdp, flags);

/* Invoke callbacks. */
tick_dep_set_task(current, TICK_DEP_BIT_RCU);
rhp = rcu_cblist_dequeue(&rcl);
+
for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) {
rcu_callback_t f;

@@ -2989,6 +2996,8 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
trace_rcu_callback(rcu_state.name, head,
rcu_segcblist_n_cbs(&rdp->cblist));

+ trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued"));
+
/* Go handle any RCU core processing required. */
if (unlikely(rcu_segcblist_is_offloaded(&rdp->cblist))) {
__call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
--
2.29.2.299.gdc1121823c-goog


2020-11-16 21:49:27

by Paul E. McKenney

[permalink] [raw]
Subject: Re: [PATCH rcu-dev] rcu/trace: Add tracing for how segcb list changes

On Sat, Nov 14, 2020 at 02:31:32PM -0500, Joel Fernandes (Google) wrote:
> Track how the segcb list changes before/after acceleration, during
> queuing and during dequeuing.
>
> This has proved useful to discover an optimization to avoid unwanted GP
> requests when there are no callbacks accelerated. The overhead is minimal as
> each segment's length is now stored in the respective segment.
>
> Reviewed-by: Frederic Weisbecker <[email protected]>
> Reviewed-by: Neeraj Upadhyay <[email protected]>
> Signed-off-by: Joel Fernandes (Google) <[email protected]>

Queued for further review and testing, thank you all!

If testing goes well I expect to put this in the v5.11 bucket. Here is
hoping...

Thanx, Paul

> ---
> include/trace/events/rcu.h | 26 ++++++++++++++++++++++++++
> kernel/rcu/tree.c | 9 +++++++++
> 2 files changed, 35 insertions(+)
>
> diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
> index 155b5cb43cfd..5fc29400e1a2 100644
> --- a/include/trace/events/rcu.h
> +++ b/include/trace/events/rcu.h
> @@ -505,6 +505,32 @@ TRACE_EVENT_RCU(rcu_callback,
> __entry->qlen)
> );
>
> +TRACE_EVENT_RCU(rcu_segcb_stats,
> +
> + TP_PROTO(struct rcu_segcblist *rs, const char *ctx),
> +
> + TP_ARGS(rs, ctx),
> +
> + TP_STRUCT__entry(
> + __field(const char *, ctx)
> + __array(unsigned long, gp_seq, RCU_CBLIST_NSEGS)
> + __array(long, seglen, RCU_CBLIST_NSEGS)
> + ),
> +
> + TP_fast_assign(
> + __entry->ctx = ctx;
> + memcpy(__entry->seglen, rs->seglen, RCU_CBLIST_NSEGS * sizeof(long));
> + memcpy(__entry->gp_seq, rs->gp_seq, RCU_CBLIST_NSEGS * sizeof(unsigned long));
> +
> + ),
> +
> + TP_printk("%s seglen: (DONE=%ld, WAIT=%ld, NEXT_READY=%ld, NEXT=%ld) "
> + "gp_seq: (DONE=%lu, WAIT=%lu, NEXT_READY=%lu, NEXT=%lu)", __entry->ctx,
> + __entry->seglen[0], __entry->seglen[1], __entry->seglen[2], __entry->seglen[3],
> + __entry->gp_seq[0], __entry->gp_seq[1], __entry->gp_seq[2], __entry->gp_seq[3])
> +
> +);
> +
> /*
> * Tracepoint for the registration of a single RCU callback of the special
> * kvfree() form. The first argument is the RCU type, the second argument
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 413831b48648..b96d26d0d44a 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1497,6 +1497,8 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
> if (!rcu_segcblist_pend_cbs(&rdp->cblist))
> return false;
>
> + trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbPreAcc"));
> +
> /*
> * Callbacks are often registered with incomplete grace-period
> * information. Something about the fact that getting exact
> @@ -1517,6 +1519,8 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
> else
> trace_rcu_grace_period(rcu_state.name, gp_seq_req, TPS("AccReadyCB"));
>
> + trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbPostAcc"));
> +
> return ret;
> }
>
> @@ -2473,11 +2477,14 @@ static void rcu_do_batch(struct rcu_data *rdp)
> rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
> if (offloaded)
> rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
> +
> + trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCbDequeued"));
> rcu_nocb_unlock_irqrestore(rdp, flags);
>
> /* Invoke callbacks. */
> tick_dep_set_task(current, TICK_DEP_BIT_RCU);
> rhp = rcu_cblist_dequeue(&rcl);
> +
> for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) {
> rcu_callback_t f;
>
> @@ -2989,6 +2996,8 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func)
> trace_rcu_callback(rcu_state.name, head,
> rcu_segcblist_n_cbs(&rdp->cblist));
>
> + trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued"));
> +
> /* Go handle any RCU core processing required. */
> if (unlikely(rcu_segcblist_is_offloaded(&rdp->cblist))) {
> __call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
> --
> 2.29.2.299.gdc1121823c-goog
>