From: Kan Liang <[email protected]>
The PMU-specific data for monitored tasks is allocated, initialized,
and freed during LBR call stack monitoring. Several helper functions
are provided.

alloc_task_ctx_data_rcu() allocates the perf_ctx_data for a task when
the RCU-protected perf_ctx_data is NULL. It does not update the
refcount if the perf_ctx_data has already been allocated.

init_task_ctx_data_rcu() is similar to alloc_task_ctx_data_rcu(), but
it updates the refcount if the perf_ctx_data was already allocated.

fini_task_ctx_data_rcu() frees the RCU-protected perf_ctx_data when
there are no users left, or unconditionally when the force flag is set.

These functions take a lock that serializes the writers of the
perf_ctx_data RCU pointer and the refcount.

The functions will be used by the following patch.
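For illustration, hypothetical call sites could pair the helpers like
this (names and shapes are examples only; ctx_size stands for the
PMU-specific data size, and the real users arrive with that following
patch):

	/* First user allocates; later users only bump the refcount. */
	ret = init_task_ctx_data_rcu(task, ctx_size, GFP_KERNEL);
	if (ret)
		return ret;

	/* Each user drops its reference; the last one frees the data. */
	fini_task_ctx_data_rcu(task, false);

	/* On task exit, free unconditionally. */
	fini_task_ctx_data_rcu(task, true);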
Reviewed-by: Alexey Budankov <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
---
kernel/events/core.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 154 insertions(+)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 43567d1..b4976e0 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4440,6 +4440,160 @@ static void unaccount_freq_event(void)
atomic_dec(&nr_freq_events);
}
+static int
+alloc_perf_ctx_data(size_t ctx_size, gfp_t flags,
+ struct perf_ctx_data **task_ctx_data)
+{
+ struct perf_ctx_data *ctx_data;
+
+ ctx_data = kzalloc(sizeof(struct perf_ctx_data), flags);
+ if (!ctx_data)
+ return -ENOMEM;
+
+ ctx_data->data = kzalloc(ctx_size, flags);
+ if (!ctx_data->data) {
+ kfree(ctx_data);
+ return -ENOMEM;
+ }
+
+ ctx_data->data_size = ctx_size;
+ *task_ctx_data = ctx_data;
+
+ return 0;
+}
+
+static int
+__alloc_task_ctx_data_rcu(struct task_struct *task,
+ size_t ctx_size, gfp_t flags)
+{
+ struct perf_ctx_data *ctx_data = task->perf_ctx_data;
+ int ret;
+
+ lockdep_assert_held_once(&task->perf_ctx_data_lock);
+
+ ret = alloc_perf_ctx_data(ctx_size, flags, &ctx_data);
+ if (ret)
+ return ret;
+
+ ctx_data->refcount = 1;
+
+ rcu_assign_pointer(task->perf_ctx_data, ctx_data);
+
+ return 0;
+}
+
+/**
+ * alloc_task_ctx_data_rcu - alloc perf_ctx_data for a task
+ * @task: Target Task
+ * @ctx_size: Size of PMU specific data
+ * @flags: Allocation flags
+ *
+ * Allocate perf_ctx_data and update the RCU pointer.
+ * If the perf_ctx_data has been allocated, return 0.
+ * Lock is required to sync the writers of perf_ctx_data RCU pointer
+ * and refcount.
+ */
+static int
+alloc_task_ctx_data_rcu(struct task_struct *task,
+ size_t ctx_size, gfp_t flags)
+{
+ unsigned long lock_flags;
+ int ret = 0;
+
+ raw_spin_lock_irqsave(&task->perf_ctx_data_lock, lock_flags);
+
+ if (task->perf_ctx_data)
+ goto unlock;
+
+ ret = __alloc_task_ctx_data_rcu(task, ctx_size, flags);
+
+unlock:
+ raw_spin_unlock_irqrestore(&task->perf_ctx_data_lock, lock_flags);
+
+ return ret;
+}
+
+static int
+__init_task_ctx_data_rcu(struct task_struct *task, size_t ctx_size, gfp_t flags)
+{
+ struct perf_ctx_data *ctx_data = task->perf_ctx_data;
+
+ lockdep_assert_held_once(&task->perf_ctx_data_lock);
+
+ if (ctx_data) {
+ ctx_data->refcount++;
+ return 0;
+ }
+
+ return __alloc_task_ctx_data_rcu(task, ctx_size, flags);
+}
+
+/**
+ * init_task_ctx_data_rcu - init perf_ctx_data for a task
+ * @task: Target Task
+ * @ctx_size: Size of PMU specific data
+ * @flags: Allocation flags
+ *
+ * If the perf_ctx_data has been allocated, update the refcount.
+ * Otherwise, allocate perf_ctx_data and update the RCU pointer.
+ * Lock is required to sync the writers of perf_ctx_data RCU pointer
+ * and refcount.
+ */
+static int
+init_task_ctx_data_rcu(struct task_struct *task, size_t ctx_size, gfp_t flags)
+{
+ unsigned long lock_flags;
+ int ret;
+
+ raw_spin_lock_irqsave(&task->perf_ctx_data_lock, lock_flags);
+ ret = __init_task_ctx_data_rcu(task, ctx_size, flags);
+ raw_spin_unlock_irqrestore(&task->perf_ctx_data_lock, lock_flags);
+
+ return ret;
+}
+
+static void
+free_perf_ctx_data(struct rcu_head *rcu_head)
+{
+ struct perf_ctx_data *ctx_data;
+
+ ctx_data = container_of(rcu_head, struct perf_ctx_data, rcu_head);
+ kfree(ctx_data->data);
+ kfree(ctx_data);
+}
+
+/**
+ * fini_task_ctx_data_rcu - free the perf_ctx_data RCU pointer for a task
+ * @task: Target Task
+ * @force: Unconditionally free perf_ctx_data
+ *
+ * If force is set, free perf_ctx_data unconditionally.
+ * Otherwise, free perf_ctx_data when there are no users.
+ * Lock is required to sync the writers of perf_ctx_data RCU pointer
+ * and refcount.
+ */
+static void
+fini_task_ctx_data_rcu(struct task_struct *task, bool force)
+{
+ struct perf_ctx_data *ctx_data;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&task->perf_ctx_data_lock, flags);
+
+ ctx_data = task->perf_ctx_data;
+ if (!ctx_data)
+ goto unlock;
+
+ if (!force && --ctx_data->refcount)
+ goto unlock;
+
+ RCU_INIT_POINTER(task->perf_ctx_data, NULL);
+ call_rcu(&ctx_data->rcu_head, free_perf_ctx_data);
+
+unlock:
+ raw_spin_unlock_irqrestore(&task->perf_ctx_data_lock, flags);
+}
+
static void unaccount_event(struct perf_event *event)
{
bool dec = false;
--
2.7.4
On Thu, Nov 28, 2019 at 07:14:25AM -0800, [email protected] wrote:
> +static int
> +__alloc_task_ctx_data_rcu(struct task_struct *task,
> + size_t ctx_size, gfp_t flags)
> +{
> + struct perf_ctx_data *ctx_data = task->perf_ctx_data;
> + int ret;
> +
> + lockdep_assert_held_once(&task->perf_ctx_data_lock);
> +
> + ret = alloc_perf_ctx_data(ctx_size, flags, &ctx_data);
> + if (ret)
> + return ret;
> +
> + ctx_data->refcount = 1;
> +
> + rcu_assign_pointer(task->perf_ctx_data, ctx_data);
> +
> + return 0;
> +}
> +static int
> +__init_task_ctx_data_rcu(struct task_struct *task, size_t ctx_size, gfp_t flags)
> +{
> + struct perf_ctx_data *ctx_data = task->perf_ctx_data;
> +
> + lockdep_assert_held_once(&task->perf_ctx_data_lock);
> +
> + if (ctx_data) {
> + ctx_data->refcount++;
> + return 0;
> + }
> +
> + return __alloc_task_ctx_data_rcu(task, ctx_size, flags);
> +}
> +/**
> + * fini_task_ctx_data_rcu - free the perf_ctx_data RCU pointer for a task
> + * @task: Target Task
> + * @force: Unconditionally free perf_ctx_data
> + *
> + * If force is set, free perf_ctx_data unconditionally.
> + * Otherwise, free perf_ctx_data when there are no users.
> + * Lock is required to sync the writers of perf_ctx_data RCU pointer
> + * and refcount.
> + */
> +static void
> +fini_task_ctx_data_rcu(struct task_struct *task, bool force)
> +{
> + struct perf_ctx_data *ctx_data;
> + unsigned long flags;
> +
> + raw_spin_lock_irqsave(&task->perf_ctx_data_lock, flags);
> +
> + ctx_data = task->perf_ctx_data;
> + if (!ctx_data)
> + goto unlock;
> +
> + if (!force && --ctx_data->refcount)
> + goto unlock;
> +
> + RCU_INIT_POINTER(task->perf_ctx_data, NULL);
> + call_rcu(&ctx_data->rcu_head, free_perf_ctx_data);
> +
> +unlock:
> + raw_spin_unlock_irqrestore(&task->perf_ctx_data_lock, flags);
> +}
All this refcount under lock is an anti-pattern. Also the naming is
insane.
On 02.12.2019 16:16, Peter Zijlstra wrote:
> On Thu, Nov 28, 2019 at 07:14:25AM -0800, [email protected] wrote:
> [...]
> All this refcount under lock is an anti-pattern. Also the naming is
> insane.
Could you please point to examples of the proper pattern for such or similar cases?
Thanks,
Alexey
On 12/2/2019 8:16 AM, Peter Zijlstra wrote:
> On Thu, Nov 28, 2019 at 07:14:25AM -0800, [email protected] wrote:
> [...]
> All this refcount under lock is an anti-pattern. Also the naming is
> insane.
>
Could you please give me an example?
I think we do need something to protect the refcount. Are you suggesting
atomic_*?
Thanks,
Kan
On Mon, Dec 02, 2019 at 03:35:00PM -0500, Liang, Kan wrote:
> Could you please give me an example?
There's a ton of refcounting in perf, none of it follows this pattern.
> I think we do need something to protect the refcount. Are you suggesting
> atomic_*?
refcount_t comes to mind.
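To make the suggestion concrete: refcount_t (from <linux/refcount.h>)
provides atomic operations such as refcount_inc_not_zero() and
refcount_dec_and_test(), so the count itself needs no lock; the lock is
then only needed to serialize publishing and unpublishing the RCU
pointer. Below is a minimal, hypothetical sketch of that shape, reusing
the field and function names from the patch above; it illustrates the
suggested pattern, not the code that was eventually merged.

	/*
	 * Hypothetical sketch: the int refcount in struct perf_ctx_data
	 * becomes a refcount_t, e.g.:
	 *
	 *	struct perf_ctx_data {
	 *		struct rcu_head	rcu_head;
	 *		refcount_t	refcount;
	 *		size_t		data_size;
	 *		void		*data;
	 *	};
	 */

	/* Take a reference on already-published perf_ctx_data, locklessly. */
	static bool get_task_ctx_data(struct task_struct *task)
	{
		struct perf_ctx_data *ctx_data;
		bool got = false;

		rcu_read_lock();
		ctx_data = rcu_dereference(task->perf_ctx_data);
		/* Fails harmlessly if a concurrent final put hit zero. */
		if (ctx_data)
			got = refcount_inc_not_zero(&ctx_data->refcount);
		rcu_read_unlock();

		return got;
	}

	/* Drop a reference; only the final put takes the lock, to unpublish. */
	static void put_task_ctx_data(struct task_struct *task,
				      struct perf_ctx_data *ctx_data)
	{
		unsigned long flags;

		/* The decrement is atomic; no lock needed on this path. */
		if (!refcount_dec_and_test(&ctx_data->refcount))
			return;

		raw_spin_lock_irqsave(&task->perf_ctx_data_lock, flags);
		RCU_INIT_POINTER(task->perf_ctx_data, NULL);
		raw_spin_unlock_irqrestore(&task->perf_ctx_data_lock, flags);

		call_rcu(&ctx_data->rcu_head, free_perf_ctx_data);
	}

The allocation slow path (when refcount_inc_not_zero() fails or no
perf_ctx_data is published yet) would still serialize on the lock, but
the common get/put paths no longer need it.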