2018-12-18 10:36:40

by He Zhe

[permalink] [raw]
Subject: [PATCH] perf/x86/intel: Avoid unnecessary reallocations of memory allocated in cpu hotplug prepare state

From: He Zhe <[email protected]>

The memory of shared_regs, excl_cntrs, and constraint_list in struct cpu_hw_events
is currently allocated in hotplug prepare state and freed in dying state. The
memory can actually be reused across multiple cpu pluggings.

Besides, in preempt-rt full mode, the freeing can happen in atomic context and
thus cause the following BUG.

BUG: scheduling while atomic: migration/4/44/0x00000002
---- snip ----
Preemption disabled at:
[<ffffffffa1b282e1>] cpu_stopper_thread+0x71/0x100
CPU: 4 PID: 44 Comm: migration/4 Not tainted 4.19.8-rt6-preempt-rt #1
Hardware name: Intel Corporation Broadwell Client platform/Basking Ridge, BIOS BDW-E1R1.86C.0100.R03.1411050121 11/05/2014
Call Trace:
dump_stack+0x4f/0x6a
? cpu_stopper_thread+0x71/0x100
__schedule_bug.cold.16+0x38/0x55
__schedule+0x484/0x6c0
schedule+0x3d/0xf0
rt_spin_lock_slowlock_locked+0x11a/0x2a0
rt_spin_lock_slowlock+0x57/0x90
__rt_spin_lock+0x26/0x30
__write_rt_lock+0x23/0x1a0
? intel_pmu_cpu_dying+0x67/0x70
rt_write_lock+0x2a/0x30
find_and_remove_object+0x1e/0x80
delete_object_full+0x10/0x20
kmemleak_free+0x32/0x50
kfree+0x104/0x1f0
intel_pmu_cpu_dying+0x67/0x70
? x86_pmu_starting_cpu+0x30/0x30
x86_pmu_dying_cpu+0x1a/0x30
cpuhp_invoke_callback+0x9c/0x770
? cpu_disable_common+0x241/0x250
take_cpu_down+0x70/0xa0
multi_cpu_stop+0x62/0xc0
? cpu_stop_queue_work+0x130/0x130
cpu_stopper_thread+0x79/0x100
smpboot_thread_fn+0x217/0x2e0
kthread+0x121/0x140
? sort_range+0x30/0x30
? kthread_park+0x90/0x90
ret_from_fork+0x35/0x40

This patch changes the code to allocate the memory only when it has not already
been allocated, to fill it with zeroes when it has already been allocated, and to
remove the unnecessary freeing.

Credit to Sebastian Andrzej Siewior for his suggestion.

Signed-off-by: He Zhe <[email protected]>
---
arch/x86/events/core.c | 2 +-
arch/x86/events/intel/core.c | 45 ++++++++++++++++++++------------------------
arch/x86/events/perf_event.h | 5 ++---
3 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 374a197..f07d1b1 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2010,7 +2010,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void)

/* only needed, if we have extra_regs */
if (x86_pmu.extra_regs) {
- cpuc->shared_regs = allocate_shared_regs(cpu);
+ allocate_shared_regs(&cpuc->shared_regs, cpu);
if (!cpuc->shared_regs)
goto error;
}
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index ecc3e34..a3c18de 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3398,13 +3398,16 @@ ssize_t intel_event_sysfs_show(char *page, u64 config)
return x86_event_sysfs_show(page, config, event);
}

-struct intel_shared_regs *allocate_shared_regs(int cpu)
+void allocate_shared_regs(struct intel_shared_regs **pregs, int cpu)
{
- struct intel_shared_regs *regs;
+ struct intel_shared_regs *regs = *pregs;
int i;

- regs = kzalloc_node(sizeof(struct intel_shared_regs),
- GFP_KERNEL, cpu_to_node(cpu));
+ if (regs)
+ memset(regs, 0, sizeof(struct intel_shared_regs));
+ else
+ regs = *pregs = kzalloc_node(sizeof(struct intel_shared_regs),
+ GFP_KERNEL, cpu_to_node(cpu));
if (regs) {
/*
* initialize the locks to keep lockdep happy
@@ -3414,20 +3417,21 @@ struct intel_shared_regs *allocate_shared_regs(int cpu)

regs->core_id = -1;
}
- return regs;
}

-static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
+static void allocate_excl_cntrs(struct intel_excl_cntrs **pc, int cpu)
{
- struct intel_excl_cntrs *c;
+ struct intel_excl_cntrs *c = *pc;

- c = kzalloc_node(sizeof(struct intel_excl_cntrs),
- GFP_KERNEL, cpu_to_node(cpu));
+ if (c)
+ memset(c, 0, sizeof(struct intel_excl_cntrs));
+ else
+ c = *pc = kzalloc_node(sizeof(struct intel_excl_cntrs),
+ GFP_KERNEL, cpu_to_node(cpu));
if (c) {
raw_spin_lock_init(&c->lock);
c->core_id = -1;
}
- return c;
}

static int intel_pmu_cpu_prepare(int cpu)
@@ -3435,7 +3439,7 @@ static int intel_pmu_cpu_prepare(int cpu)
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
- cpuc->shared_regs = allocate_shared_regs(cpu);
+ allocate_shared_regs(&cpuc->shared_regs, cpu);
if (!cpuc->shared_regs)
goto err;
}
@@ -3443,11 +3447,14 @@ static int intel_pmu_cpu_prepare(int cpu)
if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);

- cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
+ if (cpuc->constraint_list)
+ memset(cpuc->constraint_list, 0, sz);
+ else
+ cpuc->constraint_list = kzalloc(sz, GFP_KERNEL);
if (!cpuc->constraint_list)
goto err_shared_regs;

- cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
+ allocate_excl_cntrs(&cpuc->excl_cntrs, cpu);
if (!cpuc->excl_cntrs)
goto err_constraint_list;

@@ -3559,18 +3566,6 @@ static void free_excl_cntrs(int cpu)

static void intel_pmu_cpu_dying(int cpu)
{
- struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
- struct intel_shared_regs *pc;
-
- pc = cpuc->shared_regs;
- if (pc) {
- if (pc->core_id == -1 || --pc->refcnt == 0)
- kfree(pc);
- cpuc->shared_regs = NULL;
- }
-
- free_excl_cntrs(cpu);
-
fini_debug_store_on_cpu(cpu);

if (x86_pmu.counter_freezing)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 78d7b70..967bdb6 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -877,7 +877,7 @@ struct event_constraint *
x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event);

-struct intel_shared_regs *allocate_shared_regs(int cpu);
+void allocate_shared_regs(struct intel_shared_regs **pregs, int cpu);

int intel_pmu_init(void);

@@ -1013,9 +1013,8 @@ static inline int intel_pmu_init(void)
return 0;
}

-static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
+static inline void allocate_shared_regs(struct intel_shared_regs **pregs, int cpu)
{
- return NULL;
}

static inline int is_ht_workaround_enabled(void)
--
2.7.4



2018-12-18 11:03:53

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf/x86/intel: Avoid unnecessary reallocations of memory allocated in cpu hotplug prepare state

On Tue, Dec 18, 2018 at 06:30:33PM +0800, [email protected] wrote:
> Besides, in preempt-rt full mode, the freeing can happen in atomic context and
> thus cause the following BUG.

Hurm, I thought we fixed all those long ago..

And no, the patch is horrible; that's what we have things like
x86_pmu::cpu_dead() for.

Subject: Re: [PATCH] perf/x86/intel: Avoid unnecessary reallocations of memory allocated in cpu hotplug prepare state

On 2018-12-18 18:30:33 [+0800], [email protected] wrote:
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -3398,13 +3398,16 @@ ssize_t intel_event_sysfs_show(char *page, u64 config)
> return x86_event_sysfs_show(page, config, event);
> }
>
> -struct intel_shared_regs *allocate_shared_regs(int cpu)
> +void allocate_shared_regs(struct intel_shared_regs **pregs, int cpu)
> {
> - struct intel_shared_regs *regs;
> + struct intel_shared_regs *regs = *pregs;
> int i;
>
> - regs = kzalloc_node(sizeof(struct intel_shared_regs),
> - GFP_KERNEL, cpu_to_node(cpu));
> + if (regs)
> + memset(regs, 0, sizeof(struct intel_shared_regs));
> + else
> + regs = *pregs = kzalloc_node(sizeof(struct intel_shared_regs),
> + GFP_KERNEL, cpu_to_node(cpu));
> if (regs) {
> /*
> * initialize the locks to keep lockdep happy

void allocate_shared_regs(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_shared_regs *regs = cpuc->shared_regs;
int i;

if (!regs)
regs = kmalloc_node(sizeof(struct intel_shared_regs),
GFP_KERNEL, cpu_to_node(cpu));
if (!regs)
return;
memset(regs, 0, sizeof(struct intel_shared_regs));
for (i = 0; i < EXTRA_REG_MAX; i++)
raw_spin_lock_init(&regs->regs[i].lock);

cpuc->shared_regs = regs;
}


> @@ -3414,20 +3417,21 @@ struct intel_shared_regs *allocate_shared_regs(int cpu)
>
> regs->core_id = -1;
> }
> - return regs;
> }
>
> -static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
> +static void allocate_excl_cntrs(struct intel_excl_cntrs **pc, int cpu)
> {
> - struct intel_excl_cntrs *c;
> + struct intel_excl_cntrs *c = *pc;
>
> - c = kzalloc_node(sizeof(struct intel_excl_cntrs),
> - GFP_KERNEL, cpu_to_node(cpu));
> + if (c)
> + memset(c, 0, sizeof(struct intel_excl_cntrs));
> + else
> + c = *pc = kzalloc_node(sizeof(struct intel_excl_cntrs),
> + GFP_KERNEL, cpu_to_node(cpu));
> if (c) {
> raw_spin_lock_init(&c->lock);
> c->core_id = -1;
> }
> - return c;
> }

static void allocate_excl_cntrs(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_excl_cntrs *c = cpuc->excl_cntrs;

if (!c)
c = kmalloc_node(sizeof(struct intel_excl_cntrs),
GFP_KERNEL, cpu_to_node(cpu));
if (!c)
return;
memset(c, 0, sizeof(struct intel_excl_cntrs));
raw_spin_lock_init(&c->lock);
c->core_id = -1;
cpuc->excl_cntrs = c;
}


> static void intel_pmu_cpu_dying(int cpu)
> {
> - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
> - struct intel_shared_regs *pc;
> -
> - pc = cpuc->shared_regs;
> - if (pc) {
> - if (pc->core_id == -1 || --pc->refcnt == 0)

I think the ->refcnt member can go, too. It is only incremented for no
reason now.

> - kfree(pc);
> - cpuc->shared_regs = NULL;
> - }
> -
> - free_excl_cntrs(cpu);
> -
> fini_debug_store_on_cpu(cpu);
>
> if (x86_pmu.counter_freezing)

Sebastian

Subject: Re: [PATCH] perf/x86/intel: Avoid unnecessary reallocations of memory allocated in cpu hotplug prepare state

On 2018-12-18 12:02:09 [+0100], Peter Zijlstra wrote:
> On Tue, Dec 18, 2018 at 06:30:33PM +0800, [email protected] wrote:
> > Besides, in preempt-rt full mode, the freeing can happen in atomic context and
> > thus cause the following BUG.
>
> Hurm, I though we fixed all those long ago..
>
> And no, the patch is horrible; that's what we have things like
> x86_pmu::cpu_dead() for.

ehm, you say we keep memory allocation +free on CPU up/down?

Sebastian

2018-12-18 11:27:26

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH] perf/x86/intel: Avoid unnecessary reallocations of memory allocated in cpu hotplug prepare state

On Tue, 18 Dec 2018, Peter Zijlstra wrote:
> On Tue, Dec 18, 2018 at 06:30:33PM +0800, [email protected] wrote:
> > Besides, in preempt-rt full mode, the freeing can happen in atomic context and
> > thus cause the following BUG.
>
> Hurm, I though we fixed all those long ago..

Either we missed that one or it came back somehow.

> And no, the patch is horrible; that's what we have things like
> x86_pmu::cpu_dead() for.

Like the below?

Thanks,

tglx

8<------------

--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3559,6 +3559,14 @@ static void free_excl_cntrs(int cpu)

static void intel_pmu_cpu_dying(int cpu)
{
+ fini_debug_store_on_cpu(cpu);
+
+ if (x86_pmu.counter_freezing)
+ disable_counter_freeze();
+}
+
+static void intel_pmu_cpu_dead(int cpu)
+{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_shared_regs *pc;

@@ -3570,11 +3578,6 @@ static void intel_pmu_cpu_dying(int cpu)
}

free_excl_cntrs(cpu);
-
- fini_debug_store_on_cpu(cpu);
-
- if (x86_pmu.counter_freezing)
- disable_counter_freeze();
}

static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -3663,6 +3666,7 @@ static __initconst const struct x86_pmu
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
};

static struct attribute *intel_pmu_attrs[];
@@ -3703,6 +3707,7 @@ static __initconst const struct x86_pmu
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
.guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_sched_task,
};


2018-12-18 11:32:31

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf/x86/intel: Avoid unnecessary reallocations of memory allocated in cpu hotplug prepare state

On Tue, Dec 18, 2018 at 12:16:37PM +0100, Sebastian Andrzej Siewior wrote:
> On 2018-12-18 12:02:09 [+0100], Peter Zijlstra wrote:
> > On Tue, Dec 18, 2018 at 06:30:33PM +0800, [email protected] wrote:
> > > Besides, in preempt-rt full mode, the freeing can happen in atomic context and
> > > thus cause the following BUG.
> >
> > Hurm, I though we fixed all those long ago..
> >
> > And no, the patch is horrible; that's what we have things like
> > x86_pmu::cpu_dead() for.
>
> ehm, you say we keep memory allocation +free on CPU up/down?

Sure, why not?

2018-12-18 11:39:41

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf/x86/intel: Avoid unnecessary reallocations of memory allocated in cpu hotplug prepare state

On Tue, Dec 18, 2018 at 12:31:19PM +0100, Peter Zijlstra wrote:
> On Tue, Dec 18, 2018 at 12:16:37PM +0100, Sebastian Andrzej Siewior wrote:
> > On 2018-12-18 12:02:09 [+0100], Peter Zijlstra wrote:
> > > On Tue, Dec 18, 2018 at 06:30:33PM +0800, [email protected] wrote:
> > > > Besides, in preempt-rt full mode, the freeing can happen in atomic context and
> > > > thus cause the following BUG.
> > >
> > > Hurm, I though we fixed all those long ago..
> > >
> > > And no, the patch is horrible; that's what we have things like
> > > x86_pmu::cpu_dead() for.
> >
> > ehm, you say we keep memory allocation +free on CPU up/down?
>
> Sure, why not?

I suspect the below is all we really need.

---
arch/x86/events/intel/core.c | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 40e12cfc87f6..daafb893449b 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3558,6 +3558,14 @@ static void free_excl_cntrs(int cpu)
}

static void intel_pmu_cpu_dying(int cpu)
+{
+ fini_debug_store_on_cpu(cpu);
+
+ if (x86_pmu.counter_freezing)
+ disable_counter_freeze();
+}
+
+static void intel_pmu_cpu_dead(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_shared_regs *pc;
@@ -3570,11 +3578,6 @@ static void intel_pmu_cpu_dying(int cpu)
}

free_excl_cntrs(cpu);
-
- fini_debug_store_on_cpu(cpu);
-
- if (x86_pmu.counter_freezing)
- disable_counter_freeze();
}

static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -3663,6 +3666,7 @@ static __initconst const struct x86_pmu core_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
};

static struct attribute *intel_pmu_attrs[];
@@ -3703,6 +3707,8 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
+
.guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_sched_task,
};

2018-12-18 11:40:45

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf/x86/intel: Avoid unnecessary reallocations of memory allocated in cpu hotplug prepare state

On Tue, Dec 18, 2018 at 12:25:23PM +0100, Thomas Gleixner wrote:
> On Tue, 18 Dec 2018, Peter Zijlstra wrote:
> > On Tue, Dec 18, 2018 at 06:30:33PM +0800, [email protected] wrote:
> > > Besides, in preempt-rt full mode, the freeing can happen in atomic context and
> > > thus cause the following BUG.
> >
> > Hurm, I though we fixed all those long ago..
>
> Either we missed that one or it came back somehow.
>
> > And no, the patch is horrible; that's what we have things like
> > x86_pmu::cpu_dead() for.
>
> Like the below?

Look familiar that ;-)

Subject: Re: [PATCH] perf/x86/intel: Avoid unnecessary reallocations of memory allocated in cpu hotplug prepare state

On 2018-12-18 12:37:00 [+0100], Peter Zijlstra wrote:
> On Tue, Dec 18, 2018 at 12:31:19PM +0100, Peter Zijlstra wrote:
> > On Tue, Dec 18, 2018 at 12:16:37PM +0100, Sebastian Andrzej Siewior wrote:
> > > On 2018-12-18 12:02:09 [+0100], Peter Zijlstra wrote:
> > > > On Tue, Dec 18, 2018 at 06:30:33PM +0800, [email protected] wrote:
> > > > > Besides, in preempt-rt full mode, the freeing can happen in atomic context and
> > > > > thus cause the following BUG.
> > > >
> > > > Hurm, I though we fixed all those long ago..
> > > >
> > > > And no, the patch is horrible; that's what we have things like
> > > > x86_pmu::cpu_dead() for.
> > >
> > > ehm, you say we keep memory allocation +free on CPU up/down?
> >
> > Sure, why not?

It does not seem to be useful to allocate & free memory which you need
anyway. So you could avoid the refcnt for instance.
Also I doubt the memory will remain unallocated for a longer period of
time (like you would remove the CPU for good and not boot it again a
minute later).
*Maybe* it is different in cloud environment where you attach vcpus
depending on guest load at runtime but still…

> I suspect the below is all we really need.
Zhe, could you please have a look?

Sebastian

2018-12-18 12:37:26

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH] perf/x86/intel: Avoid unnecessary reallocations of memory allocated in cpu hotplug prepare state

On Tue, Dec 18, 2018 at 12:47:08PM +0100, Sebastian Andrzej Siewior wrote:
> On 2018-12-18 12:37:00 [+0100], Peter Zijlstra wrote:
> > On Tue, Dec 18, 2018 at 12:31:19PM +0100, Peter Zijlstra wrote:
> > > On Tue, Dec 18, 2018 at 12:16:37PM +0100, Sebastian Andrzej Siewior wrote:
> > > > On 2018-12-18 12:02:09 [+0100], Peter Zijlstra wrote:
> > > > > On Tue, Dec 18, 2018 at 06:30:33PM +0800, [email protected] wrote:
> > > > > > Besides, in preempt-rt full mode, the freeing can happen in atomic context and
> > > > > > thus cause the following BUG.
> > > > >
> > > > > Hurm, I though we fixed all those long ago..
> > > > >
> > > > > And no, the patch is horrible; that's what we have things like
> > > > > x86_pmu::cpu_dead() for.
> > > >
> > > > ehm, you say we keep memory allocation +free on CPU up/down?
> > >
> > > Sure, why not?
>
> It does not seem to be useful to allocate & free memory which you need
> anyway. So you could avoid the refcnt for instance.

It makes the code simpler and hotplug is not what you'd call a fast path
anyway.

> Also I doubt the memory will remain unallocated for a longer period of
> time (like you would remove the CPU for good and not boot it again a
> minute later).

SMT disable comes to mind...

> *Maybe* it is different in cloud environment where you attach vcpus
> depending on guest load at runtime but still…

And that..


2018-12-18 12:51:05

by He Zhe

[permalink] [raw]
Subject: Re: [PATCH] perf/x86/intel: Avoid unnecessary reallocations of memory allocated in cpu hotplug prepare state



On 2018/12/18 19:47, Sebastian Andrzej Siewior wrote:
> On 2018-12-18 12:37:00 [+0100], Peter Zijlstra wrote:
>> On Tue, Dec 18, 2018 at 12:31:19PM +0100, Peter Zijlstra wrote:
>>> On Tue, Dec 18, 2018 at 12:16:37PM +0100, Sebastian Andrzej Siewior wrote:
>>>> On 2018-12-18 12:02:09 [+0100], Peter Zijlstra wrote:
>>>>> On Tue, Dec 18, 2018 at 06:30:33PM +0800, [email protected] wrote:
>>>>>> Besides, in preempt-rt full mode, the freeing can happen in atomic context and
>>>>>> thus cause the following BUG.
>>>>> Hurm, I though we fixed all those long ago..
>>>>>
>>>>> And no, the patch is horrible; that's what we have things like
>>>>> x86_pmu::cpu_dead() for.
>>>> ehm, you say we keep memory allocation +free on CPU up/down?
>>> Sure, why not?
> It does not seem to be useful to allocate & free memory which you need
> anyway. So you could avoid the refcnt for instance.
> Also I doubt the memory will remain unallocated for a longer period of
> time (like you would remove the CPU for good and not boot it again a
> minute later).
> *Maybe* it is different in cloud environment where you attach vcpus
> depending on guest load at runtime but still…
>
>> I suspect the below is all we really need.
> Zhe, could you please have a look?

This works in my environment, taking CPUs offline and online many times.

Zhe

>
> Sebastian
>


Subject: Re: [PATCH] perf/x86/intel: Avoid unnecessary reallocations of memory allocated in cpu hotplug prepare state

On 2018-12-18 20:45:32 [+0800], He Zhe wrote:
> This works in my environment, taking CPUs offline and online many times.

thank you.

> Zhe
>
> >
Sebastian

Subject: [PATCH] perf/x86/intel: Delay memory deallocation until x86_pmu_dead_cpu()

From: Peter Zijlstra <[email protected]>

intel_pmu_cpu_prepare() allocated memory for ->shared_regs among other
members of struct cpu_hw_events. This memory is released in
intel_pmu_cpu_dying() which is wrong. The counterpart of the
intel_pmu_cpu_prepare() callback is x86_pmu_dead_cpu().

Otherwise if the CPU fails on the UP path between CPUHP_PERF_X86_PREPARE
and CPUHP_AP_PERF_X86_STARTING then it won't release the memory but
allocate new memory on the next attempt to online the CPU (leaking the
old memory).
Also, if the CPU down path fails between CPUHP_AP_PERF_X86_STARTING and
CPUHP_PERF_X86_PREPARE then the CPU will go back online but never
allocate the memory that was released in x86_pmu_dying_cpu().

Make the memory allocation/free symmetrical in regard to the CPU hotplug
notifier by moving the deallocation to intel_pmu_cpu_dead().

This started in commit
a7e3ed1e47011 ("perf: Add support for supplementary event registers").

Cc: [email protected]
Reported-by: He Zhe <[email protected]>
Fixes: a7e3ed1e47011 ("perf: Add support for supplementary event registers").
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
[bigeasy: patch description]
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
---
arch/x86/events/intel/core.c | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index ecc3e34ca955f..90b6718ff861d 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3558,6 +3558,14 @@ static void free_excl_cntrs(int cpu)
}

static void intel_pmu_cpu_dying(int cpu)
+{
+ fini_debug_store_on_cpu(cpu);
+
+ if (x86_pmu.counter_freezing)
+ disable_counter_freeze();
+}
+
+static void intel_pmu_cpu_dead(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_shared_regs *pc;
@@ -3570,11 +3578,6 @@ static void intel_pmu_cpu_dying(int cpu)
}

free_excl_cntrs(cpu);
-
- fini_debug_store_on_cpu(cpu);
-
- if (x86_pmu.counter_freezing)
- disable_counter_freeze();
}

static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -3663,6 +3666,7 @@ static __initconst const struct x86_pmu core_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
};

static struct attribute *intel_pmu_attrs[];
@@ -3703,6 +3707,8 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
+
.guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_sched_task,
};
--
2.20.1


Subject: [tip:perf/core] perf/x86/intel: Delay memory deallocation until x86_pmu_dead_cpu()

Commit-ID: 602cae04c4864bb3487dfe4c2126c8d9e7e1614a
Gitweb: https://git.kernel.org/tip/602cae04c4864bb3487dfe4c2126c8d9e7e1614a
Author: Peter Zijlstra <[email protected]>
AuthorDate: Wed, 19 Dec 2018 17:53:50 +0100
Committer: Ingo Molnar <[email protected]>
CommitDate: Mon, 4 Feb 2019 08:44:51 +0100

perf/x86/intel: Delay memory deallocation until x86_pmu_dead_cpu()

intel_pmu_cpu_prepare() allocated memory for ->shared_regs among other
members of struct cpu_hw_events. This memory is released in
intel_pmu_cpu_dying() which is wrong. The counterpart of the
intel_pmu_cpu_prepare() callback is x86_pmu_dead_cpu().

Otherwise if the CPU fails on the UP path between CPUHP_PERF_X86_PREPARE
and CPUHP_AP_PERF_X86_STARTING then it won't release the memory but
allocate new memory on the next attempt to online the CPU (leaking the
old memory).
Also, if the CPU down path fails between CPUHP_AP_PERF_X86_STARTING and
CPUHP_PERF_X86_PREPARE then the CPU will go back online but never
allocate the memory that was released in x86_pmu_dying_cpu().

Make the memory allocation/free symmetrical in regard to the CPU hotplug
notifier by moving the deallocation to intel_pmu_cpu_dead().

This started in commit:

a7e3ed1e47011 ("perf: Add support for supplementary event registers").

In principle the bug was introduced in v2.6.39 (!), but it will almost
certainly not backport cleanly across the big CPU hotplug rewrite between v4.7-v4.15...

[ bigeasy: Added patch description. ]
[ mingo: Added backporting guidance. ]

Reported-by: He Zhe <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]> # With developer hat on
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]> # With maintainer hat on
Cc: Alexander Shishkin <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: <[email protected]>
Fixes: a7e3ed1e47011 ("perf: Add support for supplementary event registers").
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/events/intel/core.c | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 40e12cfc87f6..daafb893449b 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3558,6 +3558,14 @@ static void free_excl_cntrs(int cpu)
}

static void intel_pmu_cpu_dying(int cpu)
+{
+ fini_debug_store_on_cpu(cpu);
+
+ if (x86_pmu.counter_freezing)
+ disable_counter_freeze();
+}
+
+static void intel_pmu_cpu_dead(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct intel_shared_regs *pc;
@@ -3570,11 +3578,6 @@ static void intel_pmu_cpu_dying(int cpu)
}

free_excl_cntrs(cpu);
-
- fini_debug_store_on_cpu(cpu);
-
- if (x86_pmu.counter_freezing)
- disable_counter_freeze();
}

static void intel_pmu_sched_task(struct perf_event_context *ctx,
@@ -3663,6 +3666,7 @@ static __initconst const struct x86_pmu core_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
};

static struct attribute *intel_pmu_attrs[];
@@ -3703,6 +3707,8 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
+ .cpu_dead = intel_pmu_cpu_dead,
+
.guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_sched_task,
};