2021-09-10 18:59:21

by Wei Liu

[permalink] [raw]
Subject: [PATCH v2 2/2] x86/hyperv: remove on-stack cpumask from hv_send_ipi_mask_allbutself

It is not good practice to allocate a cpumask on the stack, given it may
consume up to 1 kilobyte of stack space if the kernel is configured to
have 8192 cpus.

The internal helper functions __send_ipi_mask{,_ex} need to loop over
the provided mask anyway, so it is not too difficult to skip `self'
there. We can thus do away with the on-stack cpumask in
hv_send_ipi_mask_allbutself.

Adjust call sites of __send_ipi_mask as needed.

Reported-by: Linus Torvalds <[email protected]>
Suggested-by: Michael Kelley <[email protected]>
Suggested-by: Linus Torvalds <[email protected]>
Fixes: 68bb7bfb7985d ("X86/Hyper-V: Enable IPI enlightenments")
Signed-off-by: Wei Liu <[email protected]>
---

v2: more robust check in __send_ipi_mask
---
arch/x86/hyperv/hv_apic.c | 43 +++++++++++++++++++++++----------------
1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 90e682a92820..48aefcea724b 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -99,7 +99,8 @@ static void hv_apic_eoi_write(u32 reg, u32 val)
/*
* IPI implementation on Hyper-V.
*/
-static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
+static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
+ bool exclude_self)
{
struct hv_send_ipi_ex **arg;
struct hv_send_ipi_ex *ipi_arg;
@@ -123,7 +124,10 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)

if (!cpumask_equal(mask, cpu_present_mask)) {
ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
- nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
+ if (exclude_self)
+ nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask);
+ else
+ nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
}
if (nr_bank < 0)
goto ipi_mask_ex_done;
@@ -138,15 +142,25 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
return hv_result_success(status);
}

-static bool __send_ipi_mask(const struct cpumask *mask, int vector)
+static bool __send_ipi_mask(const struct cpumask *mask, int vector,
+ bool exclude_self)
{
- int cur_cpu, vcpu;
+ int cur_cpu, vcpu, this_cpu = smp_processor_id();
struct hv_send_ipi ipi_arg;
u64 status;
+ unsigned int weight;

trace_hyperv_send_ipi_mask(mask, vector);

- if (cpumask_empty(mask))
+ weight = cpumask_weight(mask);
+
+ /*
+ * Do nothing if
+ * 1. the mask is empty
+ * 2. the mask only contains self when exclude_self is true
+ */
+ if (weight == 0 ||
+ (exclude_self && weight == 1 && cpumask_first(mask) == this_cpu))
return true;

if (!hv_hypercall_pg)
@@ -172,6 +186,8 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
ipi_arg.cpu_mask = 0;

for_each_cpu(cur_cpu, mask) {
+ if (exclude_self && cur_cpu == this_cpu)
+ continue;
vcpu = hv_cpu_number_to_vp_number(cur_cpu);
if (vcpu == VP_INVAL)
return false;
@@ -191,7 +207,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
return hv_result_success(status);

do_ex_hypercall:
- return __send_ipi_mask_ex(mask, vector);
+ return __send_ipi_mask_ex(mask, vector, exclude_self);
}

static bool __send_ipi_one(int cpu, int vector)
@@ -208,7 +224,7 @@ static bool __send_ipi_one(int cpu, int vector)
return false;

if (vp >= 64)
- return __send_ipi_mask_ex(cpumask_of(cpu), vector);
+ return __send_ipi_mask_ex(cpumask_of(cpu), vector, false);

status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp));
return hv_result_success(status);
@@ -222,20 +238,13 @@ static void hv_send_ipi(int cpu, int vector)

static void hv_send_ipi_mask(const struct cpumask *mask, int vector)
{
- if (!__send_ipi_mask(mask, vector))
+ if (!__send_ipi_mask(mask, vector, false))
orig_apic.send_IPI_mask(mask, vector);
}

static void hv_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
{
- unsigned int this_cpu = smp_processor_id();
- struct cpumask new_mask;
- const struct cpumask *local_mask;
-
- cpumask_copy(&new_mask, mask);
- cpumask_clear_cpu(this_cpu, &new_mask);
- local_mask = &new_mask;
- if (!__send_ipi_mask(local_mask, vector))
+ if (!__send_ipi_mask(mask, vector, true))
orig_apic.send_IPI_mask_allbutself(mask, vector);
}

@@ -246,7 +255,7 @@ static void hv_send_ipi_allbutself(int vector)

static void hv_send_ipi_all(int vector)
{
- if (!__send_ipi_mask(cpu_online_mask, vector))
+ if (!__send_ipi_mask(cpu_online_mask, vector, false))
orig_apic.send_IPI_all(vector);
}

--
2.30.2


2021-09-11 15:13:29

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: RE: [PATCH v2 2/2] x86/hyperv: remove on-stack cpumask from hv_send_ipi_mask_allbutself

From: Wei Liu <[email protected]> Sent: Friday, September 10, 2021 11:57 AM
>
> It is not good practice to allocate a cpumask on the stack, given it may
> consume up to 1 kilobyte of stack space if the kernel is configured to
> have 8192 cpus.
>
> The internal helper functions __send_ipi_mask{,_ex} need to loop over
> the provided mask anyway, so it is not too difficult to skip `self'
> there. We can thus do away with the on-stack cpumask in
> hv_send_ipi_mask_allbutself.
>
> Adjust call sites of __send_ipi_mask as needed.
>
> Reported-by: Linus Torvalds <[email protected]>
> Suggested-by: Michael Kelley <[email protected]>
> Suggested-by: Linus Torvalds <[email protected]>
> Fixes: 68bb7bfb7985d ("X86/Hyper-V: Enable IPI enlightenments")
> Signed-off-by: Wei Liu <[email protected]>
> ---
>
> v2: more robust check in __send_ipi_mask
> ---
> arch/x86/hyperv/hv_apic.c | 43 +++++++++++++++++++++++----------------
> 1 file changed, 26 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
> index 90e682a92820..48aefcea724b 100644
> --- a/arch/x86/hyperv/hv_apic.c
> +++ b/arch/x86/hyperv/hv_apic.c
> @@ -99,7 +99,8 @@ static void hv_apic_eoi_write(u32 reg, u32 val)
> /*
> * IPI implementation on Hyper-V.
> */
> -static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
> +static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
> + bool exclude_self)
> {
> struct hv_send_ipi_ex **arg;
> struct hv_send_ipi_ex *ipi_arg;
> @@ -123,7 +124,10 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
>
> if (!cpumask_equal(mask, cpu_present_mask)) {
> ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
> - nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
> + if (exclude_self)
> + nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask);
> + else
> + nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
> }
> if (nr_bank < 0)
> goto ipi_mask_ex_done;
> @@ -138,15 +142,25 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
> return hv_result_success(status);
> }
>
> -static bool __send_ipi_mask(const struct cpumask *mask, int vector)
> +static bool __send_ipi_mask(const struct cpumask *mask, int vector,
> + bool exclude_self)
> {
> - int cur_cpu, vcpu;
> + int cur_cpu, vcpu, this_cpu = smp_processor_id();
> struct hv_send_ipi ipi_arg;
> u64 status;
> + unsigned int weight;
>
> trace_hyperv_send_ipi_mask(mask, vector);
>
> - if (cpumask_empty(mask))
> + weight = cpumask_weight(mask);
> +
> + /*
> + * Do nothing if
> + * 1. the mask is empty
> + * 2. the mask only contains self when exclude_self is true
> + */
> + if (weight == 0 ||
> + (exclude_self && weight == 1 && cpumask_first(mask) == this_cpu))

Nit: cpumask_test_cpu(this_cpu, mask) would seem to be a better fit for this
use case than cpumask_first(). But either works.

> return true;
>
> if (!hv_hypercall_pg)
> @@ -172,6 +186,8 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
> ipi_arg.cpu_mask = 0;
>
> for_each_cpu(cur_cpu, mask) {
> + if (exclude_self && cur_cpu == this_cpu)
> + continue;
> vcpu = hv_cpu_number_to_vp_number(cur_cpu);
> if (vcpu == VP_INVAL)
> return false;
> @@ -191,7 +207,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
> return hv_result_success(status);
>
> do_ex_hypercall:
> - return __send_ipi_mask_ex(mask, vector);
> + return __send_ipi_mask_ex(mask, vector, exclude_self);
> }
>
> static bool __send_ipi_one(int cpu, int vector)
> @@ -208,7 +224,7 @@ static bool __send_ipi_one(int cpu, int vector)
> return false;
>
> if (vp >= 64)
> - return __send_ipi_mask_ex(cpumask_of(cpu), vector);
> + return __send_ipi_mask_ex(cpumask_of(cpu), vector, false);
>
> status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp));
> return hv_result_success(status);
> @@ -222,20 +238,13 @@ static void hv_send_ipi(int cpu, int vector)
>
> static void hv_send_ipi_mask(const struct cpumask *mask, int vector)
> {
> - if (!__send_ipi_mask(mask, vector))
> + if (!__send_ipi_mask(mask, vector, false))
> orig_apic.send_IPI_mask(mask, vector);
> }
>
> static void hv_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
> {
> - unsigned int this_cpu = smp_processor_id();
> - struct cpumask new_mask;
> - const struct cpumask *local_mask;
> -
> - cpumask_copy(&new_mask, mask);
> - cpumask_clear_cpu(this_cpu, &new_mask);
> - local_mask = &new_mask;
> - if (!__send_ipi_mask(local_mask, vector))
> + if (!__send_ipi_mask(mask, vector, true))
> orig_apic.send_IPI_mask_allbutself(mask, vector);
> }
>
> @@ -246,7 +255,7 @@ static void hv_send_ipi_allbutself(int vector)
>
> static void hv_send_ipi_all(int vector)
> {
> - if (!__send_ipi_mask(cpu_online_mask, vector))
> + if (!__send_ipi_mask(cpu_online_mask, vector, false))
> orig_apic.send_IPI_all(vector);
> }
>
> --
> 2.30.2

Reviewed-by: Michael Kelley <[email protected]>

2021-09-11 15:26:58

by Wei Liu

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/hyperv: remove on-stack cpumask from hv_send_ipi_mask_allbutself

On Sat, Sep 11, 2021 at 03:09:50PM +0000, Michael Kelley wrote:
> From: Wei Liu <[email protected]> Sent: Friday, September 10, 2021 11:57 AM
[...]
> > -static bool __send_ipi_mask(const struct cpumask *mask, int vector)
> > +static bool __send_ipi_mask(const struct cpumask *mask, int vector,
> > + bool exclude_self)
> > {
> > - int cur_cpu, vcpu;
> > + int cur_cpu, vcpu, this_cpu = smp_processor_id();
> > struct hv_send_ipi ipi_arg;
> > u64 status;
> > + unsigned int weight;
> >
> > trace_hyperv_send_ipi_mask(mask, vector);
> >
> > - if (cpumask_empty(mask))
> > + weight = cpumask_weight(mask);
> > +
> > + /*
> > + * Do nothing if
> > + * 1. the mask is empty
> > + * 2. the mask only contains self when exclude_self is true
> > + */
> > + if (weight == 0 ||
> > + (exclude_self && weight == 1 && cpumask_first(mask) == this_cpu))
>
> Nit: cpumask_test_cpu(this_cpu, mask) would seem to be a better fit for this
> use case than cpumask_first(). But either works.

I will adjust the code when I commit this patch.

Wei.

2021-09-26 22:07:59

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/hyperv: remove on-stack cpumask from hv_send_ipi_mask_allbutself

Wei!

On Fri, Sep 10 2021 at 18:57, Wei Liu wrote:
> -static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
> +static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
> + bool exclude_self)
> {
> struct hv_send_ipi_ex **arg;
> struct hv_send_ipi_ex *ipi_arg;
> @@ -123,7 +124,10 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
>
> if (!cpumask_equal(mask, cpu_present_mask)) {

Not part of that patch, but is checking cpu_present_mask correct here?
If so then this really lacks a comment for the casual reader.

> ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
> - nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
> + if (exclude_self)
> + nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask);
> + else
> + nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
> }

But, what happens in the case that mask == cpu_present_mask and
exclude_self == true?

AFAICT it ends up sending the IPI to all CPUs including self:

if (!nr_bank)
ipi_arg->vp_set.format = HV_GENERIC_SET_ALL;

Not entirely correct, right?

Thanks,

tglx

2021-10-05 12:55:57

by Vitaly Kuznetsov

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/hyperv: remove on-stack cpumask from hv_send_ipi_mask_allbutself

Thomas Gleixner <[email protected]> writes:

> Wei!
>

Not Wei here but I don't see the question answered on the mailing list
so let me give my thoughts.

> On Fri, Sep 10 2021 at 18:57, Wei Liu wrote:
>> -static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
>> +static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
>> + bool exclude_self)
>> {
>> struct hv_send_ipi_ex **arg;
>> struct hv_send_ipi_ex *ipi_arg;
>> @@ -123,7 +124,10 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
>>
>> if (!cpumask_equal(mask, cpu_present_mask)) {
>
> Not part of that patch, but is checking cpu_present_mask correct here?
> If so then this really lacks a comment for the casual reader.

It seems it *was* correct prior to 'exclude_self': the idea is that for
everything but 'cpu_present_mask' we use HV_GENERIC_SET_SPARSE_4K
format, for 'cpu_present_mask' we just use 'all' (HV_GENERIC_SET_ALL)
to avoid specifying individual CPUs.

>
>> ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
>> - nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
>> + if (exclude_self)
>> + nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask);
>> + else
>> + nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
>> }
>
> But, what happens in the case that mask == cpu_present_mask and
> exclude_self == true?
>
> AFAICT it ends up sending the IPI to all CPUs including self:
>
> if (!nr_bank)
> ipi_arg->vp_set.format = HV_GENERIC_SET_ALL;
>
> Not entirely correct, right?

It's not, I think we need something like (completely untested)

diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 32a1ad356c18..80b7660208e4 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -122,17 +122,17 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
ipi_arg->reserved = 0;
ipi_arg->vp_set.valid_bank_mask = 0;

- if (!cpumask_equal(mask, cpu_present_mask)) {
+ if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) {
ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
if (exclude_self)
nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask);
else
nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
- }
- if (nr_bank < 0)
- goto ipi_mask_ex_done;
- if (!nr_bank)
+ if (nr_bank <= 0)
+ goto ipi_mask_ex_done;
+ } else {
ipi_arg->vp_set.format = HV_GENERIC_SET_ALL;
+ }

status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank,
ipi_arg, NULL);

here. Wei, I can test and send this out if you're not on it already.

--
Vitaly

2021-10-06 11:40:34

by Wei Liu

[permalink] [raw]
Subject: Re: [PATCH v2 2/2] x86/hyperv: remove on-stack cpumask from hv_send_ipi_mask_allbutself

Hi Thomas and Vitaly

Sorry for the late reply. I was buried in my other work.

On Tue, Oct 05, 2021 at 02:53:29PM +0200, Vitaly Kuznetsov wrote:
> Thomas Gleixner <[email protected]> writes:
>
> > Wei!
> >
>
> Not Wei here but I don't see the question answered on the mailing list
> so let me give my thoughts.
>
> > On Fri, Sep 10 2021 at 18:57, Wei Liu wrote:
> >> -static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
> >> +static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
> >> + bool exclude_self)
> >> {
> >> struct hv_send_ipi_ex **arg;
> >> struct hv_send_ipi_ex *ipi_arg;
> >> @@ -123,7 +124,10 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
> >>
> >> if (!cpumask_equal(mask, cpu_present_mask)) {
> >
> > Not part of that patch, but is checking cpu_present_mask correct here?
> > If so then this really lacks a comment for the casual reader.
>
> It seems it *was* correct prior to 'exclude_self': the idea is that for
> everything but 'cpu_present_mask' we use HV_GENERIC_SET_SPARSE_4K
> format, for 'cpu_present_mask' we just use 'all' (HV_GENERIC_SET_ALL)
> to avoid specifying individual CPUs.

Yes, that's the intent.

It was correct before because cpumask would have been filtered to
exclude "self" when it came to this function.

>
> >
> >> ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
> >> - nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
> >> + if (exclude_self)
> >> + nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask);
> >> + else
> >> + nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
> >> }
> >
> > But, what happens in the case that mask == cpu_present_mask and
> > exclude_self == true?
> >
> > AFAICT it ends up sending the IPI to all CPUs including self:
> >
> > if (!nr_bank)
> > ipi_arg->vp_set.format = HV_GENERIC_SET_ALL;
> >
> > Not entirely correct, right?
>
> It's not, I think we need something like (completely untested)
>
> diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
> index 32a1ad356c18..80b7660208e4 100644
> --- a/arch/x86/hyperv/hv_apic.c
> +++ b/arch/x86/hyperv/hv_apic.c
> @@ -122,17 +122,17 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
> ipi_arg->reserved = 0;
> ipi_arg->vp_set.valid_bank_mask = 0;
>
> - if (!cpumask_equal(mask, cpu_present_mask)) {
> + if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) {
> ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
> if (exclude_self)
> nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask);
> else
> nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
> - }
> - if (nr_bank < 0)
> - goto ipi_mask_ex_done;
> - if (!nr_bank)
> + if (nr_bank <= 0)
> + goto ipi_mask_ex_done;
> + } else {
> ipi_arg->vp_set.format = HV_GENERIC_SET_ALL;
> + }
>
> status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank,
> ipi_arg, NULL);
>
> here. Wei, I can test and send this out if you're not on it already.
>

Please turn this into a patch and send it out. Thank you so much for
looking into it.

Wei.

> --
> Vitaly
>