It is not good practice to allocate a cpumask on the stack, given that
it may consume up to 1 kilobyte of stack space if the kernel is
configured to have 8192 CPUs.
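(A cpumask is a bitmap of NR_CPUS bits, so 8192 CPUs need 8192 / 8 =
1024 bytes.)
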
The internal helper functions __send_ipi_mask{,_ex} need to loop over
the provided mask anyway, so it is not too difficult to skip `self'
there. We can thus do away with the on-stack cpumask in
hv_send_ipi_mask_allbutself.

Adjust call sites of __send_ipi_mask as needed.

Reported-by: Linus Torvalds <[email protected]>
Suggested-by: Michael Kelley <[email protected]>
Suggested-by: Linus Torvalds <[email protected]>
Fixes: 68bb7bfb7985d ("X86/Hyper-V: Enable IPI enlightenments")
Signed-off-by: Wei Liu <[email protected]>
---
 arch/x86/hyperv/hv_apic.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 90e682a92820..911cd41d931c 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -99,7 +99,8 @@ static void hv_apic_eoi_write(u32 reg, u32 val)
 /*
  * IPI implementation on Hyper-V.
  */
-static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
+static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
+		bool exclude_self)
 {
 	struct hv_send_ipi_ex **arg;
 	struct hv_send_ipi_ex *ipi_arg;
@@ -123,7 +124,10 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
 
 	if (!cpumask_equal(mask, cpu_present_mask)) {
 		ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
-		nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
+		if (exclude_self)
+			nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask);
+		else
+			nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
 	}
 	if (nr_bank < 0)
 		goto ipi_mask_ex_done;
@@ -138,9 +142,10 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
 	return hv_result_success(status);
 }
 
-static bool __send_ipi_mask(const struct cpumask *mask, int vector)
+static bool __send_ipi_mask(const struct cpumask *mask, int vector,
+		bool exclude_self)
 {
-	int cur_cpu, vcpu;
+	int cur_cpu, vcpu, this_cpu = smp_processor_id();
 	struct hv_send_ipi ipi_arg;
 	u64 status;
 
@@ -172,6 +177,8 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
 	ipi_arg.cpu_mask = 0;
 
 	for_each_cpu(cur_cpu, mask) {
+		if (exclude_self && cur_cpu == this_cpu)
+			continue;
 		vcpu = hv_cpu_number_to_vp_number(cur_cpu);
 		if (vcpu == VP_INVAL)
 			return false;
@@ -191,7 +198,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
 	return hv_result_success(status);
 
 do_ex_hypercall:
-	return __send_ipi_mask_ex(mask, vector);
+	return __send_ipi_mask_ex(mask, vector, exclude_self);
 }
 
 static bool __send_ipi_one(int cpu, int vector)
@@ -208,7 +215,7 @@ static bool __send_ipi_one(int cpu, int vector)
 		return false;
 
 	if (vp >= 64)
-		return __send_ipi_mask_ex(cpumask_of(cpu), vector);
+		return __send_ipi_mask_ex(cpumask_of(cpu), vector, false);
 
 	status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp));
 	return hv_result_success(status);
@@ -222,20 +229,13 @@ static void hv_send_ipi(int cpu, int vector)
 
 static void hv_send_ipi_mask(const struct cpumask *mask, int vector)
 {
-	if (!__send_ipi_mask(mask, vector))
+	if (!__send_ipi_mask(mask, vector, false))
 		orig_apic.send_IPI_mask(mask, vector);
 }
 
 static void hv_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
 {
-	unsigned int this_cpu = smp_processor_id();
-	struct cpumask new_mask;
-	const struct cpumask *local_mask;
-
-	cpumask_copy(&new_mask, mask);
-	cpumask_clear_cpu(this_cpu, &new_mask);
-	local_mask = &new_mask;
-	if (!__send_ipi_mask(local_mask, vector))
+	if (!__send_ipi_mask(mask, vector, true))
 		orig_apic.send_IPI_mask_allbutself(mask, vector);
 }
 
@@ -246,7 +246,7 @@ static void hv_send_ipi_allbutself(int vector)
 
 static void hv_send_ipi_all(int vector)
 {
-	if (!__send_ipi_mask(cpu_online_mask, vector))
+	if (!__send_ipi_mask(cpu_online_mask, vector, false))
 		orig_apic.send_IPI_all(vector);
 }
 
--
2.30.2

From: Wei Liu <[email protected]> Sent: Wednesday, September 8, 2021 12:43 PM
>
> It is not good practice to allocate a cpumask on the stack, given that
> it may consume up to 1 kilobyte of stack space if the kernel is
> configured to have 8192 CPUs.
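> (A cpumask is a bitmap of NR_CPUS bits, so 8192 CPUs need 8192 / 8 =
> 1024 bytes.)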
>
> The internal helper functions __send_ipi_mask{,_ex} need to loop over
> the provided mask anyway, so it is not too difficult to skip `self'
> there. We can thus do away with the on-stack cpumask in
> hv_send_ipi_mask_allbutself.
>
> Adjust call sites of __send_ipi_mask as needed.
>
> Reported-by: Linus Torvalds <[email protected]>
> Suggested-by: Michael Kelley <[email protected]>
> Suggested-by: Linus Torvalds <[email protected]>
> Fixes: 68bb7bfb7985d ("X86/Hyper-V: Enable IPI enlightenments")
> Signed-off-by: Wei Liu <[email protected]>
> ---
>  arch/x86/hyperv/hv_apic.c | 32 ++++++++++++++++----------------
>  1 file changed, 16 insertions(+), 16 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
> index 90e682a92820..911cd41d931c 100644
> --- a/arch/x86/hyperv/hv_apic.c
> +++ b/arch/x86/hyperv/hv_apic.c
> @@ -99,7 +99,8 @@ static void hv_apic_eoi_write(u32 reg, u32 val)
>  /*
>   * IPI implementation on Hyper-V.
>   */
> -static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
> +static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
> +		bool exclude_self)
>  {
>  	struct hv_send_ipi_ex **arg;
>  	struct hv_send_ipi_ex *ipi_arg;
> @@ -123,7 +124,10 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
> 
>  	if (!cpumask_equal(mask, cpu_present_mask)) {
>  		ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
> -		nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
> +		if (exclude_self)
> +			nr_bank = cpumask_to_vpset_noself(&(ipi_arg->vp_set), mask);
> +		else
> +			nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask);
>  	}
>  	if (nr_bank < 0)
>  		goto ipi_mask_ex_done;
> @@ -138,9 +142,10 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
>  	return hv_result_success(status);
>  }
> 
> -static bool __send_ipi_mask(const struct cpumask *mask, int vector)
> +static bool __send_ipi_mask(const struct cpumask *mask, int vector,
> +		bool exclude_self)
>  {
> -	int cur_cpu, vcpu;
> +	int cur_cpu, vcpu, this_cpu = smp_processor_id();
>  	struct hv_send_ipi ipi_arg;
>  	u64 status;
> 
> @@ -172,6 +177,8 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
>  	ipi_arg.cpu_mask = 0;
> 
>  	for_each_cpu(cur_cpu, mask) {
> +		if (exclude_self && cur_cpu == this_cpu)
> +			continue;
>  		vcpu = hv_cpu_number_to_vp_number(cur_cpu);
>  		if (vcpu == VP_INVAL)
>  			return false;
> @@ -191,7 +198,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
>  	return hv_result_success(status);
> 
>  do_ex_hypercall:
> -	return __send_ipi_mask_ex(mask, vector);
> +	return __send_ipi_mask_ex(mask, vector, exclude_self);
>  }
This all looks correct to me, except for one difference compared with the
current code. In the current code, if the cpumask passed to
hv_send_ipi_mask_allbutself() indicates only a single CPU that is "self",
__send_ipi_mask() will detect that the cpumask is now empty, and
correctly return success without making the hypercall. But
the new code will make the hypercall with an empty input mask (both
in the SEND_IPI and SEND_IPI_EX cases). The Hyper-V TLFS is silent
on whether such a hypercall is a no-op that returns success or is an
error. We'll have a problem if it is an error. I think the safest thing
is to enhance the cpumask_empty() test at the beginning of
__send_ipi_mask() to also detect the case where only a single CPU
is specified, and it is "self". This could be done using cpumask_weight()
and checking for zero as the "empty" case. Then check for "1", and if
exclude_self is set, check if it is the "self" CPU.
The check for VP number >= 64 could also give a false positive since
"self" isn't filtered out yet, but that's OK: all that happens is that
the _ex version is used where the non-ex version would have worked.
>
>  static bool __send_ipi_one(int cpu, int vector)
> @@ -208,7 +215,7 @@ static bool __send_ipi_one(int cpu, int vector)
>  		return false;
> 
>  	if (vp >= 64)
> -		return __send_ipi_mask_ex(cpumask_of(cpu), vector);
> +		return __send_ipi_mask_ex(cpumask_of(cpu), vector, false);
> 
>  	status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp));
>  	return hv_result_success(status);
> @@ -222,20 +229,13 @@ static void hv_send_ipi(int cpu, int vector)
> 
>  static void hv_send_ipi_mask(const struct cpumask *mask, int vector)
>  {
> -	if (!__send_ipi_mask(mask, vector))
> +	if (!__send_ipi_mask(mask, vector, false))
>  		orig_apic.send_IPI_mask(mask, vector);
>  }
> 
>  static void hv_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
>  {
> -	unsigned int this_cpu = smp_processor_id();
> -	struct cpumask new_mask;
> -	const struct cpumask *local_mask;
> -
> -	cpumask_copy(&new_mask, mask);
> -	cpumask_clear_cpu(this_cpu, &new_mask);
> -	local_mask = &new_mask;
> -	if (!__send_ipi_mask(local_mask, vector))
> +	if (!__send_ipi_mask(mask, vector, true))
>  		orig_apic.send_IPI_mask_allbutself(mask, vector);
>  }
> 
> @@ -246,7 +246,7 @@ static void hv_send_ipi_allbutself(int vector)
> 
>  static void hv_send_ipi_all(int vector)
>  {
> -	if (!__send_ipi_mask(cpu_online_mask, vector))
> +	if (!__send_ipi_mask(cpu_online_mask, vector, false))
>  		orig_apic.send_IPI_all(vector);
>  }
> 
> --
> 2.30.2

On Fri, Sep 10, 2021 at 05:25:15PM +0000, Michael Kelley wrote:
> From: Wei Liu <[email protected]> Sent: Wednesday, September 8, 2021 12:43 PM
> >
[...]
> > -static bool __send_ipi_mask(const struct cpumask *mask, int vector)
> > +static bool __send_ipi_mask(const struct cpumask *mask, int vector,
> > +		bool exclude_self)
> >  {
> > -	int cur_cpu, vcpu;
> > +	int cur_cpu, vcpu, this_cpu = smp_processor_id();
> >  	struct hv_send_ipi ipi_arg;
> >  	u64 status;
> > 
> > @@ -172,6 +177,8 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
> >  	ipi_arg.cpu_mask = 0;
> > 
> >  	for_each_cpu(cur_cpu, mask) {
> > +		if (exclude_self && cur_cpu == this_cpu)
> > +			continue;
> >  		vcpu = hv_cpu_number_to_vp_number(cur_cpu);
> >  		if (vcpu == VP_INVAL)
> >  			return false;
> > @@ -191,7 +198,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
> >  	return hv_result_success(status);
> > 
> >  do_ex_hypercall:
> > -	return __send_ipi_mask_ex(mask, vector);
> > +	return __send_ipi_mask_ex(mask, vector, exclude_self);
> >  }
>
> This all looks correct to me, except for one difference compared with the
> current code. In the current code, if the cpumask passed to
> hv_send_ipi_mask_allbutself() indicates only a single CPU that is "self",
> __send_ipi_mask() will detect that the cpumask is now empty, and
> correctly return success without making the hypercall. But
> the new code will make the hypercall with an empty input mask (both
> in the SEND_IPI and SEND_IPI_EX cases). The Hyper-V TLFS is silent
> on whether such a hypercall is a no-op that returns success or is an
> error. We'll have a problem if it is an error. I think the safest thing
> is to enhance the cpumask_empty() test at the beginning of
> __send_ipi_mask() to also detect the case where only a single CPU
> is specified, and it is "self". This could be done using cpumask_weight()
> and checking for zero as the "empty" case. Then check for "1", and if
> exclude_self is set, check if it is the "self" CPU.

Sure. Making this change should not be too difficult.

Wei.