2021-07-12 03:27:22

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: [PATCH 1/3] Drivers: hv: Make portions of Hyper-V init code be arch neutral

The code to allocate and initialize the hv_vp_index array is
architecture neutral. Similarly, the code to allocate and
populate the hypercall input and output arg pages is architecture
neutral. Move both sets of code out from arch/x86 and into
utility functions in drivers/hv/hv_common.c that can be shared
by Hyper-V initialization on ARM64.

No functional changes. However, the allocation of the hypercall
input and output arg pages is done differently so that the
size is always the Hyper-V page size, even if not the same as
the guest page size (such as with ARM64's 64K page size).

Signed-off-by: Michael Kelley <[email protected]>
---
arch/x86/hyperv/hv_init.c | 91 +++-------------------------
arch/x86/include/asm/mshyperv.h | 4 --
arch/x86/kernel/cpu/mshyperv.c | 7 ---
drivers/hv/hv_common.c | 131 ++++++++++++++++++++++++++++++++++++++++
include/asm-generic/mshyperv.h | 10 +++
5 files changed, 151 insertions(+), 92 deletions(-)

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 6952e21..5cc0c0f 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -39,48 +39,17 @@
/* Storage to save the hypercall page temporarily for hibernation */
static void *hv_hypercall_pg_saved;

-u32 *hv_vp_index;
-EXPORT_SYMBOL_GPL(hv_vp_index);
-
struct hv_vp_assist_page **hv_vp_assist_page;
EXPORT_SYMBOL_GPL(hv_vp_assist_page);

-void __percpu **hyperv_pcpu_input_arg;
-EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
-
-void __percpu **hyperv_pcpu_output_arg;
-EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
-
-u32 hv_max_vp_index;
-EXPORT_SYMBOL_GPL(hv_max_vp_index);
-
static int hv_cpu_init(unsigned int cpu)
{
- u64 msr_vp_index;
struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()];
- void **input_arg;
- struct page *pg;
-
- /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
- pg = alloc_pages(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL, hv_root_partition ? 1 : 0);
- if (unlikely(!pg))
- return -ENOMEM;
-
- input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
- *input_arg = page_address(pg);
- if (hv_root_partition) {
- void **output_arg;
-
- output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
- *output_arg = page_address(pg + 1);
- }
-
- msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX);
-
- hv_vp_index[smp_processor_id()] = msr_vp_index;
+ int ret;

- if (msr_vp_index > hv_max_vp_index)
- hv_max_vp_index = msr_vp_index;
+ ret = hv_common_cpu_init(cpu);
+ if (ret)
+ return ret;

if (!hv_vp_assist_page)
return 0;
@@ -198,25 +167,8 @@ static int hv_cpu_die(unsigned int cpu)
{
struct hv_reenlightenment_control re_ctrl;
unsigned int new_cpu;
- unsigned long flags;
- void **input_arg;
- void *pg;

- local_irq_save(flags);
- input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
- pg = *input_arg;
- *input_arg = NULL;
-
- if (hv_root_partition) {
- void **output_arg;
-
- output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
- *output_arg = NULL;
- }
-
- local_irq_restore(flags);
-
- free_pages((unsigned long)pg, hv_root_partition ? 1 : 0);
+ hv_common_cpu_die(cpu);

if (hv_vp_assist_page && hv_vp_assist_page[cpu])
wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
@@ -368,7 +320,7 @@ void __init hyperv_init(void)
{
u64 guest_id, required_msrs;
union hv_x64_msr_hypercall_contents hypercall_msr;
- int cpuhp, i;
+ int cpuhp;

if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return;
@@ -380,36 +332,14 @@ void __init hyperv_init(void)
if ((ms_hyperv.features & required_msrs) != required_msrs)
return;

- /*
- * Allocate the per-CPU state for the hypercall input arg.
- * If this allocation fails, we will not be able to setup
- * (per-CPU) hypercall input page and thus this failure is
- * fatal on Hyper-V.
- */
- hyperv_pcpu_input_arg = alloc_percpu(void *);
-
- BUG_ON(hyperv_pcpu_input_arg == NULL);
-
- /* Allocate the per-CPU state for output arg for root */
- if (hv_root_partition) {
- hyperv_pcpu_output_arg = alloc_percpu(void *);
- BUG_ON(hyperv_pcpu_output_arg == NULL);
- }
-
- /* Allocate percpu VP index */
- hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
- GFP_KERNEL);
- if (!hv_vp_index)
+ if (hv_common_init())
return;

- for (i = 0; i < num_possible_cpus(); i++)
- hv_vp_index[i] = VP_INVAL;
-
hv_vp_assist_page = kcalloc(num_possible_cpus(),
sizeof(*hv_vp_assist_page), GFP_KERNEL);
if (!hv_vp_assist_page) {
ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
- goto free_vp_index;
+ goto common_free;
}

cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
@@ -507,9 +437,8 @@ void __init hyperv_init(void)
free_vp_assist_page:
kfree(hv_vp_assist_page);
hv_vp_assist_page = NULL;
-free_vp_index:
- kfree(hv_vp_index);
- hv_vp_index = NULL;
+common_free:
+ hv_common_free();
}

/*
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 67ff0d6..adccbc20 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -36,8 +36,6 @@ static inline u64 hv_get_register(unsigned int reg)
extern int hyperv_init_cpuhp;

extern void *hv_hypercall_pg;
-extern void __percpu **hyperv_pcpu_input_arg;
-extern void __percpu **hyperv_pcpu_output_arg;

extern u64 hv_current_partition_id;

@@ -170,8 +168,6 @@ int hyperv_fill_flush_guest_mapping_list(
struct hv_guest_mapping_flush_list *flush,
u64 start_gfn, u64 end_gfn);

-extern bool hv_root_partition;
-
#ifdef CONFIG_X86_64
void hv_apic_init(void);
void __init hv_init_spinlocks(void);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index cc8f177..9bcf417 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -34,13 +34,6 @@
#include <clocksource/hyperv_timer.h>
#include <asm/numa.h>

-/* Is Linux running as the root partition? */
-bool hv_root_partition;
-EXPORT_SYMBOL_GPL(hv_root_partition);
-
-struct ms_hyperv_info ms_hyperv;
-EXPORT_SYMBOL_GPL(ms_hyperv);
-
#if IS_ENABLED(CONFIG_HYPERV)
static void (*vmbus_handler)(void);
static void (*hv_stimer0_handler)(void);
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 7f42da9..9305850 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -15,10 +15,141 @@
#include <linux/types.h>
#include <linux/export.h>
#include <linux/bitfield.h>
+#include <linux/cpumask.h>
+#include <linux/slab.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>


+/* Is Linux running as the root partition? */
+bool hv_root_partition;
+EXPORT_SYMBOL_GPL(hv_root_partition);
+
+struct ms_hyperv_info ms_hyperv;
+EXPORT_SYMBOL_GPL(ms_hyperv);
+
+u32 *hv_vp_index; /* indexed by CPU number; entries are VP_INVAL until hv_common_cpu_init() stores the VP index */
+EXPORT_SYMBOL_GPL(hv_vp_index);
+
+u32 hv_max_vp_index; /* largest VP index recorded so far by hv_common_cpu_init() */
+EXPORT_SYMBOL_GPL(hv_max_vp_index);
+
+void __percpu **hyperv_pcpu_input_arg; /* per-CPU slot holding the pointer to the hypercall input arg memory */
+EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
+
+void __percpu **hyperv_pcpu_output_arg; /* per-CPU slot for the output arg pointer; allocated only if hv_root_partition */
+EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
+
+/*
+ * Hyper-V init and shutdown code common to all architectures, called from
+ * architecture specific init functions. hv_common_free() releases the VP
+ * index array and the per-CPU arg pointer storage set up by hv_common_init().
+ */
+
+void __init hv_common_free(void)
+{
+ kfree(hv_vp_index);
+ hv_vp_index = NULL; /* reset so no stale pointer is left behind */
+
+ free_percpu(hyperv_pcpu_output_arg); /* still NULL here unless hv_root_partition was set at init */
+ hyperv_pcpu_output_arg = NULL;
+
+ free_percpu(hyperv_pcpu_input_arg); /* frees only the per-CPU slots, not the arg memory they point to */
+ hyperv_pcpu_input_arg = NULL;
+}
+
+int __init hv_common_init(void) /* returns 0, or -ENOMEM if hv_vp_index cannot be allocated */
+{
+ int i;
+
+ /*
+ * Allocate the per-CPU state for the hypercall input arg.
+ * If this allocation fails, we will not be able to setup
+ * (per-CPU) hypercall input page and thus this failure is
+ * fatal on Hyper-V.
+ */
+ hyperv_pcpu_input_arg = alloc_percpu(void *);
+ BUG_ON(!hyperv_pcpu_input_arg);
+
+ /* Allocate the per-CPU state for output arg for root */
+ if (hv_root_partition) {
+ hyperv_pcpu_output_arg = alloc_percpu(void *);
+ BUG_ON(!hyperv_pcpu_output_arg);
+ }
+
+ hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
+ GFP_KERNEL); /* NOTE(review): sized by CPU count but indexed by CPU number in hv_common_cpu_init(); assumes possible CPU numbers are dense - confirm */
+ if (!hv_vp_index) {
+ hv_common_free(); /* unwind the per-CPU allocations made above */
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < num_possible_cpus(); i++)
+ hv_vp_index[i] = VP_INVAL; /* mark every entry uninitialized until hv_common_cpu_init() fills it in */
+
+ return 0;
+}
+
+/*
+ * Hyper-V specific initialization and die code for individual CPUs that is
+ * common across all architectures. Called by the CPU hotplug mechanism.
+ * hv_common_cpu_init() returns 0, or -ENOMEM if the arg memory allocation fails.
+ */
+
+int hv_common_cpu_init(unsigned int cpu)
+{
+ void **inputarg, **outputarg;
+ u64 msr_vp_index;
+ gfp_t flags;
+ int pgcount = hv_root_partition ? 2 : 1;
+
+ /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
+ flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
+
+ inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
+ *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
+ if (!(*inputarg))
+ return -ENOMEM;
+
+ if (hv_root_partition) {
+ outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
+ *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE; /* output arg is the second Hyper-V page of the same allocation */
+ }
+
+ msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX);
+
+ hv_vp_index[cpu] = msr_vp_index; /* record the VP index read from HV_REGISTER_VP_INDEX for this CPU number */
+
+ if (msr_vp_index > hv_max_vp_index) /* keep track of the largest VP index seen */
+ hv_max_vp_index = msr_vp_index;
+
+ return 0;
+}
+
+int hv_common_cpu_die(unsigned int cpu) /* clears this CPU's arg pointers and frees the arg memory */
+{
+ unsigned long flags;
+ void **inputarg, **outputarg;
+ void *mem;
+
+ local_irq_save(flags); /* the per-CPU pointers are cleared with local interrupts disabled */
+
+ inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); /* slots are reached via this_cpu_ptr(), not via 'cpu' */
+ mem = *inputarg; /* the one allocation made by hv_common_cpu_init(); it also backs the output arg */
+ *inputarg = NULL;
+
+ if (hv_root_partition) {
+ outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
+ *outputarg = NULL; /* pointed into 'mem', so it is only cleared, never freed separately */
+ }
+
+ local_irq_restore(flags);
+
+ kfree(mem); /* freed only after the per-CPU pointers no longer reference it */
+
+ return 0; /* always reports success */
+}
+
/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
bool hv_query_ext_cap(u64 cap_query)
{
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 9a000ba..2a187fe 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -38,6 +38,9 @@ struct ms_hyperv_info {
};
extern struct ms_hyperv_info ms_hyperv;

+extern void __percpu **hyperv_pcpu_input_arg;
+extern void __percpu **hyperv_pcpu_output_arg;
+
extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);

@@ -151,6 +154,8 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
extern int vmbus_interrupt;
extern int vmbus_irq;

+extern bool hv_root_partition;
+
#if IS_ENABLED(CONFIG_HYPERV)
/*
* Hypervisor's notion of virtual processor ID is different from
@@ -164,6 +169,11 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
/* Sentinel value for an uninitialized entry in hv_vp_index array */
#define VP_INVAL U32_MAX

+int __init hv_common_init(void);
+void __init hv_common_free(void);
+int hv_common_cpu_init(unsigned int cpu);
+int hv_common_cpu_die(unsigned int cpu);
+
void *hv_alloc_hyperv_page(void);
void *hv_alloc_hyperv_zeroed_page(void);
void hv_free_hyperv_page(unsigned long addr);
--
1.8.3.1


2021-07-12 18:27:38

by Wei Liu

[permalink] [raw]
Subject: Re: [PATCH 1/3] Drivers: hv: Make portions of Hyper-V init code be arch neutral

On Sun, Jul 11, 2021 at 08:25:14PM -0700, Michael Kelley wrote:
[...]
> +int hv_common_cpu_init(unsigned int cpu)
> +{
> + void **inputarg, **outputarg;
> + u64 msr_vp_index;
> + gfp_t flags;
> + int pgcount = hv_root_partition ? 2 : 1;
> +
> + /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
> + flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
> +
> + inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> + *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);

This is changed from alloc_pages to kmalloc. Does it ensure the
alignment is still correct?

Wei.

2021-07-12 18:42:17

by Wei Liu

[permalink] [raw]
Subject: Re: [PATCH 1/3] Drivers: hv: Make portions of Hyper-V init code be arch neutral

On Mon, Jul 12, 2021 at 06:24:00PM +0000, Wei Liu wrote:
> On Sun, Jul 11, 2021 at 08:25:14PM -0700, Michael Kelley wrote:
> [...]
> > +int hv_common_cpu_init(unsigned int cpu)
> > +{
> > + void **inputarg, **outputarg;
> > + u64 msr_vp_index;
> > + gfp_t flags;
> > + int pgcount = hv_root_partition ? 2 : 1;
> > +
> > + /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
> > + flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
> > +
> > + inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> > + *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
>
> This is changed from alloc_pages to kmalloc. Does it ensure the
> alignment is still correct?

kmalloc is rather complex and can be backed by either SLUB, SLAB or
SLOB, all of which differ from the others.

I _think_ for large allocations (> 1 native page) they tend to pass the
request on to the page allocator, but still there is a level of
indirection.

If the host page size is 64KiB, while the allocation is only 4KiB or
8KiB, could there be a chance that they become misaligned?

Wei.

>
> Wei.

2021-07-12 18:45:17

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: RE: [PATCH 1/3] Drivers: hv: Make portions of Hyper-V init code be arch neutral

From: Wei Liu <[email protected]> Sent: Monday, July 12, 2021 11:24 AM
>
> On Sun, Jul 11, 2021 at 08:25:14PM -0700, Michael Kelley wrote:
> [...]
> > +int hv_common_cpu_init(unsigned int cpu)
> > +{
> > + void **inputarg, **outputarg;
> > + u64 msr_vp_index;
> > + gfp_t flags;
> > + int pgcount = hv_root_partition ? 2 : 1;
> > +
> > + /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
> > + flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
> > +
> > + inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> > + *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
>
> This is changed from alloc_pages to kmalloc. Does it ensure the
> alignment is still correct?
>
> Wei.

It does. The alignment guarantee made by kmalloc() was changed a
couple of years ago. See commit 59bb47985c1d. Here's the current text
from the comments preceding kmalloc():

* The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
* bytes. For @size of power of two bytes, the alignment is also guaranteed
* to be at least to the size.

Michael

2021-07-12 18:52:41

by Wei Liu

[permalink] [raw]
Subject: Re: [PATCH 1/3] Drivers: hv: Make portions of Hyper-V init code be arch neutral

On Mon, Jul 12, 2021 at 06:41:21PM +0000, Michael Kelley wrote:
> From: Wei Liu <[email protected]> Sent: Monday, July 12, 2021 11:24 AM
> >
> > On Sun, Jul 11, 2021 at 08:25:14PM -0700, Michael Kelley wrote:
> > [...]
> > > +int hv_common_cpu_init(unsigned int cpu)
> > > +{
> > > + void **inputarg, **outputarg;
> > > + u64 msr_vp_index;
> > > + gfp_t flags;
> > > + int pgcount = hv_root_partition ? 2 : 1;
> > > +
> > > + /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
> > > + flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
> > > +
> > > + inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> > > + *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
> >
> > This is changed from alloc_pages to kmalloc. Does it ensure the
> > alignment is still correct?
> >
> > Wei.
>
> It does. The alignment guarantee made by kmalloc() was changed a
> couple of years ago. See commit 59bb47985c1d. Here's the current text
> from the comments preceding kmalloc():
>
> * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
> * bytes. For @size of power of two bytes, the alignment is also guaranteed
> * to be at least to the size.

OK. That's good.

Wei.

>
> Michael
>