2021-01-20 12:59:55

by Wei Liu

[permalink] [raw]
Subject: [PATCH v5 06/16] x86/hyperv: allocate output arg pages if required

When Linux runs as the root partition, it will need to make hypercalls
which return data from the hypervisor.

Allocate pages for storing results when Linux runs as the root
partition.

Signed-off-by: Lillian Grassin-Drake <[email protected]>
Co-Developed-by: Lillian Grassin-Drake <[email protected]>
Signed-off-by: Wei Liu <[email protected]>
---
v3: Fix hv_cpu_die to use free_pages.
v2: Address Vitaly's comments
---
arch/x86/hyperv/hv_init.c | 35 ++++++++++++++++++++++++++++-----
arch/x86/include/asm/mshyperv.h | 1 +
2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index e04d90af4c27..6f4cb40e53fe 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -41,6 +41,9 @@ EXPORT_SYMBOL_GPL(hv_vp_assist_page);
void __percpu **hyperv_pcpu_input_arg;
EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);

+void __percpu **hyperv_pcpu_output_arg;
+EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
+
u32 hv_max_vp_index;
EXPORT_SYMBOL_GPL(hv_max_vp_index);

@@ -73,12 +76,19 @@ static int hv_cpu_init(unsigned int cpu)
void **input_arg;
struct page *pg;

- input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
/* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
- pg = alloc_page(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL);
+ pg = alloc_pages(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL, hv_root_partition ? 1 : 0);
if (unlikely(!pg))
return -ENOMEM;
+
+ input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
*input_arg = page_address(pg);
+ if (hv_root_partition) {
+ void **output_arg;
+
+ output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
+ *output_arg = page_address(pg + 1);
+ }

hv_get_vp_index(msr_vp_index);

@@ -205,14 +215,23 @@ static int hv_cpu_die(unsigned int cpu)
unsigned int new_cpu;
unsigned long flags;
void **input_arg;
- void *input_pg = NULL;
+ void *pg;

local_irq_save(flags);
input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
- input_pg = *input_arg;
+ pg = *input_arg;
*input_arg = NULL;
+
+ if (hv_root_partition) {
+ void **output_arg;
+
+ output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
+ *output_arg = NULL;
+ }
+
local_irq_restore(flags);
- free_page((unsigned long)input_pg);
+
+ free_pages((unsigned long)pg, hv_root_partition ? 1 : 0);

if (hv_vp_assist_page && hv_vp_assist_page[cpu])
wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
@@ -346,6 +365,12 @@ void __init hyperv_init(void)

BUG_ON(hyperv_pcpu_input_arg == NULL);

+ /* Allocate the per-CPU state for output arg for root */
+ if (hv_root_partition) {
+ hyperv_pcpu_output_arg = alloc_percpu(void *);
+ BUG_ON(hyperv_pcpu_output_arg == NULL);
+ }
+
/* Allocate percpu VP index */
hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
GFP_KERNEL);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index ac2b0d110f03..62d9390f1ddf 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -76,6 +76,7 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {}
#if IS_ENABLED(CONFIG_HYPERV)
extern void *hv_hypercall_pg;
extern void __percpu **hyperv_pcpu_input_arg;
+extern void __percpu **hyperv_pcpu_output_arg;

static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
{
--
2.20.1


2021-01-20 15:17:06

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v5 06/16] x86/hyperv: allocate output arg pages if required

Hi Wei,

I love your patch! Perhaps something to improve:

[auto build test WARNING on e71ba9452f0b5b2e8dc8aa5445198cd9214a6a62]

url: https://github.com/0day-ci/linux/commits/Wei-Liu/Introducing-Linux-root-partition-support-for-Microsoft-Hypervisor/20210120-215640
base: e71ba9452f0b5b2e8dc8aa5445198cd9214a6a62
config: x86_64-randconfig-s021-20210120 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce:
# apt-get install sparse
# sparse version: v0.6.3-208-g46a52ca4-dirty
# https://github.com/0day-ci/linux/commit/f93337fc44e13a1506633f5d308bf74a8311dada
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Wei-Liu/Introducing-Linux-root-partition-support-for-Microsoft-Hypervisor/20210120-215640
git checkout f93337fc44e13a1506633f5d308bf74a8311dada
# save the attached .config to linux build tree
make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=x86_64

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>


"sparse warnings: (new ones prefixed by >>)"
arch/x86/hyperv/hv_init.c:84:30: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected void const [noderef] __percpu *__vpp_verify @@ got void [noderef] __percpu ** @@
arch/x86/hyperv/hv_init.c:84:30: sparse: expected void const [noderef] __percpu *__vpp_verify
arch/x86/hyperv/hv_init.c:84:30: sparse: got void [noderef] __percpu **
arch/x86/hyperv/hv_init.c:89:39: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected void const [noderef] __percpu *__vpp_verify @@ got void [noderef] __percpu ** @@
arch/x86/hyperv/hv_init.c:89:39: sparse: expected void const [noderef] __percpu *__vpp_verify
arch/x86/hyperv/hv_init.c:89:39: sparse: got void [noderef] __percpu **
arch/x86/hyperv/hv_init.c:221:30: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected void const [noderef] __percpu *__vpp_verify @@ got void [noderef] __percpu ** @@
arch/x86/hyperv/hv_init.c:221:30: sparse: expected void const [noderef] __percpu *__vpp_verify
arch/x86/hyperv/hv_init.c:221:30: sparse: got void [noderef] __percpu **
arch/x86/hyperv/hv_init.c:228:39: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected void const [noderef] __percpu *__vpp_verify @@ got void [noderef] __percpu ** @@
arch/x86/hyperv/hv_init.c:228:39: sparse: expected void const [noderef] __percpu *__vpp_verify
arch/x86/hyperv/hv_init.c:228:39: sparse: got void [noderef] __percpu **
arch/x86/hyperv/hv_init.c:364:31: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected void [noderef] __percpu **extern [addressable] [toplevel] hyperv_pcpu_input_arg @@ got void *[noderef] __percpu * @@
arch/x86/hyperv/hv_init.c:364:31: sparse: expected void [noderef] __percpu **extern [addressable] [toplevel] hyperv_pcpu_input_arg
arch/x86/hyperv/hv_init.c:364:31: sparse: got void *[noderef] __percpu *
>> arch/x86/hyperv/hv_init.c:370:40: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected void [noderef] __percpu **extern [addressable] [toplevel] hyperv_pcpu_output_arg @@ got void *[noderef] __percpu * @@
arch/x86/hyperv/hv_init.c:370:40: sparse: expected void [noderef] __percpu **extern [addressable] [toplevel] hyperv_pcpu_output_arg
arch/x86/hyperv/hv_init.c:370:40: sparse: got void *[noderef] __percpu *

vim +370 arch/x86/hyperv/hv_init.c

211
212 static int hv_cpu_die(unsigned int cpu)
213 {
214 struct hv_reenlightenment_control re_ctrl;
215 unsigned int new_cpu;
216 unsigned long flags;
217 void **input_arg;
218 void *pg;
219
220 local_irq_save(flags);
> 221 input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
222 pg = *input_arg;
223 *input_arg = NULL;
224
225 if (hv_root_partition) {
226 void **output_arg;
227
228 output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
229 *output_arg = NULL;
230 }
231
232 local_irq_restore(flags);
233
234 free_pages((unsigned long)pg, hv_root_partition ? 1 : 0);
235
236 if (hv_vp_assist_page && hv_vp_assist_page[cpu])
237 wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
238
239 if (hv_reenlightenment_cb == NULL)
240 return 0;
241
242 rdmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
243 if (re_ctrl.target_vp == hv_vp_index[cpu]) {
244 /*
245 * Reassign reenlightenment notifications to some other online
246 * CPU or just disable the feature if there are no online CPUs
247 * left (happens on hibernation).
248 */
249 new_cpu = cpumask_any_but(cpu_online_mask, cpu);
250
251 if (new_cpu < nr_cpu_ids)
252 re_ctrl.target_vp = hv_vp_index[new_cpu];
253 else
254 re_ctrl.enabled = 0;
255
256 wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl));
257 }
258
259 return 0;
260 }
261
262 static int __init hv_pci_init(void)
263 {
264 int gen2vm = efi_enabled(EFI_BOOT);
265
266 /*
267 * For Generation-2 VM, we exit from pci_arch_init() by returning 0.
268 * The purpose is to suppress the harmless warning:
269 * "PCI: Fatal: No config space access function found"
270 */
271 if (gen2vm)
272 return 0;
273
274 /* For Generation-1 VM, we'll proceed in pci_arch_init(). */
275 return 1;
276 }
277
278 static int hv_suspend(void)
279 {
280 union hv_x64_msr_hypercall_contents hypercall_msr;
281 int ret;
282
283 /*
284 * Reset the hypercall page as it is going to be invalidated
285 * accross hibernation. Setting hv_hypercall_pg to NULL ensures
286 * that any subsequent hypercall operation fails safely instead of
287 * crashing due to an access of an invalid page. The hypercall page
288 * pointer is restored on resume.
289 */
290 hv_hypercall_pg_saved = hv_hypercall_pg;
291 hv_hypercall_pg = NULL;
292
293 /* Disable the hypercall page in the hypervisor */
294 rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
295 hypercall_msr.enable = 0;
296 wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
297
298 ret = hv_cpu_die(0);
299 return ret;
300 }
301
302 static void hv_resume(void)
303 {
304 union hv_x64_msr_hypercall_contents hypercall_msr;
305 int ret;
306
307 ret = hv_cpu_init(0);
308 WARN_ON(ret);
309
310 /* Re-enable the hypercall page */
311 rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
312 hypercall_msr.enable = 1;
313 hypercall_msr.guest_physical_address =
314 vmalloc_to_pfn(hv_hypercall_pg_saved);
315 wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
316
317 hv_hypercall_pg = hv_hypercall_pg_saved;
318 hv_hypercall_pg_saved = NULL;
319
320 /*
321 * Reenlightenment notifications are disabled by hv_cpu_die(0),
322 * reenable them here if hv_reenlightenment_cb was previously set.
323 */
324 if (hv_reenlightenment_cb)
325 set_hv_tscchange_cb(hv_reenlightenment_cb);
326 }
327
328 /* Note: when the ops are called, only CPU0 is online and IRQs are disabled. */
329 static struct syscore_ops hv_syscore_ops = {
330 .suspend = hv_suspend,
331 .resume = hv_resume,
332 };
333
334 /*
335 * This function is to be invoked early in the boot sequence after the
336 * hypervisor has been detected.
337 *
338 * 1. Setup the hypercall page.
339 * 2. Register Hyper-V specific clocksource.
340 * 3. Setup Hyper-V specific APIC entry points.
341 */
342 void __init hyperv_init(void)
343 {
344 u64 guest_id, required_msrs;
345 union hv_x64_msr_hypercall_contents hypercall_msr;
346 int cpuhp, i;
347
348 if (x86_hyper_type != X86_HYPER_MS_HYPERV)
349 return;
350
351 /* Absolutely required MSRs */
352 required_msrs = HV_MSR_HYPERCALL_AVAILABLE |
353 HV_MSR_VP_INDEX_AVAILABLE;
354
355 if ((ms_hyperv.features & required_msrs) != required_msrs)
356 return;
357
358 /*
359 * Allocate the per-CPU state for the hypercall input arg.
360 * If this allocation fails, we will not be able to setup
361 * (per-CPU) hypercall input page and thus this failure is
362 * fatal on Hyper-V.
363 */
364 hyperv_pcpu_input_arg = alloc_percpu(void *);
365
366 BUG_ON(hyperv_pcpu_input_arg == NULL);
367
368 /* Allocate the per-CPU state for output arg for root */
369 if (hv_root_partition) {
> 370 hyperv_pcpu_output_arg = alloc_percpu(void *);
371 BUG_ON(hyperv_pcpu_output_arg == NULL);
372 }
373
374 /* Allocate percpu VP index */
375 hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
376 GFP_KERNEL);
377 if (!hv_vp_index)
378 return;
379
380 for (i = 0; i < num_possible_cpus(); i++)
381 hv_vp_index[i] = VP_INVAL;
382
383 hv_vp_assist_page = kcalloc(num_possible_cpus(),
384 sizeof(*hv_vp_assist_page), GFP_KERNEL);
385 if (!hv_vp_assist_page) {
386 ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
387 goto free_vp_index;
388 }
389
390 cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
391 hv_cpu_init, hv_cpu_die);
392 if (cpuhp < 0)
393 goto free_vp_assist_page;
394
395 /*
396 * Setup the hypercall page and enable hypercalls.
397 * 1. Register the guest ID
398 * 2. Enable the hypercall and register the hypercall page
399 */
400 guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0);
401 wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id);
402
403 hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START,
404 VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX,
405 VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
406 __builtin_return_address(0));
407 if (hv_hypercall_pg == NULL) {
408 wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0);
409 goto remove_cpuhp_state;
410 }
411
412 rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
413 hypercall_msr.enable = 1;
414 hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
415 wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
416
417 /*
418 * Ignore any errors in setting up stimer clockevents
419 * as we can run with the LAPIC timer as a fallback.
420 */
421 (void)hv_stimer_alloc();
422
423 hv_apic_init();
424
425 x86_init.pci.arch_init = hv_pci_init;
426
427 register_syscore_ops(&hv_syscore_ops);
428
429 return;
430
431 remove_cpuhp_state:
432 cpuhp_remove_state(cpuhp);
433 free_vp_assist_page:
434 kfree(hv_vp_assist_page);
435 hv_vp_assist_page = NULL;
436 free_vp_index:
437 kfree(hv_vp_index);
438 hv_vp_index = NULL;
439 }
440

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]


Attachments:
(No filename) (11.39 kB)
.config.gz (36.07 kB)
Download all attachments

2021-01-20 19:56:17

by Pasha Tatashin

[permalink] [raw]
Subject: Re: [PATCH v5 06/16] x86/hyperv: allocate output arg pages if required

On Wed, Jan 20, 2021 at 7:01 AM Wei Liu <[email protected]> wrote:
>
> When Linux runs as the root partition, it will need to make hypercalls
> which return data from the hypervisor.
>
> Allocate pages for storing results when Linux runs as the root
> partition.
>
> Signed-off-by: Lillian Grassin-Drake <[email protected]>
> Co-Developed-by: Lillian Grassin-Drake <[email protected]>
> Signed-off-by: Wei Liu <[email protected]>

Reviewed-by: Pavel Tatashin <[email protected]>

The new warnings reported by the robot are the same as for the input argument.

Pasha

> ---
> v3: Fix hv_cpu_die to use free_pages.
> v2: Address Vitaly's comments
> ---
> arch/x86/hyperv/hv_init.c | 35 ++++++++++++++++++++++++++++-----
> arch/x86/include/asm/mshyperv.h | 1 +
> 2 files changed, 31 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index e04d90af4c27..6f4cb40e53fe 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -41,6 +41,9 @@ EXPORT_SYMBOL_GPL(hv_vp_assist_page);
> void __percpu **hyperv_pcpu_input_arg;
> EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
>
> +void __percpu **hyperv_pcpu_output_arg;
> +EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
> +
> u32 hv_max_vp_index;
> EXPORT_SYMBOL_GPL(hv_max_vp_index);
>
> @@ -73,12 +76,19 @@ static int hv_cpu_init(unsigned int cpu)
> void **input_arg;
> struct page *pg;
>
> - input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
> - pg = alloc_page(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL);
> + pg = alloc_pages(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL, hv_root_partition ? 1 : 0);
> if (unlikely(!pg))
> return -ENOMEM;
> +
> + input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> *input_arg = page_address(pg);
> + if (hv_root_partition) {
> + void **output_arg;
> +
> + output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
> + *output_arg = page_address(pg + 1);
> + }
>
> hv_get_vp_index(msr_vp_index);
>
> @@ -205,14 +215,23 @@ static int hv_cpu_die(unsigned int cpu)
> unsigned int new_cpu;
> unsigned long flags;
> void **input_arg;
> - void *input_pg = NULL;
> + void *pg;
>
> local_irq_save(flags);
> input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> - input_pg = *input_arg;
> + pg = *input_arg;
> *input_arg = NULL;
> +
> + if (hv_root_partition) {
> + void **output_arg;
> +
> + output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
> + *output_arg = NULL;
> + }
> +
> local_irq_restore(flags);
> - free_page((unsigned long)input_pg);
> +
> + free_pages((unsigned long)pg, hv_root_partition ? 1 : 0);
>
> if (hv_vp_assist_page && hv_vp_assist_page[cpu])
> wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
> @@ -346,6 +365,12 @@ void __init hyperv_init(void)
>
> BUG_ON(hyperv_pcpu_input_arg == NULL);
>
> + /* Allocate the per-CPU state for output arg for root */
> + if (hv_root_partition) {
> + hyperv_pcpu_output_arg = alloc_percpu(void *);
> + BUG_ON(hyperv_pcpu_output_arg == NULL);
> + }
> +
> /* Allocate percpu VP index */
> hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
> GFP_KERNEL);
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index ac2b0d110f03..62d9390f1ddf 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -76,6 +76,7 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {}
> #if IS_ENABLED(CONFIG_HYPERV)
> extern void *hv_hypercall_pg;
> extern void __percpu **hyperv_pcpu_input_arg;
> +extern void __percpu **hyperv_pcpu_output_arg;
>
> static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
> {
> --
> 2.20.1
>

2021-01-26 10:37:16

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: RE: [PATCH v5 06/16] x86/hyperv: allocate output arg pages if required

From: Wei Liu <[email protected]> Sent: Wednesday, January 20, 2021 4:01 AM
>
> When Linux runs as the root partition, it will need to make hypercalls
> which return data from the hypervisor.
>
> Allocate pages for storing results when Linux runs as the root
> partition.
>
> Signed-off-by: Lillian Grassin-Drake <[email protected]>
> Co-Developed-by: Lillian Grassin-Drake <[email protected]>
> Signed-off-by: Wei Liu <[email protected]>
> ---
> v3: Fix hv_cpu_die to use free_pages.
> v2: Address Vitaly's comments
> ---
> arch/x86/hyperv/hv_init.c | 35 ++++++++++++++++++++++++++++-----
> arch/x86/include/asm/mshyperv.h | 1 +
> 2 files changed, 31 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index e04d90af4c27..6f4cb40e53fe 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -41,6 +41,9 @@ EXPORT_SYMBOL_GPL(hv_vp_assist_page);
> void __percpu **hyperv_pcpu_input_arg;
> EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
>
> +void __percpu **hyperv_pcpu_output_arg;
> +EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
> +
> u32 hv_max_vp_index;
> EXPORT_SYMBOL_GPL(hv_max_vp_index);
>
> @@ -73,12 +76,19 @@ static int hv_cpu_init(unsigned int cpu)
> void **input_arg;
> struct page *pg;
>
> - input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
> - pg = alloc_page(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL);
> + pg = alloc_pages(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL, hv_root_partition ?
> 1 : 0);
> if (unlikely(!pg))
> return -ENOMEM;
> +
> + input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> *input_arg = page_address(pg);
> + if (hv_root_partition) {
> + void **output_arg;
> +
> + output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
> + *output_arg = page_address(pg + 1);
> + }
>
> hv_get_vp_index(msr_vp_index);
>
> @@ -205,14 +215,23 @@ static int hv_cpu_die(unsigned int cpu)
> unsigned int new_cpu;
> unsigned long flags;
> void **input_arg;
> - void *input_pg = NULL;
> + void *pg;
>
> local_irq_save(flags);
> input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> - input_pg = *input_arg;
> + pg = *input_arg;
> *input_arg = NULL;
> +
> + if (hv_root_partition) {
> + void **output_arg;
> +
> + output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
> + *output_arg = NULL;
> + }
> +
> local_irq_restore(flags);
> - free_page((unsigned long)input_pg);
> +
> + free_pages((unsigned long)pg, hv_root_partition ? 1 : 0);
>
> if (hv_vp_assist_page && hv_vp_assist_page[cpu])
> wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
> @@ -346,6 +365,12 @@ void __init hyperv_init(void)
>
> BUG_ON(hyperv_pcpu_input_arg == NULL);
>
> + /* Allocate the per-CPU state for output arg for root */
> + if (hv_root_partition) {
> + hyperv_pcpu_output_arg = alloc_percpu(void *);
> + BUG_ON(hyperv_pcpu_output_arg == NULL);
> + }
> +
> /* Allocate percpu VP index */
> hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
> GFP_KERNEL);
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index ac2b0d110f03..62d9390f1ddf 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -76,6 +76,7 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {}
> #if IS_ENABLED(CONFIG_HYPERV)
> extern void *hv_hypercall_pg;
> extern void __percpu **hyperv_pcpu_input_arg;
> +extern void __percpu **hyperv_pcpu_output_arg;
>
> static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
> {
> --
> 2.20.1

I think this all works OK. But a meta question: Do we need a separate
per-cpu output argument page? From the Hyper-V hypercall standpoint, I
don't think input and output args need to be in separate pages. They both
just need to not cross a page boundary. As long as we don't have a hypercall
where the sum of the sizes of the input and output args exceeds a page,
we could just have a single page, and split it up in any manner that works
for the particular hypercall.

Thoughts?

Michael


2021-01-26 20:48:48

by Wei Liu

[permalink] [raw]
Subject: Re: [PATCH v5 06/16] x86/hyperv: allocate output arg pages if required

On Tue, Jan 26, 2021 at 12:41:05AM +0000, Michael Kelley wrote:
> From: Wei Liu <[email protected]> Sent: Wednesday, January 20, 2021 4:01 AM
> >
> > When Linux runs as the root partition, it will need to make hypercalls
> > which return data from the hypervisor.
> >
> > Allocate pages for storing results when Linux runs as the root
> > partition.
> >
> > Signed-off-by: Lillian Grassin-Drake <[email protected]>
> > Co-Developed-by: Lillian Grassin-Drake <[email protected]>
> > Signed-off-by: Wei Liu <[email protected]>
> > ---
> > v3: Fix hv_cpu_die to use free_pages.
> > v2: Address Vitaly's comments
> > ---
> > arch/x86/hyperv/hv_init.c | 35 ++++++++++++++++++++++++++++-----
> > arch/x86/include/asm/mshyperv.h | 1 +
> > 2 files changed, 31 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> > index e04d90af4c27..6f4cb40e53fe 100644
> > --- a/arch/x86/hyperv/hv_init.c
> > +++ b/arch/x86/hyperv/hv_init.c
> > @@ -41,6 +41,9 @@ EXPORT_SYMBOL_GPL(hv_vp_assist_page);
> > void __percpu **hyperv_pcpu_input_arg;
> > EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg);
> >
> > +void __percpu **hyperv_pcpu_output_arg;
> > +EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg);
> > +
> > u32 hv_max_vp_index;
> > EXPORT_SYMBOL_GPL(hv_max_vp_index);
> >
> > @@ -73,12 +76,19 @@ static int hv_cpu_init(unsigned int cpu)
> > void **input_arg;
> > struct page *pg;
> >
> > - input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> > /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */
> > - pg = alloc_page(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL);
> > + pg = alloc_pages(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL, hv_root_partition ?
> > 1 : 0);
> > if (unlikely(!pg))
> > return -ENOMEM;
> > +
> > + input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> > *input_arg = page_address(pg);
> > + if (hv_root_partition) {
> > + void **output_arg;
> > +
> > + output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
> > + *output_arg = page_address(pg + 1);
> > + }
> >
> > hv_get_vp_index(msr_vp_index);
> >
> > @@ -205,14 +215,23 @@ static int hv_cpu_die(unsigned int cpu)
> > unsigned int new_cpu;
> > unsigned long flags;
> > void **input_arg;
> > - void *input_pg = NULL;
> > + void *pg;
> >
> > local_irq_save(flags);
> > input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
> > - input_pg = *input_arg;
> > + pg = *input_arg;
> > *input_arg = NULL;
> > +
> > + if (hv_root_partition) {
> > + void **output_arg;
> > +
> > + output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
> > + *output_arg = NULL;
> > + }
> > +
> > local_irq_restore(flags);
> > - free_page((unsigned long)input_pg);
> > +
> > + free_pages((unsigned long)pg, hv_root_partition ? 1 : 0);
> >
> > if (hv_vp_assist_page && hv_vp_assist_page[cpu])
> > wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0);
> > @@ -346,6 +365,12 @@ void __init hyperv_init(void)
> >
> > BUG_ON(hyperv_pcpu_input_arg == NULL);
> >
> > + /* Allocate the per-CPU state for output arg for root */
> > + if (hv_root_partition) {
> > + hyperv_pcpu_output_arg = alloc_percpu(void *);
> > + BUG_ON(hyperv_pcpu_output_arg == NULL);
> > + }
> > +
> > /* Allocate percpu VP index */
> > hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index),
> > GFP_KERNEL);
> > diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> > index ac2b0d110f03..62d9390f1ddf 100644
> > --- a/arch/x86/include/asm/mshyperv.h
> > +++ b/arch/x86/include/asm/mshyperv.h
> > @@ -76,6 +76,7 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {}
> > #if IS_ENABLED(CONFIG_HYPERV)
> > extern void *hv_hypercall_pg;
> > extern void __percpu **hyperv_pcpu_input_arg;
> > +extern void __percpu **hyperv_pcpu_output_arg;
> >
> > static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
> > {
> > --
> > 2.20.1
>
> I think this all works OK. But a meta question: Do we need a separate
> per-cpu output argument page? From the Hyper-V hypercall standpoint, I
> don't think input and output args need to be in separate pages. They both

That's correct. They don't have to be in separate pages.

> just need to not cross a page boundary. As long as we don't have a hypercall
> where the sum of the sizes of the input and output args exceeds a page,
> we could just have a single page, and split it up in any manner that works
> for the particular hypercall.
>

There is one more requirement: The pointers must be 8-byte aligned. That
means we may need to explicitly pad things a bit. That quickly becomes
tedious if we do it in every call site; or we will need to provide a
macro to do the calculation correctly.

Another consideration is hypercalls that take variable-length input /
output. Admittedly I haven't seen one that takes variable-length
arguments and needs to do input and output at the same time, I wouldn't
want to paint ourselves into the corner now because sizing
variable-length input and output at the same time can be non-trivial.

Wei.

> Thoughts?
>
> Michael
>
>