On 11/29/2017 12:21 AM, Juergen Gross wrote:
> On 28/11/17 20:34, Maran Wilson wrote:
>> For certain applications it is desirable to rapidly boot a KVM virtual
>> machine. In cases where legacy hardware and software support within the
>> guest is not needed, Qemu should be able to boot directly into the
>> uncompressed Linux kernel binary without the need to run firmware.
>>
>> There already exists an ABI to allow this for Xen PVH guests and the ABI is
>> supported by Linux and FreeBSD:
>>
>> https://xenbits.xen.org/docs/unstable/misc/hvmlite.html
>>
>> This PoC patch enables Qemu to use that same entry point for booting KVM
>> guests.
>>
>> Even though the code is still PoC quality, I'm sending this as an RFC now
>> since there are a number of different ways the specific implementation
>> details can be handled. I chose a shared code path for Xen and KVM guests
>> but could just as easily create a separate code path that is advertised by
>> a different ELF note for KVM. There also seems to be some flexibility in
>> how the e820 table data is passed and how (or if) it should be identified
>> as e820 data. As a starting point, I've chosen the options that seem to
>> result in the smallest patch with minimal to no changes required of the
>> x86/HVM direct boot ABI.
> I like the idea.
>
> I'd rather split up the different hypervisor types early and use a
> common set of service functions instead of special casing xen_guest
> everywhere. This would make it much easier to support the KVM PVH
> boot without the need to configure the kernel with CONFIG_XEN.
Thanks for the feedback. I'll try doing something like that as this
patch moves from proof of concept to a real proposal.
> Another option would be to use the same boot path as with grub: set
> the boot params in zeropage and start at startup_32.
I think others have already responded about that. The main thing I was
trying to avoid was adding any Linux OS-specific initialization (like
zeropage) to QEMU, especially since this PVH entry point already exists
in Linux.
Thanks,
-Maran
>
> Juergen
>
>> ---
>> arch/x86/xen/enlighten_pvh.c | 74 ++++++++++++++++++++++++++++++++------------
>> 1 file changed, 55 insertions(+), 19 deletions(-)
>>
>> diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
>> index 98ab176..d93f711 100644
>> --- a/arch/x86/xen/enlighten_pvh.c
>> +++ b/arch/x86/xen/enlighten_pvh.c
>> @@ -31,21 +31,46 @@ static void xen_pvh_arch_setup(void)
>> acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM;
>> }
>>
>> -static void __init init_pvh_bootparams(void)
>> +static void __init init_pvh_bootparams(bool xen_guest)
>> {
>> struct xen_memory_map memmap;
>> int rc;
>>
>> memset(&pvh_bootparams, 0, sizeof(pvh_bootparams));
>>
>> - memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
>> - set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
>> - rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
>> - if (rc) {
>> - xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
>> - BUG();
>> + if (xen_guest) {
>> + memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_table);
>> + set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_table);
>> + rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
>> + if (rc) {
>> + xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc);
>> + BUG();
>> + }
>> + pvh_bootparams.e820_entries = memmap.nr_entries;
>> + } else if (pvh_start_info.nr_modules > 1) {
>> + /* The second module should be the e820 data for KVM guests */
>> + struct hvm_modlist_entry *modaddr;
>> + char e820_sig[] = "e820 data";
>> + struct boot_e820_entry *ep;
>> + struct e820_table *tp;
>> + char *cmdline_str;
>> + int idx;
>> +
>> + modaddr = __va(pvh_start_info.modlist_paddr +
>> + sizeof(struct hvm_modlist_entry));
>> + cmdline_str = __va(modaddr->cmdline_paddr);
>> +
>> + if ((modaddr->cmdline_paddr) &&
>> + (!strncmp(e820_sig, cmdline_str, sizeof(e820_sig)))) {
>> + tp = __va(modaddr->paddr);
>> + ep = (struct boot_e820_entry *)tp->entries;
>> +
>> + pvh_bootparams.e820_entries = tp->nr_entries;
>> +
>> + for (idx = 0; idx < tp->nr_entries ; idx++, ep++)
>> + pvh_bootparams.e820_table[idx] = *ep;
>> + }
>> }
>> - pvh_bootparams.e820_entries = memmap.nr_entries;
>>
>> if (pvh_bootparams.e820_entries < E820_MAX_ENTRIES_ZEROPAGE - 1) {
>> pvh_bootparams.e820_table[pvh_bootparams.e820_entries].addr =
>> @@ -55,8 +80,9 @@ static void __init init_pvh_bootparams(void)
>> pvh_bootparams.e820_table[pvh_bootparams.e820_entries].type =
>> E820_TYPE_RESERVED;
>> pvh_bootparams.e820_entries++;
>> - } else
>> + } else if (xen_guest) {
>> xen_raw_printk("Warning: Can fit ISA range into e820\n");
>> + }
>>
>> pvh_bootparams.hdr.cmd_line_ptr =
>> pvh_start_info.cmdline_paddr;
>> @@ -76,7 +102,7 @@ static void __init init_pvh_bootparams(void)
>> * environment (i.e. hardware_subarch 0).
>> */
>> pvh_bootparams.hdr.version = 0x212;
>> - pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */
>> + pvh_bootparams.hdr.type_of_loader = ((xen_guest ? 0x9 : 0xb) << 4) | 0;
>> }
>>
>> /*
>> @@ -85,22 +111,32 @@ static void __init init_pvh_bootparams(void)
>> */
>> void __init xen_prepare_pvh(void)
>> {
>> - u32 msr;
>> +
>> + u32 msr = xen_cpuid_base();
>> u64 pfn;
>> + bool xen_guest = msr ? true : false;
>>
>> if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) {
>> - xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
>> - pvh_start_info.magic);
>> + if (xen_guest)
>> + xen_raw_printk("Error: Unexpected magic value (0x%08x)\n",
>> + pvh_start_info.magic);
>> BUG();
>> }
>>
>> - xen_pvh = 1;
>> + if (xen_guest) {
>> + xen_pvh = 1;
>> +
>> + msr = cpuid_ebx(msr + 2);
>> + pfn = __pa(hypercall_page);
>> + wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
>> +
>> + } else if (!hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0)) {
>> + BUG();
>> + }
>>
>> - msr = cpuid_ebx(xen_cpuid_base() + 2);
>> - pfn = __pa(hypercall_page);
>> - wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
>> + init_pvh_bootparams(xen_guest);
>>
>> - init_pvh_bootparams();
>> + if (xen_guest)
>> + x86_init.oem.arch_setup = xen_pvh_arch_setup;
>>
>> - x86_init.oem.arch_setup = xen_pvh_arch_setup;
>> }
>>
From 1585425576225448234@xxx Wed Nov 29 18:20:52 +0000 2017
X-GM-THRID: 1585339868639162159
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread