by Jiqian Chen

[permalink] [raw]

Subject: Re: [RFC KERNEL PATCH v2 2/3] xen/pvh: Unmask irq for passthrough device in PVH dom0

(Adding Juergen to the "To" list.)

Hi Juergen,
Looking forward to your opinions.

On 2023/12/7 10:18, Stefano Stabellini wrote:
> On Tue, 5 Dec 2023, Chen, Jiqian wrote:
>> When PVH dom0 enables a device, it gets the trigger and polarity from ACPI (see acpi_pci_irq_enable).
>> I have a version of the patch which tried that way, see below:
>
> This approach looks much better. I think this patch is OKish. Juergen,
> what do you think?
>
>
>> diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
>> index ada3868c02c2..43e1bda9f946 100644
>> --- a/arch/x86/xen/enlighten_pvh.c
>> +++ b/arch/x86/xen/enlighten_pvh.c
>> @@ -1,6 +1,7 @@
>> // SPDX-License-Identifier: GPL-2.0
>> #include <linux/acpi.h>
>> #include <linux/export.h>
>> +#include <linux/pci.h>
>>
>> #include <xen/hvc-console.h>
>>
>> @@ -25,6 +26,127 @@
>> bool __ro_after_init xen_pvh;
>> EXPORT_SYMBOL_GPL(xen_pvh);
>>
>> +typedef struct gsi_info {
>> + int gsi;
>> + int trigger;
>> + int polarity;
>> + int pirq;
>> +} gsi_info_t;
>> +
>> +struct acpi_prt_entry {
>> + struct acpi_pci_id id;
>> + u8 pin;
>> + acpi_handle link;
>> + u32 index; /* GSI, or link _CRS index */
>> +};
>> +
>> +static int xen_pvh_get_gsi_info(struct pci_dev *dev,
>> + gsi_info_t *gsi_info)
>> +{
>> + int gsi;
>> + u8 pin = 0;
>> + struct acpi_prt_entry *entry;
>> + int trigger = ACPI_LEVEL_SENSITIVE;
>> + int polarity = acpi_irq_model == ACPI_IRQ_MODEL_GIC ?
>> + ACPI_ACTIVE_HIGH : ACPI_ACTIVE_LOW;
>> +
>> + if (dev)
>> + pin = dev->pin;
>> + if (!pin) {
>> + xen_raw_printk("No interrupt pin configured\n");
>> + return -EINVAL;
>> + }
>> +
>> + entry = acpi_pci_irq_lookup(dev, pin);
>> + if (entry) {
>> + if (entry->link)
>> + gsi = acpi_pci_link_allocate_irq(entry->link,
>> + entry->index,
>> + &trigger, &polarity,
>> + NULL);
>> + else
>> + gsi = entry->index;
>> + } else
>> + return -EINVAL;
>> +
>> + gsi_info->gsi = gsi;
>> + gsi_info->trigger = trigger;
>> + gsi_info->polarity = polarity;
>> +
>> + return 0;
>> +}
>> +
>> +static int xen_pvh_map_pirq(gsi_info_t *gsi_info)
>> +{
>> + struct physdev_map_pirq map_irq;
>> + int ret;
>> +
>> + map_irq.domid = DOMID_SELF;
>> + map_irq.type = MAP_PIRQ_TYPE_GSI;
>> + map_irq.index = gsi_info->gsi;
>> + map_irq.pirq = gsi_info->gsi;
>> +
>> + ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
>> + gsi_info->pirq = map_irq.pirq;
>> +
>> + return ret;
>> +}
>> +
>> +static int xen_pvh_unmap_pirq(gsi_info_t *gsi_info)
>> +{
>> + struct physdev_unmap_pirq unmap_irq;
>> +
>> + unmap_irq.domid = DOMID_SELF;
>> + unmap_irq.pirq = gsi_info->pirq;
>> +
>> + return HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
>> +}
>> +
>> +static int xen_pvh_setup_gsi(gsi_info_t *gsi_info)
>> +{
>> + struct physdev_setup_gsi setup_gsi;
>> +
>> + setup_gsi.gsi = gsi_info->gsi;
>> + setup_gsi.triggering = (gsi_info->trigger == ACPI_EDGE_SENSITIVE ? 0 : 1);
>> + setup_gsi.polarity = (gsi_info->polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
>> +
>> + return HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
>> +}
>> +
>> +int xen_pvh_passthrough_gsi(struct pci_dev *dev)
>> +{
>> + int ret;
>> + gsi_info_t gsi_info;
>> +
>> + if (!dev) {
>> + return -EINVAL;
>> + }
>> +
>> + ret = xen_pvh_get_gsi_info(dev, &gsi_info);
>> + if (ret) {
>> + xen_raw_printk("Fail to get gsi info!\n");
>> + return ret;
>> + }
>> +
>> + ret = xen_pvh_map_pirq(&gsi_info);
>> + if (ret) {
>> + xen_raw_printk("Fail to map pirq for gsi (%d)!\n", gsi_info.gsi);
>> + return ret;
>> + }
>> +
>> + ret = xen_pvh_setup_gsi(&gsi_info);
>> + if (ret == -EEXIST) {
>> + ret = 0;
>> + xen_raw_printk("Already setup the GSI :%u\n", gsi_info.gsi);
>> + } else if (ret) {
>> + xen_raw_printk("Fail to setup gsi (%d)!\n", gsi_info.gsi);
>> + xen_pvh_unmap_pirq(&gsi_info);
>> + }
>> +
>> + return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(xen_pvh_passthrough_gsi);
>> +
>> void __init xen_pvh_init(struct boot_params *boot_params)
>> {
>> u32 msr;
>> diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
>> index ff30ceca2203..630fe0a34bc6 100644
>> --- a/drivers/acpi/pci_irq.c
>> +++ b/drivers/acpi/pci_irq.c
>> @@ -288,7 +288,7 @@ static int acpi_reroute_boot_interrupt(struct pci_dev *dev,
>> }
>> #endif /* CONFIG_X86_IO_APIC */
>>
>> -static struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
>> +struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin)
>> {
>> struct acpi_prt_entry *entry = NULL;
>> struct pci_dev *bridge;
>> diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
>> index e34b623e4b41..1abd4dad6f40 100644
>> --- a/drivers/xen/xen-pciback/pci_stub.c
>> +++ b/drivers/xen/xen-pciback/pci_stub.c
>> @@ -20,6 +20,7 @@
>> #include <linux/atomic.h>
>> #include <xen/events.h>
>> #include <xen/pci.h>
>> +#include <xen/acpi.h>
>> #include <xen/xen.h>
>> #include <asm/xen/hypervisor.h>
>> #include <xen/interface/physdev.h>
>> @@ -399,6 +400,12 @@ static int pcistub_init_device(struct pci_dev *dev)
>> if (err)
>> goto config_release;
>>
>> + if (xen_initial_domain() && xen_pvh_domain()) {
>> + err = xen_pvh_passthrough_gsi(dev);
>> + if (err)
>> + goto config_release;
>> + }
>> +
>> if (dev->msix_cap) {
>> struct physdev_pci_device ppdev = {
>> .seg = pci_domain_nr(dev->bus),
>> diff --git a/include/linux/acpi.h b/include/linux/acpi.h
>> index 641dc4843987..368d56ba2c5e 100644
>> --- a/include/linux/acpi.h
>> +++ b/include/linux/acpi.h
>> @@ -375,6 +375,7 @@ void acpi_unregister_gsi (u32 gsi);
>>
>> struct pci_dev;
>>
>> +struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin);
>> int acpi_pci_irq_enable (struct pci_dev *dev);
>> void acpi_penalize_isa_irq(int irq, int active);
>> bool acpi_isa_irq_available(int irq);
>> diff --git a/include/xen/acpi.h b/include/xen/acpi.h
>> index b1e11863144d..ce7f5554f88e 100644
>> --- a/include/xen/acpi.h
>> +++ b/include/xen/acpi.h
>> @@ -67,6 +67,7 @@ static inline void xen_acpi_sleep_register(void)
>> acpi_suspend_lowlevel = xen_acpi_suspend_lowlevel;
>> }
>> }
>> +int xen_pvh_passthrough_gsi(struct pci_dev *dev);
>> #else
>> static inline void xen_acpi_sleep_register(void)
>> {
>>
>>>
>>> Jan
>>
>> --
>> Best regards,
>> Jiqian Chen.
>>

--
Best regards,
Jiqian Chen.

2023-12-07 06:44:08

On 2023/12/12 19:39, Roger Pau Monné wrote:
> On Tue, Dec 12, 2023 at 12:19:49PM +0100, Jan Beulich wrote:
>> On 12.12.2023 12:18, Roger Pau Monné wrote:
>>> On Tue, Dec 12, 2023 at 10:38:08AM +0100, Jan Beulich wrote:
>>>> (I think the Cc list is too long here, but then I don't know who to
>>>> keep and who to possibly drop.)
>>>>
>>>> On 12.12.2023 09:49, Roger Pau Monné wrote:
>>>>> On Tue, Dec 12, 2023 at 06:16:43AM +0000, Chen, Jiqian wrote:
>>>>>> On 2023/12/11 23:45, Roger Pau Monné wrote:
>>>>>>> On Wed, Dec 06, 2023 at 06:07:26AM +0000, Chen, Jiqian wrote:
>>>>>>>> +static int xen_pvh_setup_gsi(gsi_info_t *gsi_info)
>>>>>>>> +{
>>>>>>>> + struct physdev_setup_gsi setup_gsi;
>>>>>>>> +
>>>>>>>> + setup_gsi.gsi = gsi_info->gsi;
>>>>>>>> + setup_gsi.triggering = (gsi_info->trigger == ACPI_EDGE_SENSITIVE ? 0 : 1);
>>>>>>>> + setup_gsi.polarity = (gsi_info->polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
>>>>>>>> +
>>>>>>>> + return HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
>>>>>>>> +}
>>>>>>>
>>>>>>> Hm, why not simply call pcibios_enable_device() from pciback? What
>>>>>> pcibios_enable_device had been called when using cmd "xl pci-assignable-add sbdf" from pciback. But it didn't do map_pirq and setup_gsi.
>>>>>> Because pcibios_enable_device-> pcibios_enable_irq-> __acpi_register_gsi(acpi_register_gsi_ioapic PVH specific)
>>>>>>> you are doing here using the hypercalls is a backdoor into what's done
>>>>>>> automatically by Xen on IO-APIC accesses by a PVH dom0.
>>>>>> But the gsi didn't be unmasked, and vioapic_hwdom_map_gsi is never called.
>>>>>> So, I think in pciback, if we can do what vioapic_hwdom_map_gsi does.
>>>>>>
>>>>>
>>>>> I see, it does setup the IO-APIC pin but doesn't unmask it, that's
>>>>> what I feared.
>>>>>
>>>>>>> It will be much more natural for the PVH dom0 model to simply use the
>>>>>>> native way to configure and unmask the IO-APIC pin, and that would
>>>>>>> correctly setup the triggering/polarity and bind it to dom0 without
>>>>>>> requiring the usage of any hypercalls.
>>>>>> Do you still prefer that I called unmask_irq in pcistub_init_device, as this v2 patch do?
>>>>>> But Thomas Gleixner think it is not suitable to export unmask_irq.
>>>>>
>>>>> Yeah, that wasn't good.
>>>>>
>>>>>>>
>>>>>>> Is that an issue since in that case the gsi will get mapped and bound
>>>>>>> to dom0?
>>>>>> Dom0 do map_pirq is to pass the check xc_domain_irq_permission()-> pirq_access_permitted(),
>>>>>
>>>>> Can we see about finding another way to fix this check?
>>>>>
>>>>> One option would be granting permissions over the IRQ in
>>>>> PHYSDEVOP_setup_gsi?
>>>>
>>>> There's no domain available there, and imo it's also the wrong interface to
>>>> possibly grant any permissions.
>>>
>>> Well, the domain is the caller.
>>
>> Granting permission to itself?
>
> See below in the previous email, the issue is not with the
> permissions, which are correctly assigned from
> dom0_setup_permissions(), but the usage of domain_pirq_to_irq() in
> pirq_access_permitted() as called by XEN_DOMCTL_irq_permission.
> There's no need to play with the permissions at all.
Yes, the problem is the call chain pci_add_dm_done -> xc_domain_irq_permission -> XEN_DOMCTL_irq_permission -> pirq_access_permitted -> domain_pirq_to_irq, which returns an irq of 0, so the check fails.
I am thinking that, since PVH doesn't use pirqs, could we just skip this irq_permission check for PVH?

>
> Regards, Roger.

--
Best regards,
Jiqian Chen.

2023-12-13 07:41:25

by Jan Beulich

[permalink] [raw]

Subject: Re: [RFC KERNEL PATCH v2 2/3] xen/pvh: Unmask irq for passthrough device in PVH dom0

On 13.12.2023 08:14, Chen, Jiqian wrote:
> On 2023/12/12 19:39, Roger Pau Monné wrote:
>> On Tue, Dec 12, 2023 at 12:19:49PM +0100, Jan Beulich wrote:
>>> On 12.12.2023 12:18, Roger Pau Monné wrote:
>>>> On Tue, Dec 12, 2023 at 10:38:08AM +0100, Jan Beulich wrote:
>>>>> (I think the Cc list is too long here, but then I don't know who to
>>>>> keep and who to possibly drop.)
>>>>>
>>>>> On 12.12.2023 09:49, Roger Pau Monné wrote:
>>>>>> On Tue, Dec 12, 2023 at 06:16:43AM +0000, Chen, Jiqian wrote:
>>>>>>> On 2023/12/11 23:45, Roger Pau Monné wrote:
>>>>>>>> On Wed, Dec 06, 2023 at 06:07:26AM +0000, Chen, Jiqian wrote:
>>>>>>>>> +static int xen_pvh_setup_gsi(gsi_info_t *gsi_info)
>>>>>>>>> +{
>>>>>>>>> + struct physdev_setup_gsi setup_gsi;
>>>>>>>>> +
>>>>>>>>> + setup_gsi.gsi = gsi_info->gsi;
>>>>>>>>> + setup_gsi.triggering = (gsi_info->trigger == ACPI_EDGE_SENSITIVE ? 0 : 1);
>>>>>>>>> + setup_gsi.polarity = (gsi_info->polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
>>>>>>>>> +
>>>>>>>>> + return HYPERVISOR_physdev_op(PHYSDEVOP_setup_gsi, &setup_gsi);
>>>>>>>>> +}
>>>>>>>>
>>>>>>>> Hm, why not simply call pcibios_enable_device() from pciback? What
>>>>>>> pcibios_enable_device had been called when using cmd "xl pci-assignable-add sbdf" from pciback. But it didn't do map_pirq and setup_gsi.
>>>>>>> Because pcibios_enable_device-> pcibios_enable_irq-> __acpi_register_gsi(acpi_register_gsi_ioapic PVH specific)
>>>>>>>> you are doing here using the hypercalls is a backdoor into what's done
>>>>>>>> automatically by Xen on IO-APIC accesses by a PVH dom0.
>>>>>>> But the gsi didn't be unmasked, and vioapic_hwdom_map_gsi is never called.
>>>>>>> So, I think in pciback, if we can do what vioapic_hwdom_map_gsi does.
>>>>>>>
>>>>>>
>>>>>> I see, it does setup the IO-APIC pin but doesn't unmask it, that's
>>>>>> what I feared.
>>>>>>
>>>>>>>> It will be much more natural for the PVH dom0 model to simply use the
>>>>>>>> native way to configure and unmask the IO-APIC pin, and that would
>>>>>>>> correctly setup the triggering/polarity and bind it to dom0 without
>>>>>>>> requiring the usage of any hypercalls.
>>>>>>> Do you still prefer that I called unmask_irq in pcistub_init_device, as this v2 patch do?
>>>>>>> But Thomas Gleixner think it is not suitable to export unmask_irq.
>>>>>>
>>>>>> Yeah, that wasn't good.
>>>>>>
>>>>>>>>
>>>>>>>> Is that an issue since in that case the gsi will get mapped and bound
>>>>>>>> to dom0?
>>>>>>> Dom0 do map_pirq is to pass the check xc_domain_irq_permission()-> pirq_access_permitted(),
>>>>>>
>>>>>> Can we see about finding another way to fix this check?
>>>>>>
>>>>>> One option would be granting permissions over the IRQ in
>>>>>> PHYSDEVOP_setup_gsi?
>>>>>
>>>>> There's no domain available there, and imo it's also the wrong interface to
>>>>> possibly grant any permissions.
>>>>
>>>> Well, the domain is the caller.
>>>
>>> Granting permission to itself?
>>
>> See below in the previous email, the issue is not with the
>> permissions, which are correctly assigned from
>> dom0_setup_permissions(), but the usage of domain_pirq_to_irq() in
>> pirq_access_permitted() as called by XEN_DOMCTL_irq_permission.
>> There's no need to play with the permissions at all.
> Yes, the problem is the call chain pci_add_dm_done -> xc_domain_irq_permission -> XEN_DOMCTL_irq_permission -> pirq_access_permitted -> domain_pirq_to_irq, which returns an irq of 0, so the check fails.
> I am thinking that, since PVH doesn't use pirqs, could we just skip this irq_permission check for PVH?

If not the pIRQ, then the real IRQ would need checking for permissions.
You won't get away without any checking at all, I'm afraid.

Jan