2022-11-21 16:12:24

by Thomas Gleixner

[permalink] [raw]
Subject: [patch V2 13/33] x86/apic/vector: Provide MSI parent domain

Enable MSI parent domain support in the x86 vector domain and fixup the
checks in the iommu implementations to check whether device::msi::domain is
the default MSI parent domain. That keeps the existing logic to protect
e.g. devices behind VMD working.

The interrupt remap PCI/MSI code still works because the underlying vector
domain still provides the same functionality.

None of the other x86 PCI/MSI, e.g. XEN and HyperV, implementations are
affected either. They still work the same way both at the low level and the
PCI/MSI implementations they provide.

Signed-off-by: Thomas Gleixner <[email protected]>
---
V2: Fix kernel doc (robot)
---
arch/x86/include/asm/msi.h | 6 +
arch/x86/include/asm/pci.h | 1
arch/x86/kernel/apic/msi.c | 176 ++++++++++++++++++++++++++----------
drivers/iommu/amd/iommu.c | 2
drivers/iommu/intel/irq_remapping.c | 2
5 files changed, 138 insertions(+), 49 deletions(-)

--- a/arch/x86/include/asm/msi.h
+++ b/arch/x86/include/asm/msi.h
@@ -62,4 +62,10 @@ typedef struct x86_msi_addr_hi {
struct msi_msg;
u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid);

+#define X86_VECTOR_MSI_FLAGS_SUPPORTED \
+ (MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX)
+
+#define X86_VECTOR_MSI_FLAGS_REQUIRED \
+ (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS)
+
#endif /* _ASM_X86_MSI_H */
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -92,6 +92,7 @@ void pcibios_scan_root(int bus);
struct irq_routing_table *pcibios_get_irq_routing_table(void);
int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);

+bool pci_dev_has_default_msi_parent_domain(struct pci_dev *dev);

#define HAVE_PCI_MMAP
#define arch_can_pci_mmap_wc() pat_enabled()
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -142,67 +142,131 @@ msi_set_affinity(struct irq_data *irqd,
return ret;
}

-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
+/**
+ * pci_dev_has_default_msi_parent_domain - Check whether the device has the default
+ * MSI parent domain associated
+ * @dev: Pointer to the PCI device
*/
-static struct irq_chip pci_msi_controller = {
- .name = "PCI-MSI",
- .irq_unmask = pci_msi_unmask_irq,
- .irq_mask = pci_msi_mask_irq,
- .irq_ack = irq_chip_ack_parent,
- .irq_retrigger = irq_chip_retrigger_hierarchy,
- .irq_set_affinity = msi_set_affinity,
- .flags = IRQCHIP_SKIP_SET_WAKE |
- IRQCHIP_AFFINITY_PRE_STARTUP,
-};
+bool pci_dev_has_default_msi_parent_domain(struct pci_dev *dev)
+{
+ struct irq_domain *domain = dev_get_msi_domain(&dev->dev);

-int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
- msi_alloc_info_t *arg)
+ if (!domain)
+ domain = dev_get_msi_domain(&dev->bus->dev);
+ if (!domain)
+ return false;
+
+ return domain == x86_vector_domain;
+}
+
+/**
+ * x86_msi_prepare - Setup of msi_alloc_info_t for allocations
+ * @domain: The domain for which this setup happens
+ * @dev: The device for which interrupts are allocated
+ * @nvec: The number of vectors to allocate
+ * @alloc: The allocation info structure to initialize
+ *
+ * This function is to be used for all types of MSI domains above the x86
+ * vector domain and any intermediates. It is always invoked from the
+ * top level interrupt domain. The domain specific allocation
+ * functionality is determined via the @domain's bus token, which is
+ * mapped to the X86 specific allocation type.
+ */
+static int x86_msi_prepare(struct irq_domain *domain, struct device *dev,
+ int nvec, msi_alloc_info_t *alloc)
{
- init_irq_alloc_info(arg, NULL);
- if (to_pci_dev(dev)->msix_enabled)
- arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
- else
- arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
+ struct msi_domain_info *info = domain->host_data;

- return 0;
+ init_irq_alloc_info(alloc, NULL);
+
+ switch (info->bus_token) {
+ case DOMAIN_BUS_PCI_DEVICE_MSI:
+ alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
+ return 0;
+ case DOMAIN_BUS_PCI_DEVICE_MSIX:
+ alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
+ return 0;
+ default:
+ return -EINVAL;
+ }
}
-EXPORT_SYMBOL_GPL(pci_msi_prepare);

-static struct msi_domain_ops pci_msi_domain_ops = {
- .msi_prepare = pci_msi_prepare,
-};
+/**
+ * x86_init_dev_msi_info - Domain info setup for MSI domains
+ * @dev: The device for which the domain should be created
+ * @domain: The (root) domain providing this callback
+ * @real_parent: The real parent domain of the domain to be initialized
+ * @info: The domain info for the domain to be initialized
+ *
+ * This function is to be used for all types of MSI domains above the x86
+ * vector domain and any intermediates. The domain specific functionality
+ * is determined via the @real_parent.
+ */
+static bool x86_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
+ struct irq_domain *real_parent, struct msi_domain_info *info)
+{
+ const struct msi_parent_ops *pops = real_parent->msi_parent_ops;
+
+ /* MSI parent domain specific settings */
+ switch (real_parent->bus_token) {
+ case DOMAIN_BUS_ANY:
+ /* Only the vector domain can have the ANY token */
+ if (WARN_ON_ONCE(domain != real_parent))
+ return false;
+ info->chip->irq_set_affinity = msi_set_affinity;
+ /* See msi_set_affinity() for the gory details */
+ info->flags |= MSI_FLAG_NOMASK_QUIRK;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
+ /* Is the target supported? */
+ switch(info->bus_token) {
+ case DOMAIN_BUS_PCI_DEVICE_MSI:
+ case DOMAIN_BUS_PCI_DEVICE_MSIX:
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
+ /*
+ * Mask out the domain specific MSI feature flags which are not
+ * supported by the real parent.
+ */
+ info->flags &= pops->supported_flags;
+ /* Enforce the required flags */
+ info->flags |= X86_VECTOR_MSI_FLAGS_REQUIRED;
+
+ /* This is always invoked from the top level MSI domain! */
+ info->ops->msi_prepare = x86_msi_prepare;
+
+ info->chip->irq_ack = irq_chip_ack_parent;
+ info->chip->irq_retrigger = irq_chip_retrigger_hierarchy;
+ info->chip->flags |= IRQCHIP_SKIP_SET_WAKE |
+ IRQCHIP_AFFINITY_PRE_STARTUP;

-static struct msi_domain_info pci_msi_domain_info = {
- .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
- MSI_FLAG_PCI_MSIX | MSI_FLAG_NOMASK_QUIRK,
-
- .ops = &pci_msi_domain_ops,
- .chip = &pci_msi_controller,
- .handler = handle_edge_irq,
- .handler_name = "edge",
+ info->handler = handle_edge_irq;
+ info->handler_name = "edge";
+
+ return true;
+}
+
+static const struct msi_parent_ops x86_vector_msi_parent_ops = {
+ .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED,
+ .init_dev_msi_info = x86_init_dev_msi_info,
};

struct irq_domain * __init native_create_pci_msi_domain(void)
{
- struct fwnode_handle *fn;
- struct irq_domain *d;
-
if (disable_apic)
return NULL;

- fn = irq_domain_alloc_named_fwnode("PCI-MSI");
- if (!fn)
- return NULL;
-
- d = pci_msi_create_irq_domain(fn, &pci_msi_domain_info,
- x86_vector_domain);
- if (!d) {
- irq_domain_free_fwnode(fn);
- pr_warn("Failed to initialize PCI-MSI irqdomain.\n");
- }
- return d;
+ x86_vector_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
+ x86_vector_domain->msi_parent_ops = &x86_vector_msi_parent_ops;
+ return x86_vector_domain;
}

void __init x86_create_pci_msi_domain(void)
@@ -210,7 +274,25 @@ void __init x86_create_pci_msi_domain(vo
x86_pci_msi_default_domain = x86_init.irqs.create_pci_msi_domain();
}

+/* Keep around for hyperV and the remap code below */
+int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
+ msi_alloc_info_t *arg)
+{
+ init_irq_alloc_info(arg, NULL);
+
+ if (to_pci_dev(dev)->msix_enabled)
+ arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX;
+ else
+ arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pci_msi_prepare);
+
#ifdef CONFIG_IRQ_REMAP
+static struct msi_domain_ops pci_msi_domain_ops = {
+ .msi_prepare = pci_msi_prepare,
+};
+
static struct irq_chip pci_msi_ir_controller = {
.name = "IR-PCI-MSI",
.irq_unmask = pci_msi_unmask_irq,
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -812,7 +812,7 @@ static void
amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu)
{
if (!irq_remapping_enabled || !dev_is_pci(dev) ||
- pci_dev_has_special_msi_domain(to_pci_dev(dev)))
+ !pci_dev_has_default_msi_parent_domain(to_pci_dev(dev)))
return;

dev_set_msi_domain(dev, iommu->msi_domain);
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -1107,7 +1107,7 @@ static int reenable_irq_remapping(int ei
*/
void intel_irq_remap_add_device(struct dmar_pci_notify_info *info)
{
- if (!irq_remapping_enabled || pci_dev_has_special_msi_domain(info->dev))
+ if (!irq_remapping_enabled || !pci_dev_has_default_msi_parent_domain(info->dev))
return;

dev_set_msi_domain(&info->dev->dev, map_dev_to_ir(info->dev));



2022-11-23 08:42:55

by Tian, Kevin

[permalink] [raw]
Subject: RE: [patch V2 13/33] x86/apic/vector: Provide MSI parent domain

> From: Thomas Gleixner <[email protected]>
> Sent: Monday, November 21, 2022 10:38 PM
>
> +bool pci_dev_has_default_msi_parent_domain(struct pci_dev *dev)
> +{
> + struct irq_domain *domain = dev_get_msi_domain(&dev->dev);
>
> -int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
> - msi_alloc_info_t *arg)
> + if (!domain)
> + domain = dev_get_msi_domain(&dev->bus->dev);
> + if (!domain)
> + return false;
> +
> + return domain == x86_vector_domain;

the function name is about parent domain but there is no check on
the parent flag. Probably just remove 'parent'?

> +/**
> + * x86_init_dev_msi_info - Domain info setup for MSI domains
> + * @dev: The device for which the domain should be created
> + * @domain: The (root) domain providing this callback

what is the purpose of '(root)'? it's also used by intermediate domain i.e. IR.

> +
> + /*
> + * Mask out the domain specific MSI feature flags which are not
> + * supported by the real parent.
> + */
> + info->flags &= pops->supported_flags;
> + /* Enforce the required flags */
> + info->flags |= X86_VECTOR_MSI_FLAGS_REQUIRED;
> +
> + /* This is always invoked from the top level MSI domain! */
> + info->ops->msi_prepare = x86_msi_prepare;
> +
> + info->chip->irq_ack = irq_chip_ack_parent;
> + info->chip->irq_retrigger = irq_chip_retrigger_hierarchy;
> + info->chip->flags |= IRQCHIP_SKIP_SET_WAKE |
> + IRQCHIP_AFFINITY_PRE_STARTUP;

Above are executed twice for both IR and vector after next patch comes.
Could skip it for IR.

2022-11-23 14:00:29

by Thomas Gleixner

[permalink] [raw]
Subject: RE: [patch V2 13/33] x86/apic/vector: Provide MSI parent domain

On Wed, Nov 23 2022 at 08:16, Kevin Tian wrote:
>> From: Thomas Gleixner <[email protected]>
>> Sent: Monday, November 21, 2022 10:38 PM
>>
>> +bool pci_dev_has_default_msi_parent_domain(struct pci_dev *dev)
>> +{
>> + struct irq_domain *domain = dev_get_msi_domain(&dev->dev);
>>
>> -int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec,
>> - msi_alloc_info_t *arg)
>> + if (!domain)
>> + domain = dev_get_msi_domain(&dev->bus->dev);
>> + if (!domain)
>> + return false;
>> +
>> + return domain == x86_vector_domain;
>
> the function name is about parent domain but there is no check on
> the parent flag. Probably just remove 'parent'?

No. This checks whether the device has the default MSI parent domain,
which _IS_ the vector domain.

I really don't have to check whether the vector domain has the MSI
parent flag set or not. It _IS_ set. If that gets lost later then the
result of the above function is the least of our problems.

>> +/**
>> + * x86_init_dev_msi_info - Domain info setup for MSI domains
>> + * @dev: The device for which the domain should be created
>> + * @domain: The (root) domain providing this callback
>
> what is the purpose of '(root)'? it's also used by intermediate domain
> i.e. IR.

It _can_ be used by intermediate domains, yes. But the way I implemented
the IR MSI parents, IR does not use it.

>> +
>> + /*
>> + * Mask out the domain specific MSI feature flags which are not
>> + * supported by the real parent.
>> + */
>> + info->flags &= pops->supported_flags;
>> + /* Enforce the required flags */
>> + info->flags |= X86_VECTOR_MSI_FLAGS_REQUIRED;
>> +
>> + /* This is always invoked from the top level MSI domain! */
>> + info->ops->msi_prepare = x86_msi_prepare;
>> +
>> + info->chip->irq_ack = irq_chip_ack_parent;
>> + info->chip->irq_retrigger = irq_chip_retrigger_hierarchy;
>> + info->chip->flags |= IRQCHIP_SKIP_SET_WAKE |
>> + IRQCHIP_AFFINITY_PRE_STARTUP;
>
> Above are executed twice for both IR and vector after next patch comes.
> Could skip it for IR.

How so?

+static const struct msi_parent_ops dmar_msi_parent_ops = {
+ .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | MSI_FLAG_MULTI_PCI_MSI,
+ .prefix = "IR-",
+ .init_dev_msi_info = msi_parent_init_dev_msi_info,
+};

IR delegates the init to its parent domain, i.e. the vector domain. So
there is no double invocation.

Thanks,

tglx

2022-11-24 01:57:36

by Tian, Kevin

[permalink] [raw]
Subject: RE: [patch V2 13/33] x86/apic/vector: Provide MSI parent domain

> From: Thomas Gleixner <[email protected]>
> Sent: Wednesday, November 23, 2022 9:42 PM
>
> On Wed, Nov 23 2022 at 08:16, Kevin Tian wrote:
> >> From: Thomas Gleixner <[email protected]>
> >> Sent: Monday, November 21, 2022 10:38 PM
> >> +
> >> + /*
> >> + * Mask out the domain specific MSI feature flags which are not
> >> + * supported by the real parent.
> >> + */
> >> + info->flags &= pops->supported_flags;
> >> + /* Enforce the required flags */
> > >> + info->flags |= X86_VECTOR_MSI_FLAGS_REQUIRED;
> >> +
> >> + /* This is always invoked from the top level MSI domain! */
> >> + info->ops->msi_prepare = x86_msi_prepare;
> >> +
> >> + info->chip->irq_ack = irq_chip_ack_parent;
> >> + info->chip->irq_retrigger = irq_chip_retrigger_hierarchy;
> >> + info->chip->flags |= IRQCHIP_SKIP_SET_WAKE |
> >> + IRQCHIP_AFFINITY_PRE_STARTUP;
> >
> > Above are executed twice for both IR and vector after next patch comes.
> > Could skip it for IR.
>
> How so?
>
> +static const struct msi_parent_ops dmar_msi_parent_ops = {
> + .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | MSI_FLAG_MULTI_PCI_MSI,
> + .prefix = "IR-",
> + .init_dev_msi_info = msi_parent_init_dev_msi_info,
> +};
>
> IR delegates the init to its parent domain, i.e. the vector domain. So
> there is no double invocation.
>

Oops. How did I manage to read msi_parent_init_dev_msi_info() as
x86_init_dev_msi_info()?