Subject: [PATCH] platform-msi: Add ABI to show msi_irqs of platform devices

Just like pci devices have msi_irqs which can be used by userspace irq affinity
tools or applications to bind irqs, platform devices also widely support msi
irqs. For platform devices, for example ARM SMMU, userspaces also care about
its msi irqs as applications can know the mapping between devices and irqs
and then make smarter decision on handling irq affinity. For example, for
SVA mode, it is better to pin io page fault to the numa node applications
are running on. Otherwise, io page fault will get a remote page from the
node iopf happens.
With this patch, a system with multiple arm SMMUs in multiple different numa
node can get the mapping between devices and irqs now:
root@ubuntu:/sys/devices/platform# ls -l arm-smmu-v3.*/msi_irqs/*
-r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.0.auto/msi_irqs/25
-r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.0.auto/msi_irqs/26
-r--r--r-- 1 root root 4096 Aug 11 10:28 arm-smmu-v3.1.auto/msi_irqs/27
-r--r--r-- 1 root root 4096 Aug 11 10:28 arm-smmu-v3.1.auto/msi_irqs/28
-r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.2.auto/msi_irqs/29
-r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.2.auto/msi_irqs/30
-r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.3.auto/msi_irqs/31
-r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.3.auto/msi_irqs/32
-r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.4.auto/msi_irqs/33
-r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.4.auto/msi_irqs/34
-r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.5.auto/msi_irqs/35
-r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.5.auto/msi_irqs/36
-r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.6.auto/msi_irqs/37
-r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.6.auto/msi_irqs/38
-r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.7.auto/msi_irqs/39
-r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.7.auto/msi_irqs/40

Applications can use the mapping and the numa node information to pin
irqs by leveraging the numa information which has also been exported:
root@ubuntu:/sys/devices/platform# cat arm-smmu-v3.0.auto/numa_node
0
root@ubuntu:/sys/devices/platform# cat arm-smmu-v3.4.auto/numa_node
2

Cc: Zhou Wang <[email protected]>
Signed-off-by: Barry Song <[email protected]>
---
Documentation/ABI/testing/sysfs-bus-platform | 14 +++
drivers/base/platform-msi.c | 122 +++++++++++++++++++++++++++
2 files changed, 136 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-platform b/Documentation/ABI/testing/sysfs-bus-platform
index 194ca70..4498f89 100644
--- a/Documentation/ABI/testing/sysfs-bus-platform
+++ b/Documentation/ABI/testing/sysfs-bus-platform
@@ -28,3 +28,17 @@ Description:
value comes from an ACPI _PXM method or a similar firmware
source. Initial users for this file would be devices like
arm smmu which are populated by arm64 acpi_iort.
+
+What: /sys/bus/platform/devices/.../msi_irqs/
+Date: August 2021
+Contact: Barry Song <[email protected]>
+Description:
+ The /sys/devices/.../msi_irqs directory contains a variable set
+ of files, with each file being named after a corresponding msi
+ irq vector allocated to that device.
+
+What: /sys/bus/platform/devices/.../msi_irqs/<N>
+Date: August 2021
+Contact: Barry Song <[email protected]>
+Description:
+ This attribute will show "msi" if <N> is a valid msi irq
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 0b72b13..6a72ebc 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -23,6 +23,7 @@
struct platform_msi_priv_data {
struct device *dev;
void *host_data;
+ const struct attribute_group **msi_irq_groups;
msi_alloc_info_t arg;
irq_write_msi_msg_t write_msg;
int devid;
@@ -245,6 +246,120 @@ static void platform_msi_free_priv_data(struct platform_msi_priv_data *data)
kfree(data);
}

+static ssize_t platform_msi_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct msi_desc *entry;
+ unsigned long irq;
+ int retval;
+
+ retval = kstrtoul(attr->attr.name, 10, &irq);
+ if (retval)
+ return retval;
+
+ entry = irq_get_msi_desc(irq);
+ if (entry)
+ return sprintf(buf, "msi\n");
+
+ return -ENODEV;
+}
+
+static int platform_msi_populate_sysfs(struct device *dev,
+ int nvec,
+ struct platform_msi_priv_data *data)
+{
+ struct attribute **msi_attrs;
+ struct attribute *msi_attr;
+ struct device_attribute *msi_dev_attr;
+ struct attribute_group *msi_irq_group;
+ const struct attribute_group **msi_irq_groups;
+ struct msi_desc *entry;
+ int ret = -ENOMEM;
+ int count = 0;
+ int i;
+
+ /* Dynamically create the MSI attributes for the device */
+ msi_attrs = kcalloc(nvec + 1, sizeof(void *), GFP_KERNEL);
+ if (!msi_attrs)
+ return -ENOMEM;
+ for_each_msi_entry(entry, dev) {
+ for (i = 0; i < entry->nvec_used; i++) {
+ msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
+ if (!msi_dev_attr)
+ goto error_attrs;
+ msi_attrs[count] = &msi_dev_attr->attr;
+
+ sysfs_attr_init(&msi_dev_attr->attr);
+ msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
+ entry->irq + i);
+ if (!msi_dev_attr->attr.name)
+ goto error_attrs;
+ msi_dev_attr->attr.mode = S_IRUGO;
+ msi_dev_attr->show = platform_msi_show;
+ ++count;
+ }
+ }
+
+ msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL);
+ if (!msi_irq_group)
+ goto error_attrs;
+ msi_irq_group->name = "msi_irqs";
+ msi_irq_group->attrs = msi_attrs;
+
+ msi_irq_groups = kcalloc(2, sizeof(void *), GFP_KERNEL);
+ if (!msi_irq_groups)
+ goto error_irq_group;
+ msi_irq_groups[0] = msi_irq_group;
+
+ ret = sysfs_create_groups(&dev->kobj, msi_irq_groups);
+ if (ret)
+ goto error_irq_groups;
+ data->msi_irq_groups = msi_irq_groups;
+
+ return 0;
+
+error_irq_groups:
+ kfree(msi_irq_groups);
+error_irq_group:
+ kfree(msi_irq_group);
+error_attrs:
+ count = 0;
+ msi_attr = msi_attrs[count];
+ while (msi_attr) {
+ msi_dev_attr = container_of(msi_attr, struct device_attribute, attr);
+ kfree(msi_attr->name);
+ kfree(msi_dev_attr);
+ ++count;
+ msi_attr = msi_attrs[count];
+ }
+ kfree(msi_attrs);
+ return ret;
+}
+
+static void platform_msi_destroy_sysfs(struct device *dev,
+ struct platform_msi_priv_data *data)
+{
+ struct attribute **msi_attrs;
+ struct device_attribute *dev_attr;
+ int count = 0;
+
+ if (data->msi_irq_groups) {
+ sysfs_remove_groups(&dev->kobj, data->msi_irq_groups);
+ msi_attrs = data->msi_irq_groups[0]->attrs;
+ while (msi_attrs[count]) {
+ dev_attr = container_of(msi_attrs[count],
+ struct device_attribute, attr);
+ kfree(dev_attr->attr.name);
+ kfree(dev_attr);
+ ++count;
+ }
+ kfree(msi_attrs);
+ kfree(data->msi_irq_groups[0]);
+ kfree(data->msi_irq_groups);
+ data->msi_irq_groups = NULL;
+ }
+}
+
/**
* platform_msi_domain_alloc_irqs - Allocate MSI interrupts for @dev
* @dev: The device for which to allocate interrupts
@@ -272,8 +387,14 @@ int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec,
if (err)
goto out_free_desc;

+ err = platform_msi_populate_sysfs(dev, nvec, priv_data);
+ if (err)
+ goto out_free_irqs;
+
return 0;

+out_free_irqs:
+ msi_domain_free_irqs(dev->msi_domain, dev);
out_free_desc:
platform_msi_free_descs(dev, 0, nvec);
out_free_priv_data:
@@ -293,6 +414,7 @@ void platform_msi_domain_free_irqs(struct device *dev)
struct msi_desc *desc;

desc = first_msi_entry(dev);
+ platform_msi_destroy_sysfs(dev, desc->platform.msi_priv_data);
platform_msi_free_priv_data(desc->platform.msi_priv_data);
}

--
1.8.3.1


2021-08-11 11:50:40

by Greg KH

[permalink] [raw]
Subject: Re: [PATCH] platform-msi: Add ABI to show msi_irqs of platform devices

On Wed, Aug 11, 2021 at 10:50:20PM +1200, Barry Song wrote:
> Just like pci devices have msi_irqs which can be used by userspace irq affinity
> tools or applications to bind irqs, platform devices also widely support msi
> irqs. For platform devices, for example ARM SMMU, userspaces also care about
> its msi irqs as applications can know the mapping between devices and irqs
> and then make smarter decision on handling irq affinity. For example, for
> SVA mode, it is better to pin io page fault to the numa node applications
> are running on. Otherwise, io page fault will get a remote page from the
> node iopf happens.
> With this patch, a system with multiple arm SMMUs in multiple different numa
> node can get the mapping between devices and irqs now:
> root@ubuntu:/sys/devices/platform# ls -l arm-smmu-v3.*/msi_irqs/*
> -r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.0.auto/msi_irqs/25
> -r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.0.auto/msi_irqs/26
> -r--r--r-- 1 root root 4096 Aug 11 10:28 arm-smmu-v3.1.auto/msi_irqs/27
> -r--r--r-- 1 root root 4096 Aug 11 10:28 arm-smmu-v3.1.auto/msi_irqs/28
> -r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.2.auto/msi_irqs/29
> -r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.2.auto/msi_irqs/30
> -r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.3.auto/msi_irqs/31
> -r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.3.auto/msi_irqs/32
> -r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.4.auto/msi_irqs/33
> -r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.4.auto/msi_irqs/34
> -r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.5.auto/msi_irqs/35
> -r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.5.auto/msi_irqs/36
> -r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.6.auto/msi_irqs/37
> -r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.6.auto/msi_irqs/38
> -r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.7.auto/msi_irqs/39
> -r--r--r-- 1 root root 4096 Aug 11 10:29 arm-smmu-v3.7.auto/msi_irqs/40
>
> Applications can use the mapping and the numa node information to pin
> irqs by leveraging the numa information which has also been exported:
> root@ubuntu:/sys/devices/platform# cat arm-smmu-v3.0.auto/numa_node
> 0
> root@ubuntu:/sys/devices/platform# cat arm-smmu-v3.4.auto/numa_node
> 2
>
> Cc: Zhou Wang <[email protected]>
> Signed-off-by: Barry Song <[email protected]>
> ---
> Documentation/ABI/testing/sysfs-bus-platform | 14 +++
> drivers/base/platform-msi.c | 122 +++++++++++++++++++++++++++
> 2 files changed, 136 insertions(+)
>
> diff --git a/Documentation/ABI/testing/sysfs-bus-platform b/Documentation/ABI/testing/sysfs-bus-platform
> index 194ca70..4498f89 100644
> --- a/Documentation/ABI/testing/sysfs-bus-platform
> +++ b/Documentation/ABI/testing/sysfs-bus-platform
> @@ -28,3 +28,17 @@ Description:
> value comes from an ACPI _PXM method or a similar firmware
> source. Initial users for this file would be devices like
> arm smmu which are populated by arm64 acpi_iort.
> +
> +What: /sys/bus/platform/devices/.../msi_irqs/
> +Date: August 2021
> +Contact: Barry Song <[email protected]>
> +Description:
> + The /sys/devices/.../msi_irqs directory contains a variable set
> + of files, with each file being named after a corresponding msi
> + irq vector allocated to that device.
> +
> +What: /sys/bus/platform/devices/.../msi_irqs/<N>
> +Date: August 2021
> +Contact: Barry Song <[email protected]>
> +Description:
> + This attribute will show "msi" if <N> is a valid msi irq
> diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
> index 0b72b13..6a72ebc 100644
> --- a/drivers/base/platform-msi.c
> +++ b/drivers/base/platform-msi.c
> @@ -23,6 +23,7 @@
> struct platform_msi_priv_data {
> struct device *dev;
> void *host_data;
> + const struct attribute_group **msi_irq_groups;
> msi_alloc_info_t arg;
> irq_write_msi_msg_t write_msg;
> int devid;
> @@ -245,6 +246,120 @@ static void platform_msi_free_priv_data(struct platform_msi_priv_data *data)
> kfree(data);
> }
>
> +static ssize_t platform_msi_show(struct device *dev, struct device_attribute *attr,
> + char *buf)
> +{
> + struct msi_desc *entry;
> + unsigned long irq;
> + int retval;
> +
> + retval = kstrtoul(attr->attr.name, 10, &irq);
> + if (retval)
> + return retval;
> +
> + entry = irq_get_msi_desc(irq);
> + if (entry)
> + return sprintf(buf, "msi\n");

sysfs_emit()?

But why isn't this all handled by the MSI core code? Why would each bus
need to have this logic in it?

thanks,

greg k-h

2021-08-12 04:24:57

by Barry Song

[permalink] [raw]
Subject: Re: [PATCH] platform-msi: Add ABI to show msi_irqs of platform devices

>
> > +static ssize_t platform_msi_show(struct device *dev, struct device_attribute *attr,
> > + char *buf)
> > +{
> > + struct msi_desc *entry;
> > + unsigned long irq;
> > + int retval;
> > +
> > + retval = kstrtoul(attr->attr.name, 10, &irq);
> > + if (retval)
> > + return retval;
> > +
> > + entry = irq_get_msi_desc(irq);
> > + if (entry)
> > + return sprintf(buf, "msi\n");
>
> sysfs_emit()?

yep.

>
> But why isn't this all handled by the MSI core code? Why would each bus
> need to have this logic in it?

i think i can extract some common code for sysfs populate/destroy to msi core from pci and platform.
but we still need some pci/platform specific code in pci-msi and platform-msi cores. for example,
pci-msi has specific data which will be accessed in its show() entry.

struct msi_desc {
...
union {
/* PCI MSI/X specific data */
struct {
u32 masked;
struct {
u8 is_msix : 1;
u8 multiple : 3;
u8 multi_cap : 3;
u8 maskbit : 1;
u8 is_64 : 1;
u8 is_virtual : 1;
u16 entry_nr;
unsigned default_irq;
} msi_attrib;
union {
u8 mask_pos;
void __iomem *mask_base;
};
};

...
struct platform_msi_desc platform;
...
};
};

in addition, they are quite different in initialization/release and also need different places to save sysfs groups.
so probably i can let msi cores provide msi_populate_sysfs() and msi_destroy_sysfs() APIs. And ask pci and platform
to call msi_populate_sysfs() in their init code and save the groups in their specific pointers, and then they can
free sysfs in their release paths by calling msi_destroy_sysfs()

>
> thanks,
>
> greg k-h

Thanks
Barry

2021-08-12 07:44:14

by Greg KH

[permalink] [raw]
Subject: Re: [PATCH] platform-msi: Add ABI to show msi_irqs of platform devices

On Thu, Aug 12, 2021 at 04:19:30PM +1200, Barry Song wrote:
> > But why isn't this all handled by the MSI core code? Why would each bus
> > need to have this logic in it?
>
> i think i can extract some common code for sysfs populate/destroy to msi core from pci and platform.
> but we still need some pci/platform specific code in pci-msi and platform-msi cores. for example,
> pci-msi has specific data which will be accessed in its show() entry.
>
> struct msi_desc {
> ...
> union {
> /* PCI MSI/X specific data */
> struct {
> u32 masked;
> struct {
> u8 is_msix : 1;
> u8 multiple : 3;
> u8 multi_cap : 3;
> u8 maskbit : 1;
> u8 is_64 : 1;
> u8 is_virtual : 1;
> u16 entry_nr;
> unsigned default_irq;
> } msi_attrib;
> union {
> u8 mask_pos;
> void __iomem *mask_base;
> };
> };
>
> ...
> struct platform_msi_desc platform;
> ...
> };
> };
>
> in addition, they are quite different in initialization/release and also need different places to save sysfs groups.
> so probably i can let msi cores provide msi_populate_sysfs() and msi_destroy_sysfs() APIs. And ask pci and platform
> to call msi_populate_sysfs() in their init code and save the groups in their specific pointers, and then they can
> free sysfs in their release paths by calling msi_destroy_sysfs()

Ok, if this isn't easy then I guess it's not a big deal, but you should
go through the MSI developers first.

Why does a platform device have MSI interrupts? I thought they were
only for PCI devices.

thanks,

greg k-h

2021-08-12 10:01:14

by Barry Song

[permalink] [raw]
Subject: Re: [PATCH] platform-msi: Add ABI to show msi_irqs of platform devices

On Thu, Aug 12, 2021 at 7:24 PM Greg KH <[email protected]> wrote:
>
> On Thu, Aug 12, 2021 at 04:19:30PM +1200, Barry Song wrote:
> > > But why isn't this all handled by the MSI core code? Why would each bus
> > > need to have this logic in it?
> >
> > i think i can extract some common code for sysfs populate/destroy to msi core from pci and platform.
> > but we still need some pci/platform specific code in pci-msi and platform-msi cores. for example,
> > pci-msi has specific data which will be accessed in its show() entry.
> >
> > struct msi_desc {
> > ...
> > union {
> > /* PCI MSI/X specific data */
> > struct {
> > u32 masked;
> > struct {
> > u8 is_msix : 1;
> > u8 multiple : 3;
> > u8 multi_cap : 3;
> > u8 maskbit : 1;
> > u8 is_64 : 1;
> > u8 is_virtual : 1;
> > u16 entry_nr;
> > unsigned default_irq;
> > } msi_attrib;
> > union {
> > u8 mask_pos;
> > void __iomem *mask_base;
> > };
> > };
> >
> > ...
> > struct platform_msi_desc platform;
> > ...
> > };
> > };
> >
> > in addition, they are quite different in initialization/release and also need different places to save sysfs groups.
> > so probably i can let msi cores provide msi_populate_sysfs() and msi_destroy_sysfs() APIs. And ask pci and platform
> > to call msi_populate_sysfs() in their init code and save the groups in their specific pointers, and then they can
> > free sysfs in their release paths by calling msi_destroy_sysfs()
>
> Ok, if this isn't easy then I guess it's not a big deal, but you should
> go through the MSI developers first.
>
> Why does a platform device have MSI interrupts? I thought they were
> only for PCI devices.

I really don't know the story of hardware, but as long as a device can
write some mmio address with some
messages (interrupt-describing data) to generate interrupts instead of
using a hardware interrupt line, MSI is
supported :-)

>
> thanks,
>
> greg k-h

Thanks
Barry