The software uses a memory-mapped I/O command interface (MC portals) to
communicate with the MC hardware. This command interface is used to
discover, enumerate, configure and remove DPAA2 objects. The DPAA2
objects use MSIs, so the command interface needs to be emulated
such that the correct MSI is configured in the hardware (the guest
has the virtual MSIs).
This patch is adding read/write support for fsl-mc devices. The mc
commands are emulated by the userspace. The host is just passing
the correct command to the hardware.
Also the current patch limits userspace to write complete
64byte command once and read 64byte response by one ioctl.
Signed-off-by: Bharat Bhushan <[email protected]>
Signed-off-by: Diana Craciun <[email protected]>
---
drivers/vfio/fsl-mc/vfio_fsl_mc.c | 115 +++++++++++++++++++++-
drivers/vfio/fsl-mc/vfio_fsl_mc_private.h | 1 +
2 files changed, 114 insertions(+), 2 deletions(-)
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index 73834f488a94..27713aa86878 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -12,6 +12,7 @@
#include <linux/types.h>
#include <linux/vfio.h>
#include <linux/fsl/mc.h>
+#include <linux/delay.h>
#include "vfio_fsl_mc_private.h"
@@ -106,6 +107,9 @@ static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
vdev->regions[i].size = resource_size(res);
vdev->regions[i].flags = VFIO_REGION_INFO_FLAG_MMAP;
vdev->regions[i].type = mc_dev->regions[i].flags & IORESOURCE_BITS;
+ vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
+ if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY))
+ vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
}
vdev->num_regions = mc_dev->obj_desc.region_count;
@@ -114,6 +118,11 @@ static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev)
{
+ int i;
+
+ for (i = 0; i < vdev->num_regions; i++)
+ iounmap(vdev->regions[i].ioaddr);
+
vdev->num_regions = 0;
kfree(vdev->regions);
}
@@ -311,13 +320,115 @@ static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
size_t count, loff_t *ppos)
{
- return -EINVAL;
+ struct vfio_fsl_mc_device *vdev = device_data;
+ unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
+ loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
+ struct vfio_fsl_mc_region *region;
+ u64 data[8];
+ int i;
+
+ if (index >= vdev->num_regions)
+ return -EINVAL;
+
+ region = &vdev->regions[index];
+
+ if (!(region->flags & VFIO_REGION_INFO_FLAG_READ))
+ return -EINVAL;
+
+ if (!region->ioaddr) {
+ region->ioaddr = ioremap(region->addr, region->size);
+ if (!region->ioaddr)
+ return -ENOMEM;
+ }
+
+ if (count != 64 || off != 0)
+ return -EINVAL;
+
+ for (i = 7; i >= 0; i--)
+ data[i] = readq(region->ioaddr + i * sizeof(uint64_t));
+
+ if (copy_to_user(buf, data, 64))
+ return -EFAULT;
+
+ return count;
+}
+
+#define MC_CMD_COMPLETION_TIMEOUT_MS 5000
+#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS 500
+
+static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data)
+{
+ int i;
+ enum mc_cmd_status status;
+ unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000;
+
+ /* Write at command parameter into portal */
+ for (i = 7; i >= 1; i--)
+ writeq_relaxed(cmd_data[i], ioaddr + i * sizeof(uint64_t));
+
+ /* Write command header in the end */
+ writeq(cmd_data[0], ioaddr);
+
+ /* Wait for response before returning to user-space
+ * This can be optimized in future to even prepare response
+ * before returning to user-space and avoid read ioctl.
+ */
+ for (;;) {
+ u64 header;
+ struct mc_cmd_header *resp_hdr;
+
+ header = cpu_to_le64(readq_relaxed(ioaddr));
+
+ resp_hdr = (struct mc_cmd_header *)&header;
+ status = (enum mc_cmd_status)resp_hdr->status;
+ if (status != MC_CMD_STATUS_READY)
+ break;
+
+ udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
+ timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
+ if (timeout_usecs == 0)
+ return -ETIMEDOUT;
+ }
+
+ return 0;
}
static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf,
size_t count, loff_t *ppos)
{
- return -EINVAL;
+ struct vfio_fsl_mc_device *vdev = device_data;
+ unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
+ loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
+ struct vfio_fsl_mc_region *region;
+ u64 data[8];
+ int ret;
+
+ if (index >= vdev->num_regions)
+ return -EINVAL;
+
+ region = &vdev->regions[index];
+
+ if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE))
+ return -EINVAL;
+
+ if (!region->ioaddr) {
+ region->ioaddr = ioremap(region->addr, region->size);
+ if (!region->ioaddr)
+ return -ENOMEM;
+ }
+
+ if (count != 64 || off != 0)
+ return -EINVAL;
+
+ if (copy_from_user(&data, buf, 64))
+ return -EFAULT;
+
+ ret = vfio_fsl_mc_send_command(region->ioaddr, data);
+ if (ret)
+ return ret;
+
+ return count;
+
}
static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
index bbfca8b55f8a..e6804e516c4a 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
@@ -32,6 +32,7 @@ struct vfio_fsl_mc_region {
u32 type;
u64 addr;
resource_size_t size;
+ void __iomem *ioaddr;
};
struct vfio_fsl_mc_device {
--
2.17.1
Hi Diana,
On 8/26/20 11:33 AM, Diana Craciun wrote:
> The software uses a memory-mapped I/O command interface (MC portals) to
> communicate with the MC hardware. This command interface is used to
> discover, enumerate, configure and remove DPAA2 objects. The DPAA2
> objects use MSIs, so the command interface needs to be emulated
> such that the correct MSI is configured in the hardware (the guest
> has the virtual MSIs).
What I don't get is all the regions are mmappable too.
And this patch does not seem to introduce special handling with respect
to MSIs. Please could you clarify?
>
> This patch is adding read/write support for fsl-mc devices. The mc
> commands are emulated by the userspace. The host is just passing
> the correct command to the hardware.
>
> Also the current patch limits userspace to write complete
> 64byte command once and read 64byte response by one ioctl.
>
> Signed-off-by: Bharat Bhushan <[email protected]>
> Signed-off-by: Diana Craciun <[email protected]>
> ---
> drivers/vfio/fsl-mc/vfio_fsl_mc.c | 115 +++++++++++++++++++++-
> drivers/vfio/fsl-mc/vfio_fsl_mc_private.h | 1 +
> 2 files changed, 114 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
> index 73834f488a94..27713aa86878 100644
> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
> @@ -12,6 +12,7 @@
> #include <linux/types.h>
> #include <linux/vfio.h>
> #include <linux/fsl/mc.h>
> +#include <linux/delay.h>
>
> #include "vfio_fsl_mc_private.h"
>
> @@ -106,6 +107,9 @@ static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
> vdev->regions[i].size = resource_size(res);
> vdev->regions[i].flags = VFIO_REGION_INFO_FLAG_MMAP;
> vdev->regions[i].type = mc_dev->regions[i].flags & IORESOURCE_BITS;
> + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
> + if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY))
> + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
> }
>
> vdev->num_regions = mc_dev->obj_desc.region_count;
> @@ -114,6 +118,11 @@ static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
>
> static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev)
> {
> + int i;
> +
> + for (i = 0; i < vdev->num_regions; i++)
> + iounmap(vdev->regions[i].ioaddr);
> +
> vdev->num_regions = 0;
> kfree(vdev->regions);
> }
> @@ -311,13 +320,115 @@ static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
> static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
> size_t count, loff_t *ppos)
> {
> - return -EINVAL;
> + struct vfio_fsl_mc_device *vdev = device_data;
> + unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
> + loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
> + struct vfio_fsl_mc_region *region;
> + u64 data[8];
> + int i;
> +
> + if (index >= vdev->num_regions)
> + return -EINVAL;
> +
> + region = &vdev->regions[index];
> +
> + if (!(region->flags & VFIO_REGION_INFO_FLAG_READ))
> + return -EINVAL;
> +
> + if (!region->ioaddr) {
> + region->ioaddr = ioremap(region->addr, region->size);
> + if (!region->ioaddr)
> + return -ENOMEM;
> + }
> +
> + if (count != 64 || off != 0)
> + return -EINVAL;
> +
> + for (i = 7; i >= 0; i--)
> + data[i] = readq(region->ioaddr + i * sizeof(uint64_t));
> +
> + if (copy_to_user(buf, data, 64))
> + return -EFAULT;
> +
> + return count;
> +}
> +
> +#define MC_CMD_COMPLETION_TIMEOUT_MS 5000
> +#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS 500
> +
> +static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data)
> +{
> + int i;
> + enum mc_cmd_status status;
> + unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000;
> +
> + /* Write at command parameter into portal */
> + for (i = 7; i >= 1; i--)
> + writeq_relaxed(cmd_data[i], ioaddr + i * sizeof(uint64_t));
> +
> + /* Write command header in the end */
> + writeq(cmd_data[0], ioaddr);
> +
> + /* Wait for response before returning to user-space
> + * This can be optimized in future to even prepare response
> + * before returning to user-space and avoid read ioctl.
> + */
> + for (;;) {
> + u64 header;
> + struct mc_cmd_header *resp_hdr;
> +
> + header = cpu_to_le64(readq_relaxed(ioaddr));
> +
> + resp_hdr = (struct mc_cmd_header *)&header;
> + status = (enum mc_cmd_status)resp_hdr->status;
> + if (status != MC_CMD_STATUS_READY)
> + break;
> +
> + udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
> + timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
> + if (timeout_usecs == 0)
> + return -ETIMEDOUT;
> + }
> +
> + return 0;
> }
>
> static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf,
> size_t count, loff_t *ppos)
> {
> - return -EINVAL;
> + struct vfio_fsl_mc_device *vdev = device_data;
> + unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
> + loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
> + struct vfio_fsl_mc_region *region;
> + u64 data[8];
> + int ret;
> +
> + if (index >= vdev->num_regions)
> + return -EINVAL;
> +
> + region = &vdev->regions[index];
> +
> + if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE))
> + return -EINVAL;
> +
> + if (!region->ioaddr) {
> + region->ioaddr = ioremap(region->addr, region->size);
> + if (!region->ioaddr)
> + return -ENOMEM;
> + }
> +
> + if (count != 64 || off != 0)
> + return -EINVAL;
> +
> + if (copy_from_user(&data, buf, 64))
> + return -EFAULT;
> +
> + ret = vfio_fsl_mc_send_command(region->ioaddr, data);
> + if (ret)
> + return ret;
> +
> + return count;
> +
> }
>
> static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
> index bbfca8b55f8a..e6804e516c4a 100644
> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
> @@ -32,6 +32,7 @@ struct vfio_fsl_mc_region {
> u32 type;
> u64 addr;
> resource_size_t size;
> + void __iomem *ioaddr;
> };
>
> struct vfio_fsl_mc_device {
>
Thanks
Eric
Hi Eric,
On 9/4/2020 11:18 AM, Auger Eric wrote:
> Hi Diana,
>
> On 8/26/20 11:33 AM, Diana Craciun wrote:
>> The software uses a memory-mapped I/O command interface (MC portals) to
>> communicate with the MC hardware. This command interface is used to
>> discover, enumerate, configure and remove DPAA2 objects. The DPAA2
>> objects use MSIs, so the command interface needs to be emulated
>> such that the correct MSI is configured in the hardware (the guest
>> has the virtual MSIs).
> What I don't get is all the regions are mmappable too.
> And this patch does not seem to introduce special handling with respect
> to MSIs. Please could you clarify?
The device can be controlled using commands issued towards a firmware.
Most of the commands can be passthrough, among exceptions is the command
that configures the interrupts. In a guest the interrupts are emulated
and for the hardware the numbers in the guest mean nothing. So, in a
virtual machine scenario, the DPMCP and DPRC regions are emulated in
qemu such that the command which configures the interrupts will not go
to hardware with the information set by the guest.
However there are other scenarios apart from virtual machines like DPDK
in which the interrupt configuration command is not used. The problem
might be that the userspace could issue the command because there is no
restriction in the VFIO, but in that case the worst thing that may
happen is for the interrupts for the device not to work.
However it is possible to restrict the command for this scenario as well
if I change the code and not allow the DPRC region to be mmapable. In
practice it proved that it might not gain much by direct assigning that
area. Also the interrupt configuration command was restricted from the
firmware to be issued only from the DPRC device region to help such a
scenario.
>>
>> This patch is adding read/write support for fsl-mc devices. The mc
>> commands are emulated by the userspace. The host is just passing
>> the correct command to the hardware.
>>
>> Also the current patch limits userspace to write complete
>> 64byte command once and read 64byte response by one ioctl.
>>
>> Signed-off-by: Bharat Bhushan <[email protected]>
>> Signed-off-by: Diana Craciun <[email protected]>
>> ---
>> drivers/vfio/fsl-mc/vfio_fsl_mc.c | 115 +++++++++++++++++++++-
>> drivers/vfio/fsl-mc/vfio_fsl_mc_private.h | 1 +
>> 2 files changed, 114 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>> index 73834f488a94..27713aa86878 100644
>> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>> @@ -12,6 +12,7 @@
>> #include <linux/types.h>
>> #include <linux/vfio.h>
>> #include <linux/fsl/mc.h>
>> +#include <linux/delay.h>
>>
>> #include "vfio_fsl_mc_private.h"
>>
>> @@ -106,6 +107,9 @@ static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
>> vdev->regions[i].size = resource_size(res);
>> vdev->regions[i].flags = VFIO_REGION_INFO_FLAG_MMAP;
>> vdev->regions[i].type = mc_dev->regions[i].flags & IORESOURCE_BITS;
>> + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
>> + if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY))
>> + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
>> }
>>
>> vdev->num_regions = mc_dev->obj_desc.region_count;
>> @@ -114,6 +118,11 @@ static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
>>
>> static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev)
>> {
>> + int i;
>> +
>> + for (i = 0; i < vdev->num_regions; i++)
>> + iounmap(vdev->regions[i].ioaddr);
>> +
>> vdev->num_regions = 0;
>> kfree(vdev->regions);
>> }
>> @@ -311,13 +320,115 @@ static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
>> static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
>> size_t count, loff_t *ppos)
>> {
>> - return -EINVAL;
>> + struct vfio_fsl_mc_device *vdev = device_data;
>> + unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
>> + loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
>> + struct vfio_fsl_mc_region *region;
>> + u64 data[8];
>> + int i;
>> +
>> + if (index >= vdev->num_regions)
>> + return -EINVAL;
>> +
>> + region = &vdev->regions[index];
>> +
>> + if (!(region->flags & VFIO_REGION_INFO_FLAG_READ))
>> + return -EINVAL;
>> +
>> + if (!region->ioaddr) {
>> + region->ioaddr = ioremap(region->addr, region->size);
>> + if (!region->ioaddr)
>> + return -ENOMEM;
>> + }
>> +
>> + if (count != 64 || off != 0)
>> + return -EINVAL;
>> +
>> + for (i = 7; i >= 0; i--)
>> + data[i] = readq(region->ioaddr + i * sizeof(uint64_t));
>> +
>> + if (copy_to_user(buf, data, 64))
>> + return -EFAULT;
>> +
>> + return count;
>> +}
>> +
>> +#define MC_CMD_COMPLETION_TIMEOUT_MS 5000
>> +#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS 500
>> +
>> +static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data)
>> +{
>> + int i;
>> + enum mc_cmd_status status;
>> + unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000;
>> +
>> + /* Write at command parameter into portal */
>> + for (i = 7; i >= 1; i--)
>> + writeq_relaxed(cmd_data[i], ioaddr + i * sizeof(uint64_t));
>> +
>> + /* Write command header in the end */
>> + writeq(cmd_data[0], ioaddr);
>> +
>> + /* Wait for response before returning to user-space
>> + * This can be optimized in future to even prepare response
>> + * before returning to user-space and avoid read ioctl.
>> + */
>> + for (;;) {
>> + u64 header;
>> + struct mc_cmd_header *resp_hdr;
>> +
>> + header = cpu_to_le64(readq_relaxed(ioaddr));
>> +
>> + resp_hdr = (struct mc_cmd_header *)&header;
>> + status = (enum mc_cmd_status)resp_hdr->status;
>> + if (status != MC_CMD_STATUS_READY)
>> + break;
>> +
>> + udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
>> + timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
>> + if (timeout_usecs == 0)
>> + return -ETIMEDOUT;
>> + }
>> +
>> + return 0;
>> }
>>
>> static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf,
>> size_t count, loff_t *ppos)
>> {
>> - return -EINVAL;
>> + struct vfio_fsl_mc_device *vdev = device_data;
>> + unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
>> + loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
>> + struct vfio_fsl_mc_region *region;
>> + u64 data[8];
>> + int ret;
>> +
>> + if (index >= vdev->num_regions)
>> + return -EINVAL;
>> +
>> + region = &vdev->regions[index];
>> +
>> + if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE))
>> + return -EINVAL;
>> +
>> + if (!region->ioaddr) {
>> + region->ioaddr = ioremap(region->addr, region->size);
>> + if (!region->ioaddr)
>> + return -ENOMEM;
>> + }
>> +
>> + if (count != 64 || off != 0)
>> + return -EINVAL;
>> +
>> + if (copy_from_user(&data, buf, 64))
>> + return -EFAULT;
>> +
>> + ret = vfio_fsl_mc_send_command(region->ioaddr, data);
>> + if (ret)
>> + return ret;
>> +
>> + return count;
>> +
>> }
>>
>> static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
>> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>> index bbfca8b55f8a..e6804e516c4a 100644
>> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>> @@ -32,6 +32,7 @@ struct vfio_fsl_mc_region {
>> u32 type;
>> u64 addr;
>> resource_size_t size;
>> + void __iomem *ioaddr;
>> };
>>
>> struct vfio_fsl_mc_device {
>>
> Thanks
>
> Eric
>
Thanks,
Diana
Hi Diana,
On 9/7/20 4:34 PM, Diana Craciun OSS wrote:
> Hi Eric,
>
> On 9/4/2020 11:18 AM, Auger Eric wrote:
>> Hi Diana,
>>
>> On 8/26/20 11:33 AM, Diana Craciun wrote:
>>> The software uses a memory-mapped I/O command interface (MC portals) to
>>> communicate with the MC hardware. This command interface is used to
>>> discover, enumerate, configure and remove DPAA2 objects. The DPAA2
>>> objects use MSIs, so the command interface needs to be emulated
>>> such that the correct MSI is configured in the hardware (the guest
>>> has the virtual MSIs).
>> What I don't get is all the regions are mmappable too.
>> And this patch does not seem to introduce special handling with respect
>> to MSIs. Please could you clarify?
>
> The device can be controlled using commands issued towards a firmware.
> Most of the commands can be passthrough, among exceptions is the command
> that configures the interrupts. In a guest the interrupts are emulated
> and for the hardware the numbers in the guest mean nothing. So, in a
> virtual machine scenario, the DPMCP and DPRC regions are emulated in
> qemu such that the command which configures the interrupts will not go
> to hardware with the information set by the guest.
> However there are other scenarios apart from virtual machines like DPDK
> in which the interrupt configuration command is not used. The problem
> might be that the userspace could issue the command because there is no
> restriction in the VFIO, but in that case the worst thing that may
> happen is for the interrupts for the device not to work.
> However it is possible to restrict the command for this scenario as well
> if I change the code and not allow the DPRC region to be mmapable. In
> practice it proved that it might not gain much by direct assigning that
> area. Also the interrupt configuration command was restricted from the
> firmware to be issued only from the DPRC device region to help such a
> scenario.
Yes actually I meant that the region used to configure MSIs should not
be mmappable then?
Thanks
Eric
>
>
>>>
>>> This patch is adding read/write support for fsl-mc devices. The mc
>>> commands are emulated by the userspace. The host is just passing
>>> the correct command to the hardware.
>>>
>>> Also the current patch limits userspace to write complete
>>> 64byte command once and read 64byte response by one ioctl.
>>>
>>> Signed-off-by: Bharat Bhushan <[email protected]>
>>> Signed-off-by: Diana Craciun <[email protected]>
>>> ---
>>> drivers/vfio/fsl-mc/vfio_fsl_mc.c | 115 +++++++++++++++++++++-
>>> drivers/vfio/fsl-mc/vfio_fsl_mc_private.h | 1 +
>>> 2 files changed, 114 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>>> b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>>> index 73834f488a94..27713aa86878 100644
>>> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>>> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>>> @@ -12,6 +12,7 @@
>>> #include <linux/types.h>
>>> #include <linux/vfio.h>
>>> #include <linux/fsl/mc.h>
>>> +#include <linux/delay.h>
>>> #include "vfio_fsl_mc_private.h"
>>> @@ -106,6 +107,9 @@ static int vfio_fsl_mc_regions_init(struct
>>> vfio_fsl_mc_device *vdev)
>>> vdev->regions[i].size = resource_size(res);
>>> vdev->regions[i].flags = VFIO_REGION_INFO_FLAG_MMAP;
>>> vdev->regions[i].type = mc_dev->regions[i].flags &
>>> IORESOURCE_BITS;
>>> + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
>>> + if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY))
>>> + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
>>> }
>>> vdev->num_regions = mc_dev->obj_desc.region_count;
>>> @@ -114,6 +118,11 @@ static int vfio_fsl_mc_regions_init(struct
>>> vfio_fsl_mc_device *vdev)
>>> static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device
>>> *vdev)
>>> {
>>> + int i;
>>> +
>>> + for (i = 0; i < vdev->num_regions; i++)
>>> + iounmap(vdev->regions[i].ioaddr);
>>> +
>>> vdev->num_regions = 0;
>>> kfree(vdev->regions);
>>> }
>>> @@ -311,13 +320,115 @@ static long vfio_fsl_mc_ioctl(void
>>> *device_data, unsigned int cmd,
>>> static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
>>> size_t count, loff_t *ppos)
>>> {
>>> - return -EINVAL;
>>> + struct vfio_fsl_mc_device *vdev = device_data;
>>> + unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
>>> + loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
>>> + struct vfio_fsl_mc_region *region;
>>> + u64 data[8];
>>> + int i;
>>> +
>>> + if (index >= vdev->num_regions)
>>> + return -EINVAL;
>>> +
>>> + region = &vdev->regions[index];
>>> +
>>> + if (!(region->flags & VFIO_REGION_INFO_FLAG_READ))
>>> + return -EINVAL;
>>> +
>>> + if (!region->ioaddr) {
>>> + region->ioaddr = ioremap(region->addr, region->size);
>>> + if (!region->ioaddr)
>>> + return -ENOMEM;
>>> + }
>>> +
>>> + if (count != 64 || off != 0)
>>> + return -EINVAL;
>>> +
>>> + for (i = 7; i >= 0; i--)
>>> + data[i] = readq(region->ioaddr + i * sizeof(uint64_t));
>>> +
>>> + if (copy_to_user(buf, data, 64))
>>> + return -EFAULT;
>>> +
>>> + return count;
>>> +}
>>> +
>>> +#define MC_CMD_COMPLETION_TIMEOUT_MS 5000
>>> +#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS 500
>>> +
>>> +static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t
>>> *cmd_data)
>>> +{
>>> + int i;
>>> + enum mc_cmd_status status;
>>> + unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000;
>>> +
>>> + /* Write at command parameter into portal */
>>> + for (i = 7; i >= 1; i--)
>>> + writeq_relaxed(cmd_data[i], ioaddr + i * sizeof(uint64_t));
>>> +
>>> + /* Write command header in the end */
>>> + writeq(cmd_data[0], ioaddr);
>>> +
>>> + /* Wait for response before returning to user-space
>>> + * This can be optimized in future to even prepare response
>>> + * before returning to user-space and avoid read ioctl.
>>> + */
>>> + for (;;) {
>>> + u64 header;
>>> + struct mc_cmd_header *resp_hdr;
>>> +
>>> + header = cpu_to_le64(readq_relaxed(ioaddr));
>>> +
>>> + resp_hdr = (struct mc_cmd_header *)&header;
>>> + status = (enum mc_cmd_status)resp_hdr->status;
>>> + if (status != MC_CMD_STATUS_READY)
>>> + break;
>>> +
>>> + udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
>>> + timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
>>> + if (timeout_usecs == 0)
>>> + return -ETIMEDOUT;
>>> + }
>>> +
>>> + return 0;
>>> }
>>> static ssize_t vfio_fsl_mc_write(void *device_data, const char
>>> __user *buf,
>>> size_t count, loff_t *ppos)
>>> {
>>> - return -EINVAL;
>>> + struct vfio_fsl_mc_device *vdev = device_data;
>>> + unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
>>> + loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
>>> + struct vfio_fsl_mc_region *region;
>>> + u64 data[8];
>>> + int ret;
>>> +
>>> + if (index >= vdev->num_regions)
>>> + return -EINVAL;
>>> +
>>> + region = &vdev->regions[index];
>>> +
>>> + if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE))
>>> + return -EINVAL;
>>> +
>>> + if (!region->ioaddr) {
>>> + region->ioaddr = ioremap(region->addr, region->size);
>>> + if (!region->ioaddr)
>>> + return -ENOMEM;
>>> + }
>>> +
>>> + if (count != 64 || off != 0)
>>> + return -EINVAL;
>>> +
>>> + if (copy_from_user(&data, buf, 64))
>>> + return -EFAULT;
>>> +
>>> + ret = vfio_fsl_mc_send_command(region->ioaddr, data);
>>> + if (ret)
>>> + return ret;
>>> +
>>> + return count;
>>> +
>>> }
>>> static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
>>> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>>> b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>>> index bbfca8b55f8a..e6804e516c4a 100644
>>> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>>> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>>> @@ -32,6 +32,7 @@ struct vfio_fsl_mc_region {
>>> u32 type;
>>> u64 addr;
>>> resource_size_t size;
>>> + void __iomem *ioaddr;
>>> };
>>> struct vfio_fsl_mc_device {
>>>
>> Thanks
>>
>> Eric
>>
>
> Thanks,
> Diana
>
Hi Eric,
On 9/10/2020 11:20 AM, Auger Eric wrote:
> Hi Diana,
>
> On 9/7/20 4:34 PM, Diana Craciun OSS wrote:
>> Hi Eric,
>>
>> On 9/4/2020 11:18 AM, Auger Eric wrote:
>>> Hi Diana,
>>>
>>> On 8/26/20 11:33 AM, Diana Craciun wrote:
>>>> The software uses a memory-mapped I/O command interface (MC portals) to
>>>> communicate with the MC hardware. This command interface is used to
>>>> discover, enumerate, configure and remove DPAA2 objects. The DPAA2
>>>> objects use MSIs, so the command interface needs to be emulated
>>>> such that the correct MSI is configured in the hardware (the guest
>>>> has the virtual MSIs).
>>> What I don't get is all the regions are mmappable too.
>>> And this patch does not seem to introduce special handling with respect
>>> to MSIs. Please could you clarify?
>>
>> The device can be controlled using commands issued towards a firmware.
>> Most of the commands can be passthrough, among exceptions is the command
>> that configures the interrupts. In a guest the interrupts are emulated
>> and for the hardware the numbers in the guest mean nothing. So, in a
>> virtual machine scenario, the DPMCP and DPRC regions are emulated in
>> qemu such that the command which configures the interrupts will not go
>> to hardware with the information set by the guest.
>> However there are other scenarios apart from virtual machines like DPDK
>> in which the interrupt configuration command is not used. The problem
>> might be that the userspace could issue the command because there is no
>> restriction in the VFIO, but in that case the worst thing that may
>> happen is for the interrupts for the device not to work.
>> However it is possible to restrict the command for this scenario as well
>> if I change the code and not allow the DPRC region to be mmapable. In
>> practice it proved that it might not gain much by direct assigning that
>> area. Also the interrupt configuration command was restricted from the
>> firmware to be issued only from the DPRC device region to help such a
>> scenario.
> Yes actually I meant that the region used to configure MSIs should not
> be mmappable then?
>
>
That region is not used only for the MSI configuration. The way it works
is through commands that are written at a certain memory address. And
the commands can be different. Applications like DPDK do not use the
command to configure interrupts, so that is the reason that the region
is mmapable. But at a second thought, I think that I can restrict the
DPRC region not to be mmapable. And that is the only region that can be
used to configure interrupts.
Thanks,
Diana
> Thanks
>
> Eric
>>
>>
>>>>
>>>> This patch is adding read/write support for fsl-mc devices. The mc
>>>> commands are emulated by the userspace. The host is just passing
>>>> the correct command to the hardware.
>>>>
>>>> Also the current patch limits userspace to write complete
>>>> 64byte command once and read 64byte response by one ioctl.
>>>>
>>>> Signed-off-by: Bharat Bhushan <[email protected]>
>>>> Signed-off-by: Diana Craciun <[email protected]>
>>>> ---
>>>> drivers/vfio/fsl-mc/vfio_fsl_mc.c | 115 +++++++++++++++++++++-
>>>> drivers/vfio/fsl-mc/vfio_fsl_mc_private.h | 1 +
>>>> 2 files changed, 114 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>>>> b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>>>> index 73834f488a94..27713aa86878 100644
>>>> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>>>> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
>>>> @@ -12,6 +12,7 @@
>>>> #include <linux/types.h>
>>>> #include <linux/vfio.h>
>>>> #include <linux/fsl/mc.h>
>>>> +#include <linux/delay.h>
>>>> #include "vfio_fsl_mc_private.h"
>>>> @@ -106,6 +107,9 @@ static int vfio_fsl_mc_regions_init(struct
>>>> vfio_fsl_mc_device *vdev)
>>>> vdev->regions[i].size = resource_size(res);
>>>> vdev->regions[i].flags = VFIO_REGION_INFO_FLAG_MMAP;
>>>> vdev->regions[i].type = mc_dev->regions[i].flags &
>>>> IORESOURCE_BITS;
>>>> + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
>>>> + if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY))
>>>> + vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
>>>> }
>>>> vdev->num_regions = mc_dev->obj_desc.region_count;
>>>> @@ -114,6 +118,11 @@ static int vfio_fsl_mc_regions_init(struct
>>>> vfio_fsl_mc_device *vdev)
>>>> static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device
>>>> *vdev)
>>>> {
>>>> + int i;
>>>> +
>>>> + for (i = 0; i < vdev->num_regions; i++)
>>>> + iounmap(vdev->regions[i].ioaddr);
>>>> +
>>>> vdev->num_regions = 0;
>>>> kfree(vdev->regions);
>>>> }
>>>> @@ -311,13 +320,115 @@ static long vfio_fsl_mc_ioctl(void
>>>> *device_data, unsigned int cmd,
>>>> static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
>>>> size_t count, loff_t *ppos)
>>>> {
>>>> - return -EINVAL;
>>>> + struct vfio_fsl_mc_device *vdev = device_data;
>>>> + unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
>>>> + loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
>>>> + struct vfio_fsl_mc_region *region;
>>>> + u64 data[8];
>>>> + int i;
>>>> +
>>>> + if (index >= vdev->num_regions)
>>>> + return -EINVAL;
>>>> +
>>>> + region = &vdev->regions[index];
>>>> +
>>>> + if (!(region->flags & VFIO_REGION_INFO_FLAG_READ))
>>>> + return -EINVAL;
>>>> +
>>>> + if (!region->ioaddr) {
>>>> + region->ioaddr = ioremap(region->addr, region->size);
>>>> + if (!region->ioaddr)
>>>> + return -ENOMEM;
>>>> + }
>>>> +
>>>> + if (count != 64 || off != 0)
>>>> + return -EINVAL;
>>>> +
>>>> + for (i = 7; i >= 0; i--)
>>>> + data[i] = readq(region->ioaddr + i * sizeof(uint64_t));
>>>> +
>>>> + if (copy_to_user(buf, data, 64))
>>>> + return -EFAULT;
>>>> +
>>>> + return count;
>>>> +}
>>>> +
>>>> +#define MC_CMD_COMPLETION_TIMEOUT_MS 5000
>>>> +#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS 500
>>>> +
>>>> +static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t
>>>> *cmd_data)
>>>> +{
>>>> + int i;
>>>> + enum mc_cmd_status status;
>>>> + unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000;
>>>> +
>>>> + /* Write at command parameter into portal */
>>>> + for (i = 7; i >= 1; i--)
>>>> + writeq_relaxed(cmd_data[i], ioaddr + i * sizeof(uint64_t));
>>>> +
>>>> + /* Write command header in the end */
>>>> + writeq(cmd_data[0], ioaddr);
>>>> +
>>>> + /* Wait for response before returning to user-space
>>>> + * This can be optimized in future to even prepare response
>>>> + * before returning to user-space and avoid read ioctl.
>>>> + */
>>>> + for (;;) {
>>>> + u64 header;
>>>> + struct mc_cmd_header *resp_hdr;
>>>> +
>>>> + header = cpu_to_le64(readq_relaxed(ioaddr));
>>>> +
>>>> + resp_hdr = (struct mc_cmd_header *)&header;
>>>> + status = (enum mc_cmd_status)resp_hdr->status;
>>>> + if (status != MC_CMD_STATUS_READY)
>>>> + break;
>>>> +
>>>> + udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
>>>> + timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
>>>> + if (timeout_usecs == 0)
>>>> + return -ETIMEDOUT;
>>>> + }
>>>> +
>>>> + return 0;
>>>> }
>>>> static ssize_t vfio_fsl_mc_write(void *device_data, const char
>>>> __user *buf,
>>>> size_t count, loff_t *ppos)
>>>> {
>>>> - return -EINVAL;
>>>> + struct vfio_fsl_mc_device *vdev = device_data;
>>>> + unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
>>>> + loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
>>>> + struct vfio_fsl_mc_region *region;
>>>> + u64 data[8];
>>>> + int ret;
>>>> +
>>>> + if (index >= vdev->num_regions)
>>>> + return -EINVAL;
>>>> +
>>>> + region = &vdev->regions[index];
>>>> +
>>>> + if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE))
>>>> + return -EINVAL;
>>>> +
>>>> + if (!region->ioaddr) {
>>>> + region->ioaddr = ioremap(region->addr, region->size);
>>>> + if (!region->ioaddr)
>>>> + return -ENOMEM;
>>>> + }
>>>> +
>>>> + if (count != 64 || off != 0)
>>>> + return -EINVAL;
>>>> +
>>>> + if (copy_from_user(&data, buf, 64))
>>>> + return -EFAULT;
>>>> +
>>>> + ret = vfio_fsl_mc_send_command(region->ioaddr, data);
>>>> + if (ret)
>>>> + return ret;
>>>> +
>>>> + return count;
>>>> +
>>>> }
>>>> static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
>>>> diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>>>> b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>>>> index bbfca8b55f8a..e6804e516c4a 100644
>>>> --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>>>> +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
>>>> @@ -32,6 +32,7 @@ struct vfio_fsl_mc_region {
>>>> u32 type;
>>>> u64 addr;
>>>> resource_size_t size;
>>>> + void __iomem *ioaddr;
>>>> };
>>>> struct vfio_fsl_mc_device {
>>>>
>>> Thanks
>>>
>>> Eric
>>>
>>
>> Thanks,
>> Diana
>>
>