2020-11-16 11:04:10

by Eric Auger

[permalink] [raw]
Subject: [PATCH v11 01/13] vfio: VFIO_IOMMU_SET_PASID_TABLE

From: "Liu, Yi L" <[email protected]>

This patch adds an VFIO_IOMMU_SET_PASID_TABLE ioctl
which aims to pass the virtual iommu guest configuration
to the host. This latter takes the form of the so-called
PASID table.

Signed-off-by: Jacob Pan <[email protected]>
Signed-off-by: Liu, Yi L <[email protected]>
Signed-off-by: Eric Auger <[email protected]>

---
v11 -> v12:
- use iommu_uapi_set_pasid_table
- check SET and UNSET are not set simultaneously (Zenghui)

v8 -> v9:
- Merge VFIO_IOMMU_ATTACH/DETACH_PASID_TABLE into a single
VFIO_IOMMU_SET_PASID_TABLE ioctl.

v6 -> v7:
- add a comment related to VFIO_IOMMU_DETACH_PASID_TABLE

v3 -> v4:
- restore ATTACH/DETACH
- add unwind on failure

v2 -> v3:
- s/BIND_PASID_TABLE/SET_PASID_TABLE

v1 -> v2:
- s/BIND_GUEST_STAGE/BIND_PASID_TABLE
- remove the struct device arg
---
drivers/vfio/vfio_iommu_type1.c | 65 +++++++++++++++++++++++++++++++++
include/uapi/linux/vfio.h | 19 ++++++++++
2 files changed, 84 insertions(+)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 67e827638995..87ddd9e882dc 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2587,6 +2587,41 @@ static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu,
return ret;
}

+static void
+vfio_detach_pasid_table(struct vfio_iommu *iommu)
+{
+ struct vfio_domain *d;
+
+ mutex_lock(&iommu->lock);
+ list_for_each_entry(d, &iommu->domain_list, next)
+ iommu_detach_pasid_table(d->domain);
+
+ mutex_unlock(&iommu->lock);
+}
+
+static int
+vfio_attach_pasid_table(struct vfio_iommu *iommu, unsigned long arg)
+{
+ struct vfio_domain *d;
+ int ret = 0;
+
+ mutex_lock(&iommu->lock);
+
+ list_for_each_entry(d, &iommu->domain_list, next) {
+ ret = iommu_uapi_attach_pasid_table(d->domain, (void __user *)arg);
+ if (ret)
+ goto unwind;
+ }
+ goto unlock;
+unwind:
+ list_for_each_entry_continue_reverse(d, &iommu->domain_list, next) {
+ iommu_detach_pasid_table(d->domain);
+ }
+unlock:
+ mutex_unlock(&iommu->lock);
+ return ret;
+}
+
static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
struct vfio_info_cap *caps)
{
@@ -2747,6 +2782,34 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
-EFAULT : 0;
}

+static int vfio_iommu_type1_set_pasid_table(struct vfio_iommu *iommu,
+ unsigned long arg)
+{
+ struct vfio_iommu_type1_set_pasid_table spt;
+ unsigned long minsz;
+ int ret = -EINVAL;
+
+ minsz = offsetofend(struct vfio_iommu_type1_set_pasid_table, flags);
+
+ if (copy_from_user(&spt, (void __user *)arg, minsz))
+ return -EFAULT;
+
+ if (spt.argsz < minsz)
+ return -EINVAL;
+
+ if (spt.flags & VFIO_PASID_TABLE_FLAG_SET &&
+ spt.flags & VFIO_PASID_TABLE_FLAG_UNSET)
+ return -EINVAL;
+
+ if (spt.flags & VFIO_PASID_TABLE_FLAG_SET)
+ ret = vfio_attach_pasid_table(iommu, arg + minsz);
+ else if (spt.flags & VFIO_PASID_TABLE_FLAG_UNSET) {
+ vfio_detach_pasid_table(iommu);
+ ret = 0;
+ }
+ return ret;
+}
+
static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
unsigned long arg)
{
@@ -2867,6 +2930,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
return vfio_iommu_type1_unmap_dma(iommu, arg);
case VFIO_IOMMU_DIRTY_PAGES:
return vfio_iommu_type1_dirty_pages(iommu, arg);
+ case VFIO_IOMMU_SET_PASID_TABLE:
+ return vfio_iommu_type1_set_pasid_table(iommu, arg);
default:
return -ENOTTY;
}
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 2f313a238a8f..78ce3ce6c331 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -14,6 +14,7 @@

#include <linux/types.h>
#include <linux/ioctl.h>
+#include <linux/iommu.h>

#define VFIO_API_VERSION 0

@@ -1180,6 +1181,24 @@ struct vfio_iommu_type1_dirty_bitmap_get {

#define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17)

+/*
+ * VFIO_IOMMU_SET_PASID_TABLE - _IOWR(VFIO_TYPE, VFIO_BASE + 22,
+ * struct vfio_iommu_type1_set_pasid_table)
+ *
+ * The SET operation passes a PASID table to the host while the
+ * UNSET operation detaches the one currently programmed. Setting
+ * a table while another is already programmed replaces the old table.
+ */
+struct vfio_iommu_type1_set_pasid_table {
+ __u32 argsz;
+ __u32 flags;
+#define VFIO_PASID_TABLE_FLAG_SET (1 << 0)
+#define VFIO_PASID_TABLE_FLAG_UNSET (1 << 1)
+ struct iommu_pasid_table_config config; /* used on SET */
+};
+
+#define VFIO_IOMMU_SET_PASID_TABLE _IO(VFIO_TYPE, VFIO_BASE + 22)
+
/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */

/*
--
2.21.3


2020-11-17 02:42:44

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH v11 01/13] vfio: VFIO_IOMMU_SET_PASID_TABLE

Hi Eric,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v5.10-rc4]
[also build test ERROR on next-20201116]
[cannot apply to vfio/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url: https://github.com/0day-ci/linux/commits/Eric-Auger/SMMUv3-Nested-Stage-Setup-VFIO-part/20201116-190742
base: 09162bc32c880a791c6c0668ce0745cf7958f576
config: x86_64-randconfig-s022-20201115 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce:
# apt-get install sparse
# sparse version: v0.6.3-107-gaf3512a6-dirty
# https://github.com/0day-ci/linux/commit/ce7c900f2d50c7e8cc62d54836369a3f3150adaf
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Eric-Auger/SMMUv3-Nested-Stage-Setup-VFIO-part/20201116-190742
git checkout ce7c900f2d50c7e8cc62d54836369a3f3150adaf
# save the attached .config to linux build tree
make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=x86_64

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

In file included from <command-line>:32:
>> ./usr/include/linux/vfio.h:1197:34: error: field 'config' has incomplete type
1197 | struct iommu_pasid_table_config config; /* used on SET */
| ^~~~~~

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]


Attachments:
(No filename) (1.68 kB)
.config.gz (36.07 kB)
Download all attachments

2020-11-25 00:02:45

by Alex Williamson

[permalink] [raw]
Subject: Re: [PATCH v11 01/13] vfio: VFIO_IOMMU_SET_PASID_TABLE

On Mon, 16 Nov 2020 12:00:18 +0100
Eric Auger <[email protected]> wrote:

> From: "Liu, Yi L" <[email protected]>
>
> This patch adds an VFIO_IOMMU_SET_PASID_TABLE ioctl
> which aims to pass the virtual iommu guest configuration
> to the host. This latter takes the form of the so-called
> PASID table.
>
> Signed-off-by: Jacob Pan <[email protected]>
> Signed-off-by: Liu, Yi L <[email protected]>
> Signed-off-by: Eric Auger <[email protected]>
>
> ---
> v11 -> v12:
> - use iommu_uapi_set_pasid_table
> - check SET and UNSET are not set simultaneously (Zenghui)
>
> v8 -> v9:
> - Merge VFIO_IOMMU_ATTACH/DETACH_PASID_TABLE into a single
> VFIO_IOMMU_SET_PASID_TABLE ioctl.
>
> v6 -> v7:
> - add a comment related to VFIO_IOMMU_DETACH_PASID_TABLE
>
> v3 -> v4:
> - restore ATTACH/DETACH
> - add unwind on failure
>
> v2 -> v3:
> - s/BIND_PASID_TABLE/SET_PASID_TABLE
>
> v1 -> v2:
> - s/BIND_GUEST_STAGE/BIND_PASID_TABLE
> - remove the struct device arg
> ---
> drivers/vfio/vfio_iommu_type1.c | 65 +++++++++++++++++++++++++++++++++
> include/uapi/linux/vfio.h | 19 ++++++++++
> 2 files changed, 84 insertions(+)
>
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 67e827638995..87ddd9e882dc 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -2587,6 +2587,41 @@ static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu,
> return ret;
> }
>
> +static void
> +vfio_detach_pasid_table(struct vfio_iommu *iommu)
> +{
> + struct vfio_domain *d;
> +
> + mutex_lock(&iommu->lock);
> + list_for_each_entry(d, &iommu->domain_list, next)
> + iommu_detach_pasid_table(d->domain);
> +
> + mutex_unlock(&iommu->lock);
> +}
> +
> +static int
> +vfio_attach_pasid_table(struct vfio_iommu *iommu, unsigned long arg)
> +{
> + struct vfio_domain *d;
> + int ret = 0;
> +
> + mutex_lock(&iommu->lock);
> +
> + list_for_each_entry(d, &iommu->domain_list, next) {
> + ret = iommu_uapi_attach_pasid_table(d->domain, (void __user *)arg);
> + if (ret)
> + goto unwind;
> + }
> + goto unlock;
> +unwind:
> + list_for_each_entry_continue_reverse(d, &iommu->domain_list, next) {
> + iommu_detach_pasid_table(d->domain);
> + }
> +unlock:

This goto leap frog could be avoided with just:

list_for_each_entry(d, &iommu->domain_list, next) {
ret = iommu_uapi_attach_pasid_table(d->domain, (void __user *)arg);
if (ret) {
list_for_each_entry_continue_reverse(d, &iommu->domain_list, next) {
iommu_detach_pasid_table(d->domain);
}
break;
}
}

> + mutex_unlock(&iommu->lock);
> + return ret;
> +}
> +
> static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
> struct vfio_info_cap *caps)
> {
> @@ -2747,6 +2782,34 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
> -EFAULT : 0;
> }
>
> +static int vfio_iommu_type1_set_pasid_table(struct vfio_iommu *iommu,
> + unsigned long arg)
> +{
> + struct vfio_iommu_type1_set_pasid_table spt;
> + unsigned long minsz;
> + int ret = -EINVAL;
> +
> + minsz = offsetofend(struct vfio_iommu_type1_set_pasid_table, flags);
> +
> + if (copy_from_user(&spt, (void __user *)arg, minsz))
> + return -EFAULT;
> +
> + if (spt.argsz < minsz)
> + return -EINVAL;
> +
> + if (spt.flags & VFIO_PASID_TABLE_FLAG_SET &&
> + spt.flags & VFIO_PASID_TABLE_FLAG_UNSET)
> + return -EINVAL;
> +
> + if (spt.flags & VFIO_PASID_TABLE_FLAG_SET)
> + ret = vfio_attach_pasid_table(iommu, arg + minsz);
> + else if (spt.flags & VFIO_PASID_TABLE_FLAG_UNSET) {
> + vfio_detach_pasid_table(iommu);
> + ret = 0;
> + }

This doesn't really validate that the other flag bits are zero, ex.
user could pass flags = (1 << 8) | VFIO_PASID_TABLE_FLAG_SET and we'd
just ignore the extra bit. So this probably needs to be:

if (spt.flags == VFIO_PASID_TABLE_FLAG_SET)
ret = vfio_attach_pasid_table(iommu, arg + minsz);
else if (spt.flags == VFIO_PASID_TABLE_FLAG_UNSET)
vfio_detach_pasid_table(iommu);

Or otherwise validate that none of the other bits are set. It also
seems cleaner to me to set the initial value of ret = 0 and end this
with:

else
ret = -EINVAL;


> + return ret;
> +}
> +
> static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
> unsigned long arg)
> {
> @@ -2867,6 +2930,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
> return vfio_iommu_type1_unmap_dma(iommu, arg);
> case VFIO_IOMMU_DIRTY_PAGES:
> return vfio_iommu_type1_dirty_pages(iommu, arg);
> + case VFIO_IOMMU_SET_PASID_TABLE:
> + return vfio_iommu_type1_set_pasid_table(iommu, arg);
> default:
> return -ENOTTY;
> }
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 2f313a238a8f..78ce3ce6c331 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -14,6 +14,7 @@
>
> #include <linux/types.h>
> #include <linux/ioctl.h>
> +#include <linux/iommu.h>
>
> #define VFIO_API_VERSION 0
>
> @@ -1180,6 +1181,24 @@ struct vfio_iommu_type1_dirty_bitmap_get {
>
> #define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17)
>
> +/*
> + * VFIO_IOMMU_SET_PASID_TABLE - _IOWR(VFIO_TYPE, VFIO_BASE + 22,

We already reuse ioctl indexes between type1 and spapr (ex. +17 is
either VFIO_IOMMU_DIRTY_PAGES or VFIO_IOMMU_SPAPR_REGISTER_MEMORY
depending on the iommu type). I wonder if we should reuse +18 here
instead.

> + * struct vfio_iommu_type1_set_pasid_table)
> + *
> + * The SET operation passes a PASID table to the host while the
> + * UNSET operation detaches the one currently programmed. Setting
> + * a table while another is already programmed replaces the old table.
> + */
> +struct vfio_iommu_type1_set_pasid_table {
> + __u32 argsz;
> + __u32 flags;
> +#define VFIO_PASID_TABLE_FLAG_SET (1 << 0)
> +#define VFIO_PASID_TABLE_FLAG_UNSET (1 << 1)
> + struct iommu_pasid_table_config config; /* used on SET */
> +};
> +
> +#define VFIO_IOMMU_SET_PASID_TABLE _IO(VFIO_TYPE, VFIO_BASE + 22)
> +
> /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
>
> /*

2021-02-02 21:24:41

by Keqian Zhu

[permalink] [raw]
Subject: Re: [PATCH v11 01/13] vfio: VFIO_IOMMU_SET_PASID_TABLE

Hi Eric,

On 2020/11/16 19:00, Eric Auger wrote:
> From: "Liu, Yi L" <[email protected]>
>
> This patch adds an VFIO_IOMMU_SET_PASID_TABLE ioctl
> which aims to pass the virtual iommu guest configuration
> to the host. This latter takes the form of the so-called
> PASID table.
>
> Signed-off-by: Jacob Pan <[email protected]>
> Signed-off-by: Liu, Yi L <[email protected]>
> Signed-off-by: Eric Auger <[email protected]>
>
> ---
> v11 -> v12:
> - use iommu_uapi_set_pasid_table
> - check SET and UNSET are not set simultaneously (Zenghui)
>
> v8 -> v9:
> - Merge VFIO_IOMMU_ATTACH/DETACH_PASID_TABLE into a single
> VFIO_IOMMU_SET_PASID_TABLE ioctl.
>
> v6 -> v7:
> - add a comment related to VFIO_IOMMU_DETACH_PASID_TABLE
>
> v3 -> v4:
> - restore ATTACH/DETACH
> - add unwind on failure
>
> v2 -> v3:
> - s/BIND_PASID_TABLE/SET_PASID_TABLE
>
> v1 -> v2:
> - s/BIND_GUEST_STAGE/BIND_PASID_TABLE
> - remove the struct device arg
> ---
> drivers/vfio/vfio_iommu_type1.c | 65 +++++++++++++++++++++++++++++++++
> include/uapi/linux/vfio.h | 19 ++++++++++
> 2 files changed, 84 insertions(+)
>
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 67e827638995..87ddd9e882dc 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -2587,6 +2587,41 @@ static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu,
> return ret;
> }
>
> +static void
> +vfio_detach_pasid_table(struct vfio_iommu *iommu)
> +{
> + struct vfio_domain *d;
> +
> + mutex_lock(&iommu->lock);
> + list_for_each_entry(d, &iommu->domain_list, next)
> + iommu_detach_pasid_table(d->domain);
> +
> + mutex_unlock(&iommu->lock);
> +}
> +
> +static int
> +vfio_attach_pasid_table(struct vfio_iommu *iommu, unsigned long arg)
> +{
> + struct vfio_domain *d;
> + int ret = 0;
> +
> + mutex_lock(&iommu->lock);
> +
> + list_for_each_entry(d, &iommu->domain_list, next) {
> + ret = iommu_uapi_attach_pasid_table(d->domain, (void __user *)arg);
This design is not very clear to me. This assumes all iommu_domains share the same pasid table.

As I understand, it's reasonable when there is only one group in the domain, and only one domain in the vfio_iommu.
If more than one group in the vfio_iommu, the guest may put them into different guest iommu_domain, then they have different pasid table.

Is this the use scenario?

Thanks,
Keqian

> + if (ret)
> + goto unwind;
> + }
> + goto unlock;
> +unwind:
> + list_for_each_entry_continue_reverse(d, &iommu->domain_list, next) {
> + iommu_detach_pasid_table(d->domain);
> + }
> +unlock:
> + mutex_unlock(&iommu->lock);
> + return ret;
> +}
> +
> static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
> struct vfio_info_cap *caps)
> {
> @@ -2747,6 +2782,34 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
> -EFAULT : 0;
> }
>
> +static int vfio_iommu_type1_set_pasid_table(struct vfio_iommu *iommu,
> + unsigned long arg)
> +{
> + struct vfio_iommu_type1_set_pasid_table spt;
> + unsigned long minsz;
> + int ret = -EINVAL;
> +
> + minsz = offsetofend(struct vfio_iommu_type1_set_pasid_table, flags);
> +
> + if (copy_from_user(&spt, (void __user *)arg, minsz))
> + return -EFAULT;
> +
> + if (spt.argsz < minsz)
> + return -EINVAL;
> +
> + if (spt.flags & VFIO_PASID_TABLE_FLAG_SET &&
> + spt.flags & VFIO_PASID_TABLE_FLAG_UNSET)
> + return -EINVAL;
> +
> + if (spt.flags & VFIO_PASID_TABLE_FLAG_SET)
> + ret = vfio_attach_pasid_table(iommu, arg + minsz);
> + else if (spt.flags & VFIO_PASID_TABLE_FLAG_UNSET) {
> + vfio_detach_pasid_table(iommu);
> + ret = 0;
> + }
> + return ret;
> +}
> +
> static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
> unsigned long arg)
> {
> @@ -2867,6 +2930,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
> return vfio_iommu_type1_unmap_dma(iommu, arg);
> case VFIO_IOMMU_DIRTY_PAGES:
> return vfio_iommu_type1_dirty_pages(iommu, arg);
> + case VFIO_IOMMU_SET_PASID_TABLE:
> + return vfio_iommu_type1_set_pasid_table(iommu, arg);
> default:
> return -ENOTTY;
> }
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 2f313a238a8f..78ce3ce6c331 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -14,6 +14,7 @@
>
> #include <linux/types.h>
> #include <linux/ioctl.h>
> +#include <linux/iommu.h>
>
> #define VFIO_API_VERSION 0
>
> @@ -1180,6 +1181,24 @@ struct vfio_iommu_type1_dirty_bitmap_get {
>
> #define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17)
>
> +/*
> + * VFIO_IOMMU_SET_PASID_TABLE - _IOWR(VFIO_TYPE, VFIO_BASE + 22,
> + * struct vfio_iommu_type1_set_pasid_table)
> + *
> + * The SET operation passes a PASID table to the host while the
> + * UNSET operation detaches the one currently programmed. Setting
> + * a table while another is already programmed replaces the old table.
> + */
> +struct vfio_iommu_type1_set_pasid_table {
> + __u32 argsz;
> + __u32 flags;
> +#define VFIO_PASID_TABLE_FLAG_SET (1 << 0)
> +#define VFIO_PASID_TABLE_FLAG_UNSET (1 << 1)
> + struct iommu_pasid_table_config config; /* used on SET */
> +};
> +
> +#define VFIO_IOMMU_SET_PASID_TABLE _IO(VFIO_TYPE, VFIO_BASE + 22)
> +
> /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
>
> /*
>

2021-02-22 10:58:06

by Eric Auger

[permalink] [raw]
Subject: Re: [PATCH v11 01/13] vfio: VFIO_IOMMU_SET_PASID_TABLE

Hi Keqian,

On 2/2/21 1:34 PM, Keqian Zhu wrote:
> Hi Eric,
>
> On 2020/11/16 19:00, Eric Auger wrote:
>> From: "Liu, Yi L" <[email protected]>
>>
>> This patch adds an VFIO_IOMMU_SET_PASID_TABLE ioctl
>> which aims to pass the virtual iommu guest configuration
>> to the host. This latter takes the form of the so-called
>> PASID table.
>>
>> Signed-off-by: Jacob Pan <[email protected]>
>> Signed-off-by: Liu, Yi L <[email protected]>
>> Signed-off-by: Eric Auger <[email protected]>
>>
>> ---
>> v11 -> v12:
>> - use iommu_uapi_set_pasid_table
>> - check SET and UNSET are not set simultaneously (Zenghui)
>>
>> v8 -> v9:
>> - Merge VFIO_IOMMU_ATTACH/DETACH_PASID_TABLE into a single
>> VFIO_IOMMU_SET_PASID_TABLE ioctl.
>>
>> v6 -> v7:
>> - add a comment related to VFIO_IOMMU_DETACH_PASID_TABLE
>>
>> v3 -> v4:
>> - restore ATTACH/DETACH
>> - add unwind on failure
>>
>> v2 -> v3:
>> - s/BIND_PASID_TABLE/SET_PASID_TABLE
>>
>> v1 -> v2:
>> - s/BIND_GUEST_STAGE/BIND_PASID_TABLE
>> - remove the struct device arg
>> ---
>> drivers/vfio/vfio_iommu_type1.c | 65 +++++++++++++++++++++++++++++++++
>> include/uapi/linux/vfio.h | 19 ++++++++++
>> 2 files changed, 84 insertions(+)
>>
>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
>> index 67e827638995..87ddd9e882dc 100644
>> --- a/drivers/vfio/vfio_iommu_type1.c
>> +++ b/drivers/vfio/vfio_iommu_type1.c
>> @@ -2587,6 +2587,41 @@ static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu,
>> return ret;
>> }
>>
>> +static void
>> +vfio_detach_pasid_table(struct vfio_iommu *iommu)
>> +{
>> + struct vfio_domain *d;
>> +
>> + mutex_lock(&iommu->lock);
>> + list_for_each_entry(d, &iommu->domain_list, next)
>> + iommu_detach_pasid_table(d->domain);
>> +
>> + mutex_unlock(&iommu->lock);
>> +}
>> +
>> +static int
>> +vfio_attach_pasid_table(struct vfio_iommu *iommu, unsigned long arg)
>> +{
>> + struct vfio_domain *d;
>> + int ret = 0;
>> +
>> + mutex_lock(&iommu->lock);
>> +
>> + list_for_each_entry(d, &iommu->domain_list, next) {
>> + ret = iommu_uapi_attach_pasid_table(d->domain, (void __user *)arg);
> This design is not very clear to me. This assumes all iommu_domains share the same pasid table.
>
> As I understand, it's reasonable when there is only one group in the domain, and only one domain in the vfio_iommu.
> If more than one group in the vfio_iommu, the guest may put them into different guest iommu_domain, then they have different pasid table.
>
> Is this the use scenario?

the vfio_iommu is attached to a container. all the groups within a
container share the same set of page tables (linux
Documentation/driver-api/vfio.rst). So to me if you want to use
different pasid tables, the groups need to be attached to different
containers. Does that make sense to you?

Thanks

Eric
>
> Thanks,
> Keqian
>
>> + if (ret)
>> + goto unwind;
>> + }
>> + goto unlock;
>> +unwind:
>> + list_for_each_entry_continue_reverse(d, &iommu->domain_list, next) {
>> + iommu_detach_pasid_table(d->domain);
>> + }
>> +unlock:
>> + mutex_unlock(&iommu->lock);
>> + return ret;
>> +}
>> +
>> static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
>> struct vfio_info_cap *caps)
>> {
>> @@ -2747,6 +2782,34 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
>> -EFAULT : 0;
>> }
>>
>> +static int vfio_iommu_type1_set_pasid_table(struct vfio_iommu *iommu,
>> + unsigned long arg)
>> +{
>> + struct vfio_iommu_type1_set_pasid_table spt;
>> + unsigned long minsz;
>> + int ret = -EINVAL;
>> +
>> + minsz = offsetofend(struct vfio_iommu_type1_set_pasid_table, flags);
>> +
>> + if (copy_from_user(&spt, (void __user *)arg, minsz))
>> + return -EFAULT;
>> +
>> + if (spt.argsz < minsz)
>> + return -EINVAL;
>> +
>> + if (spt.flags & VFIO_PASID_TABLE_FLAG_SET &&
>> + spt.flags & VFIO_PASID_TABLE_FLAG_UNSET)
>> + return -EINVAL;
>> +
>> + if (spt.flags & VFIO_PASID_TABLE_FLAG_SET)
>> + ret = vfio_attach_pasid_table(iommu, arg + minsz);
>> + else if (spt.flags & VFIO_PASID_TABLE_FLAG_UNSET) {
>> + vfio_detach_pasid_table(iommu);
>> + ret = 0;
>> + }
>> + return ret;
>> +}
>> +
>> static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
>> unsigned long arg)
>> {
>> @@ -2867,6 +2930,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
>> return vfio_iommu_type1_unmap_dma(iommu, arg);
>> case VFIO_IOMMU_DIRTY_PAGES:
>> return vfio_iommu_type1_dirty_pages(iommu, arg);
>> + case VFIO_IOMMU_SET_PASID_TABLE:
>> + return vfio_iommu_type1_set_pasid_table(iommu, arg);
>> default:
>> return -ENOTTY;
>> }
>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>> index 2f313a238a8f..78ce3ce6c331 100644
>> --- a/include/uapi/linux/vfio.h
>> +++ b/include/uapi/linux/vfio.h
>> @@ -14,6 +14,7 @@
>>
>> #include <linux/types.h>
>> #include <linux/ioctl.h>
>> +#include <linux/iommu.h>
>>
>> #define VFIO_API_VERSION 0
>>
>> @@ -1180,6 +1181,24 @@ struct vfio_iommu_type1_dirty_bitmap_get {
>>
>> #define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17)
>>
>> +/*
>> + * VFIO_IOMMU_SET_PASID_TABLE - _IOWR(VFIO_TYPE, VFIO_BASE + 22,
>> + * struct vfio_iommu_type1_set_pasid_table)
>> + *
>> + * The SET operation passes a PASID table to the host while the
>> + * UNSET operation detaches the one currently programmed. Setting
>> + * a table while another is already programmed replaces the old table.
>> + */
>> +struct vfio_iommu_type1_set_pasid_table {
>> + __u32 argsz;
>> + __u32 flags;
>> +#define VFIO_PASID_TABLE_FLAG_SET (1 << 0)
>> +#define VFIO_PASID_TABLE_FLAG_UNSET (1 << 1)
>> + struct iommu_pasid_table_config config; /* used on SET */
>> +};
>> +
>> +#define VFIO_IOMMU_SET_PASID_TABLE _IO(VFIO_TYPE, VFIO_BASE + 22)
>> +
>> /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
>>
>> /*
>>
>

2021-02-22 12:56:17

by Keqian Zhu

[permalink] [raw]
Subject: Re: [PATCH v11 01/13] vfio: VFIO_IOMMU_SET_PASID_TABLE

Hi Eric,

On 2021/2/22 18:53, Auger Eric wrote:
> Hi Keqian,
>
> On 2/2/21 1:34 PM, Keqian Zhu wrote:
>> Hi Eric,
>>
>> On 2020/11/16 19:00, Eric Auger wrote:
>>> From: "Liu, Yi L" <[email protected]>
>>>
>>> This patch adds an VFIO_IOMMU_SET_PASID_TABLE ioctl
>>> which aims to pass the virtual iommu guest configuration
>>> to the host. This latter takes the form of the so-called
>>> PASID table.
>>>
>>> Signed-off-by: Jacob Pan <[email protected]>
>>> Signed-off-by: Liu, Yi L <[email protected]>
>>> Signed-off-by: Eric Auger <[email protected]>
>>>
>>> ---
>>> v11 -> v12:
>>> - use iommu_uapi_set_pasid_table
>>> - check SET and UNSET are not set simultaneously (Zenghui)
>>>
>>> v8 -> v9:
>>> - Merge VFIO_IOMMU_ATTACH/DETACH_PASID_TABLE into a single
>>> VFIO_IOMMU_SET_PASID_TABLE ioctl.
>>>
>>> v6 -> v7:
>>> - add a comment related to VFIO_IOMMU_DETACH_PASID_TABLE
>>>
>>> v3 -> v4:
>>> - restore ATTACH/DETACH
>>> - add unwind on failure
>>>
>>> v2 -> v3:
>>> - s/BIND_PASID_TABLE/SET_PASID_TABLE
>>>
>>> v1 -> v2:
>>> - s/BIND_GUEST_STAGE/BIND_PASID_TABLE
>>> - remove the struct device arg
>>> ---
>>> drivers/vfio/vfio_iommu_type1.c | 65 +++++++++++++++++++++++++++++++++
>>> include/uapi/linux/vfio.h | 19 ++++++++++
>>> 2 files changed, 84 insertions(+)
>>>
>>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
>>> index 67e827638995..87ddd9e882dc 100644
>>> --- a/drivers/vfio/vfio_iommu_type1.c
>>> +++ b/drivers/vfio/vfio_iommu_type1.c
>>> @@ -2587,6 +2587,41 @@ static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu,
>>> return ret;
>>> }
>>>
>>> +static void
>>> +vfio_detach_pasid_table(struct vfio_iommu *iommu)
>>> +{
>>> + struct vfio_domain *d;
>>> +
>>> + mutex_lock(&iommu->lock);
>>> + list_for_each_entry(d, &iommu->domain_list, next)
>>> + iommu_detach_pasid_table(d->domain);
>>> +
>>> + mutex_unlock(&iommu->lock);
>>> +}
>>> +
>>> +static int
>>> +vfio_attach_pasid_table(struct vfio_iommu *iommu, unsigned long arg)
>>> +{
>>> + struct vfio_domain *d;
>>> + int ret = 0;
>>> +
>>> + mutex_lock(&iommu->lock);
>>> +
>>> + list_for_each_entry(d, &iommu->domain_list, next) {
>>> + ret = iommu_uapi_attach_pasid_table(d->domain, (void __user *)arg);
>> This design is not very clear to me. This assumes all iommu_domains share the same pasid table.
>>
>> As I understand, it's reasonable when there is only one group in the domain, and only one domain in the vfio_iommu.
>> If more than one group in the vfio_iommu, the guest may put them into different guest iommu_domain, then they have different pasid table.
>>
>> Is this the use scenario?
>
> the vfio_iommu is attached to a container. all the groups within a
> container share the same set of page tables (linux
> Documentation/driver-api/vfio.rst). So to me if you want to use
> different pasid tables, the groups need to be attached to different
> containers. Does that make sense to you?
OK, so this is what I understand about the design. A little question is that when
we perform attach_pasid_table on a container, maybe we ought to do a sanity
check to make sure that only one group is in this container, instead of
iterating all domain?

To be frank, my main concern is that if we put each group into different container
under nested mode, then we give up the possibility that they can share stage2 page tables,
which saves host memory and reduces the time of preparing environment for VM.

To me, I'd like to understand the "container shares page table" to be:
1) share stage2 page table under nested mode.
2) share stage1 page table under non-nested mode.

As when we perform "map" on a container:
1) under nested mode, we setup stage2 mapping.
2) under non-nested mode, we setup stage1 mapping.

Indeed, to realize stage2 mapping sharing, we should do much more work to refactor
SMMU_DOMAIN...

Hope you can consider this. :)

Thanks,
Keqian

>
> Thanks
>
> Eric
>>
>> Thanks,
>> Keqian
>>
>>> + if (ret)
>>> + goto unwind;
>>> + }
>>> + goto unlock;
>>> +unwind:
>>> + list_for_each_entry_continue_reverse(d, &iommu->domain_list, next) {
>>> + iommu_detach_pasid_table(d->domain);
>>> + }
>>> +unlock:
>>> + mutex_unlock(&iommu->lock);
>>> + return ret;
>>> +}
>>> +
>>> static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
>>> struct vfio_info_cap *caps)
>>> {
>>> @@ -2747,6 +2782,34 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
>>> -EFAULT : 0;
>>> }
>>>
>>> +static int vfio_iommu_type1_set_pasid_table(struct vfio_iommu *iommu,
>>> + unsigned long arg)
>>> +{
>>> + struct vfio_iommu_type1_set_pasid_table spt;
>>> + unsigned long minsz;
>>> + int ret = -EINVAL;
>>> +
>>> + minsz = offsetofend(struct vfio_iommu_type1_set_pasid_table, flags);
>>> +
>>> + if (copy_from_user(&spt, (void __user *)arg, minsz))
>>> + return -EFAULT;
>>> +
>>> + if (spt.argsz < minsz)
>>> + return -EINVAL;
>>> +
>>> + if (spt.flags & VFIO_PASID_TABLE_FLAG_SET &&
>>> + spt.flags & VFIO_PASID_TABLE_FLAG_UNSET)
>>> + return -EINVAL;
>>> +
>>> + if (spt.flags & VFIO_PASID_TABLE_FLAG_SET)
>>> + ret = vfio_attach_pasid_table(iommu, arg + minsz);
>>> + else if (spt.flags & VFIO_PASID_TABLE_FLAG_UNSET) {
>>> + vfio_detach_pasid_table(iommu);
>>> + ret = 0;
>>> + }
>>> + return ret;
>>> +}
>>> +
>>> static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
>>> unsigned long arg)
>>> {
>>> @@ -2867,6 +2930,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
>>> return vfio_iommu_type1_unmap_dma(iommu, arg);
>>> case VFIO_IOMMU_DIRTY_PAGES:
>>> return vfio_iommu_type1_dirty_pages(iommu, arg);
>>> + case VFIO_IOMMU_SET_PASID_TABLE:
>>> + return vfio_iommu_type1_set_pasid_table(iommu, arg);
>>> default:
>>> return -ENOTTY;
>>> }
>>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>>> index 2f313a238a8f..78ce3ce6c331 100644
>>> --- a/include/uapi/linux/vfio.h
>>> +++ b/include/uapi/linux/vfio.h
>>> @@ -14,6 +14,7 @@
>>>
>>> #include <linux/types.h>
>>> #include <linux/ioctl.h>
>>> +#include <linux/iommu.h>
>>>
>>> #define VFIO_API_VERSION 0
>>>
>>> @@ -1180,6 +1181,24 @@ struct vfio_iommu_type1_dirty_bitmap_get {
>>>
>>> #define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17)
>>>
>>> +/*
>>> + * VFIO_IOMMU_SET_PASID_TABLE - _IOWR(VFIO_TYPE, VFIO_BASE + 22,
>>> + * struct vfio_iommu_type1_set_pasid_table)
>>> + *
>>> + * The SET operation passes a PASID table to the host while the
>>> + * UNSET operation detaches the one currently programmed. Setting
>>> + * a table while another is already programmed replaces the old table.
>>> + */
>>> +struct vfio_iommu_type1_set_pasid_table {
>>> + __u32 argsz;
>>> + __u32 flags;
>>> +#define VFIO_PASID_TABLE_FLAG_SET (1 << 0)
>>> +#define VFIO_PASID_TABLE_FLAG_UNSET (1 << 1)
>>> + struct iommu_pasid_table_config config; /* used on SET */
>>> +};
>>> +
>>> +#define VFIO_IOMMU_SET_PASID_TABLE _IO(VFIO_TYPE, VFIO_BASE + 22)
>>> +
>>> /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
>>>
>>> /*
>>>
>>
>
> .
>

2021-02-22 16:17:49

by Eric Auger

[permalink] [raw]
Subject: Re: [PATCH v11 01/13] vfio: VFIO_IOMMU_SET_PASID_TABLE

Hi Keqian,

On 2/22/21 1:20 PM, Keqian Zhu wrote:
> Hi Eric,
>
> On 2021/2/22 18:53, Auger Eric wrote:
>> Hi Keqian,
>>
>> On 2/2/21 1:34 PM, Keqian Zhu wrote:
>>> Hi Eric,
>>>
>>> On 2020/11/16 19:00, Eric Auger wrote:
>>>> From: "Liu, Yi L" <[email protected]>
>>>>
>>>> This patch adds an VFIO_IOMMU_SET_PASID_TABLE ioctl
>>>> which aims to pass the virtual iommu guest configuration
>>>> to the host. This latter takes the form of the so-called
>>>> PASID table.
>>>>
>>>> Signed-off-by: Jacob Pan <[email protected]>
>>>> Signed-off-by: Liu, Yi L <[email protected]>
>>>> Signed-off-by: Eric Auger <[email protected]>
>>>>
>>>> ---
>>>> v11 -> v12:
>>>> - use iommu_uapi_set_pasid_table
>>>> - check SET and UNSET are not set simultaneously (Zenghui)
>>>>
>>>> v8 -> v9:
>>>> - Merge VFIO_IOMMU_ATTACH/DETACH_PASID_TABLE into a single
>>>> VFIO_IOMMU_SET_PASID_TABLE ioctl.
>>>>
>>>> v6 -> v7:
>>>> - add a comment related to VFIO_IOMMU_DETACH_PASID_TABLE
>>>>
>>>> v3 -> v4:
>>>> - restore ATTACH/DETACH
>>>> - add unwind on failure
>>>>
>>>> v2 -> v3:
>>>> - s/BIND_PASID_TABLE/SET_PASID_TABLE
>>>>
>>>> v1 -> v2:
>>>> - s/BIND_GUEST_STAGE/BIND_PASID_TABLE
>>>> - remove the struct device arg
>>>> ---
>>>> drivers/vfio/vfio_iommu_type1.c | 65 +++++++++++++++++++++++++++++++++
>>>> include/uapi/linux/vfio.h | 19 ++++++++++
>>>> 2 files changed, 84 insertions(+)
>>>>
>>>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
>>>> index 67e827638995..87ddd9e882dc 100644
>>>> --- a/drivers/vfio/vfio_iommu_type1.c
>>>> +++ b/drivers/vfio/vfio_iommu_type1.c
>>>> @@ -2587,6 +2587,41 @@ static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu,
>>>> return ret;
>>>> }
>>>>
>>>> +static void
>>>> +vfio_detach_pasid_table(struct vfio_iommu *iommu)
>>>> +{
>>>> + struct vfio_domain *d;
>>>> +
>>>> + mutex_lock(&iommu->lock);
>>>> + list_for_each_entry(d, &iommu->domain_list, next)
>>>> + iommu_detach_pasid_table(d->domain);
>>>> +
>>>> + mutex_unlock(&iommu->lock);
>>>> +}
>>>> +
>>>> +static int
>>>> +vfio_attach_pasid_table(struct vfio_iommu *iommu, unsigned long arg)
>>>> +{
>>>> + struct vfio_domain *d;
>>>> + int ret = 0;
>>>> +
>>>> + mutex_lock(&iommu->lock);
>>>> +
>>>> + list_for_each_entry(d, &iommu->domain_list, next) {
>>>> + ret = iommu_uapi_attach_pasid_table(d->domain, (void __user *)arg);
>>> This design is not very clear to me. This assumes all iommu_domains share the same pasid table.
>>>
>>> As I understand, it's reasonable when there is only one group in the domain, and only one domain in the vfio_iommu.
>>> If more than one group in the vfio_iommu, the guest may put them into different guest iommu_domain, then they have different pasid table.
>>>
>>> Is this the use scenario?
>>
>> the vfio_iommu is attached to a container. all the groups within a
>> container share the same set of page tables (linux
>> Documentation/driver-api/vfio.rst). So to me if you want to use
>> different pasid tables, the groups need to be attached to different
>> containers. Does that make sense to you?
> OK, so this is what I understand about the design. A little question is that when
> we perform attach_pasid_table on a container, maybe we ought to do a sanity
> check to make sure that only one group is in this container, instead of
> iterating all domain?
>
> To be frank, my main concern is that if we put each group into different container
> under nested mode, then we give up the possibility that they can share stage2 page tables,
> which saves host memory and reduces the time of preparing environment for VM.

Referring to the QEMU integration, when you use a virtual IOMMU, there
is generally one VFIO container per viommu protected device
(AddressSpace), independently on the fact nested stage is being used. I
think the exception is if you put 2 assigned devices behind a virtual
PCIe to PCI bridge (pcie-pci-bridge), in that case they have the same
RID, they share the same QEMU AddressSpace and they are put in the same
container, if the kernel does not reject it (underlying pIOMMUs allow
it). See QEMU vfio_connect_container() in hw/vfio/common.c.

In that config, if the assigned devices belong to different groups, you
may end up with 2 groups set to the same container. But this case is not
supported by the guest kernel anyway (independently on the nested stage
integration). You hit a BUG_ON as reported a long time ago in

https://www.mail-archive.com/[email protected]/msg608047.html


>
> To me, I'd like to understand the "container shares page table" to be:
> 1) share stage2 page table under nested mode.
under nested mode they share S2 and with this design devices also share
the same PASID table. Because on the guest they are in the same group.
> 2) share stage1 page table under non-nested mode.
in non nested mode there is a single stage, by default S1.
>
> As when we perform "map" on a container:
> 1) under nested mode, we setup stage2 mapping.
> 2) under non-nested mode, we setup stage1 mapping.
right
>
> Indeed, to realize stage2 mapping sharing, we should do much more work to refactor
> SMMU_DOMAIN...

Hope this helps

Thanks

Eric
>
> Hope you can consider this. :)
>
> Thanks,
> Keqian
>
>>
>> Thanks
>>
>> Eric
>>>
>>> Thanks,
>>> Keqian
>>>
>>>> + if (ret)
>>>> + goto unwind;
>>>> + }
>>>> + goto unlock;
>>>> +unwind:
>>>> + list_for_each_entry_continue_reverse(d, &iommu->domain_list, next) {
>>>> + iommu_detach_pasid_table(d->domain);
>>>> + }
>>>> +unlock:
>>>> + mutex_unlock(&iommu->lock);
>>>> + return ret;
>>>> +}
>>>> +
>>>> static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
>>>> struct vfio_info_cap *caps)
>>>> {
>>>> @@ -2747,6 +2782,34 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
>>>> -EFAULT : 0;
>>>> }
>>>>
>>>> +static int vfio_iommu_type1_set_pasid_table(struct vfio_iommu *iommu,
>>>> + unsigned long arg)
>>>> +{
>>>> + struct vfio_iommu_type1_set_pasid_table spt;
>>>> + unsigned long minsz;
>>>> + int ret = -EINVAL;
>>>> +
>>>> + minsz = offsetofend(struct vfio_iommu_type1_set_pasid_table, flags);
>>>> +
>>>> + if (copy_from_user(&spt, (void __user *)arg, minsz))
>>>> + return -EFAULT;
>>>> +
>>>> + if (spt.argsz < minsz)
>>>> + return -EINVAL;
>>>> +
>>>> + if (spt.flags & VFIO_PASID_TABLE_FLAG_SET &&
>>>> + spt.flags & VFIO_PASID_TABLE_FLAG_UNSET)
>>>> + return -EINVAL;
>>>> +
>>>> + if (spt.flags & VFIO_PASID_TABLE_FLAG_SET)
>>>> + ret = vfio_attach_pasid_table(iommu, arg + minsz);
>>>> + else if (spt.flags & VFIO_PASID_TABLE_FLAG_UNSET) {
>>>> + vfio_detach_pasid_table(iommu);
>>>> + ret = 0;
>>>> + }
>>>> + return ret;
>>>> +}
>>>> +
>>>> static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
>>>> unsigned long arg)
>>>> {
>>>> @@ -2867,6 +2930,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
>>>> return vfio_iommu_type1_unmap_dma(iommu, arg);
>>>> case VFIO_IOMMU_DIRTY_PAGES:
>>>> return vfio_iommu_type1_dirty_pages(iommu, arg);
>>>> + case VFIO_IOMMU_SET_PASID_TABLE:
>>>> + return vfio_iommu_type1_set_pasid_table(iommu, arg);
>>>> default:
>>>> return -ENOTTY;
>>>> }
>>>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>>>> index 2f313a238a8f..78ce3ce6c331 100644
>>>> --- a/include/uapi/linux/vfio.h
>>>> +++ b/include/uapi/linux/vfio.h
>>>> @@ -14,6 +14,7 @@
>>>>
>>>> #include <linux/types.h>
>>>> #include <linux/ioctl.h>
>>>> +#include <linux/iommu.h>
>>>>
>>>> #define VFIO_API_VERSION 0
>>>>
>>>> @@ -1180,6 +1181,24 @@ struct vfio_iommu_type1_dirty_bitmap_get {
>>>>
>>>> #define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17)
>>>>
>>>> +/*
>>>> + * VFIO_IOMMU_SET_PASID_TABLE - _IOWR(VFIO_TYPE, VFIO_BASE + 22,
>>>> + * struct vfio_iommu_type1_set_pasid_table)
>>>> + *
>>>> + * The SET operation passes a PASID table to the host while the
>>>> + * UNSET operation detaches the one currently programmed. Setting
>>>> + * a table while another is already programmed replaces the old table.
>>>> + */
>>>> +struct vfio_iommu_type1_set_pasid_table {
>>>> + __u32 argsz;
>>>> + __u32 flags;
>>>> +#define VFIO_PASID_TABLE_FLAG_SET (1 << 0)
>>>> +#define VFIO_PASID_TABLE_FLAG_UNSET (1 << 1)
>>>> + struct iommu_pasid_table_config config; /* used on SET */
>>>> +};
>>>> +
>>>> +#define VFIO_IOMMU_SET_PASID_TABLE _IO(VFIO_TYPE, VFIO_BASE + 22)
>>>> +
>>>> /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
>>>>
>>>> /*
>>>>
>>>
>>
>> .
>>
>