2021-04-09 01:44:49

by Jacob Pan

[permalink] [raw]
Subject: [PATCH 1/2] iommu/sva: Tighten SVA bind API with explicit flags

The void* drvdata parameter isn't really used in iommu_sva_bind_device()
API, the current IDXD code "borrows" the drvdata for a VT-d private flag
for supervisor SVA usage.

Supervisor/Privileged mode request is a generic feature. It should be
promoted from the VT-d vendor driver to the generic code.

This patch replaces void* drvdata with a unsigned int flags parameter
and adjusts callers accordingly.

Link: https://lore.kernel.org/linux-iommu/YFhiMLR35WWMW%2FHu@myrica/
Suggested-by: Jean-Philippe Brucker <[email protected]>
Signed-off-by: Jacob Pan <[email protected]>
---
drivers/dma/idxd/cdev.c | 2 +-
drivers/dma/idxd/init.c | 6 +++---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 2 +-
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 4 ++--
drivers/iommu/intel/Kconfig | 1 +
drivers/iommu/intel/svm.c | 18 ++++++------------
drivers/iommu/iommu.c | 9 ++++++---
drivers/misc/uacce/uacce.c | 2 +-
include/linux/intel-iommu.h | 2 +-
include/linux/intel-svm.h | 17 ++---------------
include/linux/iommu.h | 19 ++++++++++++++++---
11 files changed, 40 insertions(+), 42 deletions(-)

diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index 0db9b82..21ec82b 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -103,7 +103,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
filp->private_data = ctx;

if (device_pasid_enabled(idxd)) {
- sva = iommu_sva_bind_device(dev, current->mm, NULL);
+ sva = iommu_sva_bind_device(dev, current->mm, 0);
if (IS_ERR(sva)) {
rc = PTR_ERR(sva);
dev_err(dev, "pasid allocation failed: %d\n", rc);
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 085a0c3..cdc85f1 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -300,13 +300,13 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev)

static int idxd_enable_system_pasid(struct idxd_device *idxd)
{
- int flags;
+ unsigned int flags;
unsigned int pasid;
struct iommu_sva *sva;

- flags = SVM_FLAG_SUPERVISOR_MODE;
+ flags = IOMMU_SVA_BIND_SUPERVISOR;

- sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags);
+ sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, flags);
if (IS_ERR(sva)) {
dev_warn(&idxd->pdev->dev,
"iommu sva bind failed: %ld\n", PTR_ERR(sva));
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index bb251ca..23e287e 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -354,7 +354,7 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
}

struct iommu_sva *
-arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, unsigned int flags)
{
struct iommu_sva *handle;
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index f985817..b971d4d 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -711,7 +711,7 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master);
int arm_smmu_master_enable_sva(struct arm_smmu_master *master);
int arm_smmu_master_disable_sva(struct arm_smmu_master *master);
struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
- void *drvdata);
+ unsigned int flags);
void arm_smmu_sva_unbind(struct iommu_sva *handle);
u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle);
void arm_smmu_sva_notifier_synchronize(void);
@@ -742,7 +742,7 @@ static inline int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
}

static inline struct iommu_sva *
-arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, unsigned int flags)
{
return ERR_PTR(-ENODEV);
}
diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 28a3d15..5415052 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -41,6 +41,7 @@ config INTEL_IOMMU_SVM
select PCI_PRI
select MMU_NOTIFIER
select IOASID
+ select IOMMU_SVA_LIB
help
Shared Virtual Memory (SVM) provides a facility for devices
to access DMA resources through process address space by
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 574a7e6..4b5f8b0 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -486,12 +486,9 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
} else
pasid_max = 1 << 20;

- /* Bind supervisor PASID shuld have mm = NULL */
- if (flags & SVM_FLAG_SUPERVISOR_MODE) {
- if (!ecap_srs(iommu->ecap) || mm) {
- pr_err("Supervisor PASID with user provided mm.\n");
- return -EINVAL;
- }
+ if ((flags & IOMMU_SVA_BIND_SUPERVISOR) && !ecap_srs(iommu->ecap)) {
+ pr_err("Supervisor PASID not supported.\n");
+ return -EINVAL;
}

if (!(flags & SVM_FLAG_PRIVATE_PASID)) {
@@ -593,7 +590,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
ret = intel_pasid_setup_first_level(iommu, dev,
mm ? mm->pgd : init_mm.pgd,
svm->pasid, FLPT_DEFAULT_DID,
- (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
+ (mm ? 0 : IOMMU_SVA_BIND_SUPERVISOR) |
(cpu_feature_enabled(X86_FEATURE_LA57) ?
PASID_FLAG_FL5LP : 0));
spin_unlock_irqrestore(&iommu->lock, iflags);
@@ -620,7 +617,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
ret = intel_pasid_setup_first_level(iommu, dev,
mm ? mm->pgd : init_mm.pgd,
svm->pasid, FLPT_DEFAULT_DID,
- (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
+ (mm ? 0 : IOMMU_SVA_BIND_SUPERVISOR) |
(cpu_feature_enabled(X86_FEATURE_LA57) ?
PASID_FLAG_FL5LP : 0));
spin_unlock_irqrestore(&iommu->lock, iflags);
@@ -1059,11 +1056,10 @@ static irqreturn_t prq_event_thread(int irq, void *d)

#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
struct iommu_sva *
-intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+intel_svm_bind(struct device *dev, struct mm_struct *mm, unsigned int flags)
{
struct iommu_sva *sva = ERR_PTR(-EINVAL);
struct intel_svm_dev *sdev = NULL;
- unsigned int flags = 0;
int ret;

/*
@@ -1071,8 +1067,6 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
* It will require shared SVM data structures, i.e. combine io_mm
* and intel_svm etc.
*/
- if (drvdata)
- flags = *(unsigned int *)drvdata;
mutex_lock(&pasid_mutex);
ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev);
if (ret)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index d0b0a15..bf0a20f 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2962,6 +2962,7 @@ EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
* iommu_sva_bind_device() - Bind a process address space to a device
* @dev: the device
* @mm: the mm to bind, caller must hold a reference to it
+ * @flags: options for the bind operation
*
* Create a bond between device and address space, allowing the device to access
* the mm using the returned PASID. If a bond already exists between @device and
@@ -2974,7 +2975,7 @@ EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
* On error, returns an ERR_PTR value.
*/
struct iommu_sva *
-iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
+iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, unsigned int flags)
{
struct iommu_group *group;
struct iommu_sva *handle = ERR_PTR(-EINVAL);
@@ -2987,6 +2988,9 @@ iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
if (!group)
return ERR_PTR(-ENODEV);

+ /* Supervisor SVA does not need the current mm */
+ if ((flags & IOMMU_SVA_BIND_SUPERVISOR) && mm)
+ return ERR_PTR(-EINVAL);
/* Ensure device count and domain don't change while we're binding */
mutex_lock(&group->mutex);

@@ -2999,8 +3003,7 @@ iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
if (iommu_group_device_count(group) != 1)
goto out_unlock;

- handle = ops->sva_bind(dev, mm, drvdata);
-
+ handle = ops->sva_bind(dev, mm, flags);
out_unlock:
mutex_unlock(&group->mutex);
iommu_group_put(group);
diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
index d07af4e..27e0e04 100644
--- a/drivers/misc/uacce/uacce.c
+++ b/drivers/misc/uacce/uacce.c
@@ -99,7 +99,7 @@ static int uacce_bind_queue(struct uacce_device *uacce, struct uacce_queue *q)
if (!(uacce->flags & UACCE_DEV_SVA))
return 0;

- handle = iommu_sva_bind_device(uacce->parent, current->mm, NULL);
+ handle = iommu_sva_bind_device(uacce->parent, current->mm, 0);
if (IS_ERR(handle))
return PTR_ERR(handle);

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 1bc46b8..cdff752 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -757,7 +757,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
struct iommu_gpasid_bind_data *data);
int intel_svm_unbind_gpasid(struct device *dev, u32 pasid);
struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm,
- void *drvdata);
+ unsigned int flags);
void intel_svm_unbind(struct iommu_sva *handle);
u32 intel_svm_get_pasid(struct iommu_sva *handle);
int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 39d368a..ef6b753 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -30,30 +30,17 @@ struct svm_dev_ops {
* if there is no other way to do so. It should be used sparingly, if at all.
*/
#define SVM_FLAG_PRIVATE_PASID (1<<0)
-
-/*
- * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only
- * for access to kernel addresses. No IOTLB flushes are automatically done
- * for kernel mappings; it is valid only for access to the kernel's static
- * 1:1 mapping of physical memory — not to vmalloc or even module mappings.
- * A future API addition may permit the use of such ranges, by means of an
- * explicit IOTLB flush call (akin to the DMA API's unmap method).
- *
- * It is unlikely that we will ever hook into flush_tlb_kernel_range() to
- * do such IOTLB flushes automatically.
- */
-#define SVM_FLAG_SUPERVISOR_MODE (1<<1)
/*
* The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest
* processes. Compared to the host bind, the primary differences are:
* 1. mm life cycle management
* 2. fault reporting
*/
-#define SVM_FLAG_GUEST_MODE (1<<2)
+#define SVM_FLAG_GUEST_MODE (1<<1)
/*
* The SVM_FLAG_GUEST_PASID flag is used when a guest has its own PASID space,
* which requires guest and host PASID translation at both directions.
*/
-#define SVM_FLAG_GUEST_PASID (1<<3)
+#define SVM_FLAG_GUEST_PASID (1<<2)

#endif /* __INTEL_SVM_H__ */
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5e7fe51..a3fbaa2 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -166,6 +166,19 @@ enum iommu_dev_features {

#ifdef CONFIG_IOMMU_API

+/*
+ * The IOMMU_SVA_BIND_SUPERVISOR flag requests a PASID which can be used only
+ * for access to kernel addresses. No IOTLB flushes are automatically done
+ * for kernel mappings; it is valid only for access to the kernel's static
+ * 1:1 mapping of physical memory — not to vmalloc or even module mappings.
+ * A future API addition may permit the use of such ranges, by means of an
+ * explicit IOTLB flush call (akin to the DMA API's unmap method).
+ *
+ * It is unlikely that we will ever hook into flush_tlb_kernel_range() to
+ * do such IOTLB flushes automatically.
+ */
+#define IOMMU_SVA_BIND_SUPERVISOR BIT(0)
+
/**
* struct iommu_iotlb_gather - Range information for a pending IOTLB flush
*
@@ -287,7 +300,7 @@ struct iommu_ops {
int (*aux_get_pasid)(struct iommu_domain *domain, struct device *dev);

struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm,
- void *drvdata);
+ unsigned int flags);
void (*sva_unbind)(struct iommu_sva *handle);
u32 (*sva_get_pasid)(struct iommu_sva *handle);

@@ -640,7 +653,7 @@ int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev);

struct iommu_sva *iommu_sva_bind_device(struct device *dev,
struct mm_struct *mm,
- void *drvdata);
+ unsigned int flags);
void iommu_sva_unbind_device(struct iommu_sva *handle);
u32 iommu_sva_get_pasid(struct iommu_sva *handle);

@@ -1015,7 +1028,7 @@ iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
}

static inline struct iommu_sva *
-iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
+iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, unsigned int flags)
{
return NULL;
}
--
2.7.4


2021-04-09 01:47:27

by Jacob Pan

[permalink] [raw]
Subject: [PATCH 2/2] iommu/sva: Remove mm parameter from SVA bind API

The mm parameter in iommu_sva_bind_device() is intended for privileged
process perform bind() on behalf of other processes. This use case has
yet to be materialized, let alone potential security implications of
adding kernel hooks without explicit user consent.
In addition, with the agreement that IOASID allocation shall be subject
cgroup limit. It will be inline with misc cgroup proposal if IOASID
allocation as part of the SVA bind is limited to the current task.

Link: https://lore.kernel.org/linux-iommu/20210303160205.151d114e@jacob-builder/
Link: https://lore.kernel.org/linux-iommu/YFhiMLR35WWMW%2FHu@myrica/
Signed-off-by: Jacob Pan <[email protected]>
---
drivers/dma/idxd/cdev.c | 2 +-
drivers/dma/idxd/init.c | 2 +-
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 6 ++---
drivers/iommu/iommu-sva-lib.c | 30 +++++++++++++++++++------
drivers/iommu/iommu-sva-lib.h | 4 ++--
drivers/iommu/iommu.c | 16 ++++++++-----
drivers/misc/uacce/uacce.c | 2 +-
include/linux/iommu.h | 7 +++---
8 files changed, 45 insertions(+), 24 deletions(-)

diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index 21ec82b..8c3347c 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -103,7 +103,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
filp->private_data = ctx;

if (device_pasid_enabled(idxd)) {
- sva = iommu_sva_bind_device(dev, current->mm, 0);
+ sva = iommu_sva_bind_device(dev, 0);
if (IS_ERR(sva)) {
rc = PTR_ERR(sva);
dev_err(dev, "pasid allocation failed: %d\n", rc);
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index cdc85f1..a583f79 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -306,7 +306,7 @@ static int idxd_enable_system_pasid(struct idxd_device *idxd)

flags = IOMMU_SVA_BIND_SUPERVISOR;

- sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, flags);
+ sva = iommu_sva_bind_device(&idxd->pdev->dev, flags);
if (IS_ERR(sva)) {
dev_warn(&idxd->pdev->dev,
"iommu sva bind failed: %ld\n", PTR_ERR(sva));
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 23e287e..bdd5c79 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -329,7 +329,7 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
return ERR_PTR(-ENOMEM);

/* Allocate a PASID for this mm if necessary */
- ret = iommu_sva_alloc_pasid(mm, 1, (1U << master->ssid_bits) - 1);
+ ret = iommu_sva_alloc_pasid(1, (1U << master->ssid_bits) - 1);
if (ret)
goto err_free_bond;

@@ -347,7 +347,7 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
return &bond->sva;

err_free_pasid:
- iommu_sva_free_pasid(mm);
+ iommu_sva_free_pasid();
err_free_bond:
kfree(bond);
return ERR_PTR(ret);
@@ -377,7 +377,7 @@ void arm_smmu_sva_unbind(struct iommu_sva *handle)
if (refcount_dec_and_test(&bond->refs)) {
list_del(&bond->list);
arm_smmu_mmu_notifier_put(bond->smmu_mn);
- iommu_sva_free_pasid(bond->mm);
+ iommu_sva_free_pasid();
kfree(bond);
}
mutex_unlock(&sva_lock);
diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
index bd41405..bd99f6b 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva-lib.c
@@ -12,27 +12,33 @@ static DECLARE_IOASID_SET(iommu_sva_pasid);

/**
* iommu_sva_alloc_pasid - Allocate a PASID for the mm
- * @mm: the mm
* @min: minimum PASID value (inclusive)
* @max: maximum PASID value (inclusive)
*
- * Try to allocate a PASID for this mm, or take a reference to the existing one
- * provided it fits within the [@min, @max] range. On success the PASID is
- * available in mm->pasid, and must be released with iommu_sva_free_pasid().
+ * Try to allocate a PASID for the current mm, or take a reference to the
+ * existing one provided it fits within the [@min, @max] range. On success
+ * the PASID is available in the current mm->pasid, and must be released with
+ * iommu_sva_free_pasid().
* @min must be greater than 0, because 0 indicates an unused mm->pasid.
*
* Returns 0 on success and < 0 on error.
*/
-int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
+int iommu_sva_alloc_pasid(ioasid_t min, ioasid_t max)
{
int ret = 0;
ioasid_t pasid;
+ struct mm_struct *mm;

if (min == INVALID_IOASID || max == INVALID_IOASID ||
min == 0 || max < min)
return -EINVAL;

mutex_lock(&iommu_sva_lock);
+ mm = get_task_mm(current);
+ if (!mm) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
if (mm->pasid) {
if (mm->pasid >= min && mm->pasid <= max)
ioasid_get(mm->pasid);
@@ -45,22 +51,32 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
else
mm->pasid = pasid;
}
+ mmput(mm);
+out_unlock:
mutex_unlock(&iommu_sva_lock);
return ret;
}
EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid);

/**
- * iommu_sva_free_pasid - Release the mm's PASID
+ * iommu_sva_free_pasid - Release the current mm's PASID
* @mm: the mm
*
* Drop one reference to a PASID allocated with iommu_sva_alloc_pasid()
*/
-void iommu_sva_free_pasid(struct mm_struct *mm)
+void iommu_sva_free_pasid(void)
{
+ struct mm_struct *mm;
+
mutex_lock(&iommu_sva_lock);
+ mm = get_task_mm(current);
+ if (!mm)
+ goto out_unlock;
+
if (ioasid_put(mm->pasid))
mm->pasid = 0;
+ mmput(mm);
+out_unlock:
mutex_unlock(&iommu_sva_lock);
}
EXPORT_SYMBOL_GPL(iommu_sva_free_pasid);
diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h
index b40990a..278b8b4 100644
--- a/drivers/iommu/iommu-sva-lib.h
+++ b/drivers/iommu/iommu-sva-lib.h
@@ -8,8 +8,8 @@
#include <linux/ioasid.h>
#include <linux/mm_types.h>

-int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max);
-void iommu_sva_free_pasid(struct mm_struct *mm);
+int iommu_sva_alloc_pasid(ioasid_t min, ioasid_t max);
+void iommu_sva_free_pasid(void);
struct mm_struct *iommu_sva_find(ioasid_t pasid);

#endif /* _IOMMU_SVA_LIB_H */
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index bf0a20f..25840e6 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -23,6 +23,7 @@
#include <linux/property.h>
#include <linux/fsl/mc.h>
#include <linux/module.h>
+#include <linux/sched/mm.h>
#include <trace/events/iommu.h>

static struct kset *iommu_group_kset;
@@ -2959,9 +2960,8 @@ int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);

/**
- * iommu_sva_bind_device() - Bind a process address space to a device
+ * iommu_sva_bind_device() - Bind the current process address space to a device
* @dev: the device
- * @mm: the mm to bind, caller must hold a reference to it
* @flags: options for the bind operation
*
* Create a bond between device and address space, allowing the device to access
@@ -2975,9 +2975,10 @@ EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
* On error, returns an ERR_PTR value.
*/
struct iommu_sva *
-iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, unsigned int flags)
+iommu_sva_bind_device(struct device *dev, unsigned int flags)
{
struct iommu_group *group;
+ struct mm_struct *mm = NULL;
struct iommu_sva *handle = ERR_PTR(-EINVAL);
const struct iommu_ops *ops = dev->bus->iommu_ops;

@@ -2989,8 +2990,11 @@ iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, unsigned int fla
return ERR_PTR(-ENODEV);

/* Supervisor SVA does not need the current mm */
- if ((flags & IOMMU_SVA_BIND_SUPERVISOR) && mm)
- return ERR_PTR(-EINVAL);
+ if (!(flags & IOMMU_SVA_BIND_SUPERVISOR)) {
+ mm = get_task_mm(current);
+ if (!mm)
+ return ERR_PTR(-EINVAL);
+ }
/* Ensure device count and domain don't change while we're binding */
mutex_lock(&group->mutex);

@@ -3004,6 +3008,8 @@ iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, unsigned int fla
goto out_unlock;

handle = ops->sva_bind(dev, mm, flags);
+ if (mm)
+ mmput(mm);
out_unlock:
mutex_unlock(&group->mutex);
iommu_group_put(group);
diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
index 27e0e04..da4401a 100644
--- a/drivers/misc/uacce/uacce.c
+++ b/drivers/misc/uacce/uacce.c
@@ -99,7 +99,7 @@ static int uacce_bind_queue(struct uacce_device *uacce, struct uacce_queue *q)
if (!(uacce->flags & UACCE_DEV_SVA))
return 0;

- handle = iommu_sva_bind_device(uacce->parent, current->mm, 0);
+ handle = iommu_sva_bind_device(uacce->parent, 0);
if (IS_ERR(handle))
return PTR_ERR(handle);

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index a3fbaa2..cf752f3 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -231,8 +231,8 @@ struct iommu_iotlb_gather {
* @dev_feat_enabled: check enabled feature
* @aux_attach/detach_dev: aux-domain specific attach/detach entries.
* @aux_get_pasid: get the pasid given an aux-domain
- * @sva_bind: Bind process address space to device
- * @sva_unbind: Unbind process address space from device
+ * @sva_bind: Bind the current process address space to device
+ * @sva_unbind: Unbind the current process address space from device
* @sva_get_pasid: Get PASID associated to a SVA handle
* @page_response: handle page request response
* @cache_invalidate: invalidate translation caches
@@ -652,7 +652,6 @@ void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev);
int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev);

struct iommu_sva *iommu_sva_bind_device(struct device *dev,
- struct mm_struct *mm,
unsigned int flags);
void iommu_sva_unbind_device(struct iommu_sva *handle);
u32 iommu_sva_get_pasid(struct iommu_sva *handle);
@@ -1028,7 +1027,7 @@ iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
}

static inline struct iommu_sva *
-iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, unsigned int flags)
+iommu_sva_bind_device(struct device *dev, unsigned int flags)
{
return NULL;
}
--
2.7.4

2021-04-09 10:18:25

by Jean-Philippe Brucker

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu/sva: Remove mm parameter from SVA bind API

On Thu, Apr 08, 2021 at 10:08:56AM -0700, Jacob Pan wrote:
> diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
> index bd41405..bd99f6b 100644
> --- a/drivers/iommu/iommu-sva-lib.c
> +++ b/drivers/iommu/iommu-sva-lib.c
> @@ -12,27 +12,33 @@ static DECLARE_IOASID_SET(iommu_sva_pasid);
>
> /**
> * iommu_sva_alloc_pasid - Allocate a PASID for the mm
> - * @mm: the mm
> * @min: minimum PASID value (inclusive)
> * @max: maximum PASID value (inclusive)
> *
> - * Try to allocate a PASID for this mm, or take a reference to the existing one
> - * provided it fits within the [@min, @max] range. On success the PASID is
> - * available in mm->pasid, and must be released with iommu_sva_free_pasid().
> + * Try to allocate a PASID for the current mm, or take a reference to the
> + * existing one provided it fits within the [@min, @max] range. On success
> + * the PASID is available in the current mm->pasid, and must be released with
> + * iommu_sva_free_pasid().
> * @min must be greater than 0, because 0 indicates an unused mm->pasid.
> *
> * Returns 0 on success and < 0 on error.
> */
> -int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
> +int iommu_sva_alloc_pasid(ioasid_t min, ioasid_t max)
> {
> int ret = 0;
> ioasid_t pasid;
> + struct mm_struct *mm;
>
> if (min == INVALID_IOASID || max == INVALID_IOASID ||
> min == 0 || max < min)
> return -EINVAL;
>
> mutex_lock(&iommu_sva_lock);
> + mm = get_task_mm(current);
> + if (!mm) {
> + ret = -EINVAL;
> + goto out_unlock;
> + }

I still think it would be more elegant to keep the choice of context in
iommu_sva_bind_device() and pass it down to leaf functions such as
iommu_sva_alloc_pasid(). The patch is trying to solve two separate
problems:

* We don't have a use-case for binding the mm of a remote process (and
it's supposedly difficult for device drivers to do it securely). So OK,
we remove the mm argument from iommu_sva_bind_device() and use the
current mm. But the IOMMU driver isn't going to do get_task_mm(current)
every time it needs the mm being bound, it will take it from
iommu_sva_bind_device(). Likewise iommu_sva_alloc_pasid() shouldn't need
to bother with get_task_mm().

* cgroup accounting for IOASIDs needs to be on the current task. Removing
the mm parameter from iommu_sva_alloc_pasid() doesn't help with that.
Sure it indicates that iommu_sva_alloc_pasid() needs a specific task
context but that's only for cgroup purpose, and I'd rather pass the
cgroup down from iommu_sva_bind_device() anyway (but am fine with
keeping it within ioasid_alloc() for now). Plus it's an internal helper,
easy for us to check that the callers are doing the right thing.

> if (mm->pasid) {
> if (mm->pasid >= min && mm->pasid <= max)
> ioasid_get(mm->pasid);
> @@ -45,22 +51,32 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
> else
> mm->pasid = pasid;
> }
> + mmput(mm);
> +out_unlock:
> mutex_unlock(&iommu_sva_lock);
> return ret;
> }
> EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid);
>
> /**
> - * iommu_sva_free_pasid - Release the mm's PASID
> + * iommu_sva_free_pasid - Release the current mm's PASID
> * @mm: the mm
> *
> * Drop one reference to a PASID allocated with iommu_sva_alloc_pasid()
> */
> -void iommu_sva_free_pasid(struct mm_struct *mm)
> +void iommu_sva_free_pasid(void)
> {
> + struct mm_struct *mm;
> +
> mutex_lock(&iommu_sva_lock);
> + mm = get_task_mm(current);
> + if (!mm)
> + goto out_unlock;
> +

More importantly, could we at least dissociate free_pasid() from the
current process? Otherwise drivers can't clean up from a workqueue (as
amdkfd does) or from an rcu callback. Given that iommu_sva_unbind_device()
takes the SVA handle owned by whomever did bind(), there shouldn't be any
security issue. For the cgroup problem, ioasid.c could internally keep
track of the cgroup used during allocation rather than assuming the
context of ioasid_put() is the same as ioasid_get()

> if (ioasid_put(mm->pasid))
> mm->pasid = 0;
> + mmput(mm);
> +out_unlock:
> mutex_unlock(&iommu_sva_lock);
> }
> EXPORT_SYMBOL_GPL(iommu_sva_free_pasid);
> diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h
> index b40990a..278b8b4 100644
> --- a/drivers/iommu/iommu-sva-lib.h
> +++ b/drivers/iommu/iommu-sva-lib.h
> @@ -8,8 +8,8 @@
> #include <linux/ioasid.h>
> #include <linux/mm_types.h>
>
> -int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max);
> -void iommu_sva_free_pasid(struct mm_struct *mm);
> +int iommu_sva_alloc_pasid(ioasid_t min, ioasid_t max);
> +void iommu_sva_free_pasid(void);
> struct mm_struct *iommu_sva_find(ioasid_t pasid);
>
> #endif /* _IOMMU_SVA_LIB_H */
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index bf0a20f..25840e6 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -23,6 +23,7 @@
> #include <linux/property.h>
> #include <linux/fsl/mc.h>
> #include <linux/module.h>
> +#include <linux/sched/mm.h>
> #include <trace/events/iommu.h>
>
> static struct kset *iommu_group_kset;
> @@ -2959,9 +2960,8 @@ int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
> EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
>
> /**
> - * iommu_sva_bind_device() - Bind a process address space to a device
> + * iommu_sva_bind_device() - Bind the current process address space to a device
> * @dev: the device
> - * @mm: the mm to bind, caller must hold a reference to it
> * @flags: options for the bind operation
> *
> * Create a bond between device and address space, allowing the device to access

There is another reference to @mm to remove in the function description

> @@ -2975,9 +2975,10 @@ EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
> * On error, returns an ERR_PTR value.
> */
> struct iommu_sva *
> -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, unsigned int flags)
> +iommu_sva_bind_device(struct device *dev, unsigned int flags)
> {
> struct iommu_group *group;
> + struct mm_struct *mm = NULL;
> struct iommu_sva *handle = ERR_PTR(-EINVAL);
> const struct iommu_ops *ops = dev->bus->iommu_ops;
>
> @@ -2989,8 +2990,11 @@ iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, unsigned int fla
> return ERR_PTR(-ENODEV);
>
> /* Supervisor SVA does not need the current mm */
> - if ((flags & IOMMU_SVA_BIND_SUPERVISOR) && mm)
> - return ERR_PTR(-EINVAL);
> + if (!(flags & IOMMU_SVA_BIND_SUPERVISOR)) {
> + mm = get_task_mm(current);
> + if (!mm)
> + return ERR_PTR(-EINVAL);
> + }
> /* Ensure device count and domain don't change while we're binding */
> mutex_lock(&group->mutex);
>
> @@ -3004,6 +3008,8 @@ iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, unsigned int fla
> goto out_unlock;
>
> handle = ops->sva_bind(dev, mm, flags);
> + if (mm)
> + mmput(mm);
> out_unlock:
> mutex_unlock(&group->mutex);
> iommu_group_put(group);
> diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
> index 27e0e04..da4401a 100644
> --- a/drivers/misc/uacce/uacce.c
> +++ b/drivers/misc/uacce/uacce.c
> @@ -99,7 +99,7 @@ static int uacce_bind_queue(struct uacce_device *uacce, struct uacce_queue *q)
> if (!(uacce->flags & UACCE_DEV_SVA))
> return 0;
>
> - handle = iommu_sva_bind_device(uacce->parent, current->mm, 0);
> + handle = iommu_sva_bind_device(uacce->parent, 0);
> if (IS_ERR(handle))
> return PTR_ERR(handle);
>
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index a3fbaa2..cf752f3 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -231,8 +231,8 @@ struct iommu_iotlb_gather {
> * @dev_feat_enabled: check enabled feature
> * @aux_attach/detach_dev: aux-domain specific attach/detach entries.
> * @aux_get_pasid: get the pasid given an aux-domain
> - * @sva_bind: Bind process address space to device
> - * @sva_unbind: Unbind process address space from device
> + * @sva_bind: Bind the current process address space to device
> + * @sva_unbind: Unbind the current process address space from device

These don't need changing since we're still passing the mm down to the
drivers

Thanks,
Jean

> * @sva_get_pasid: Get PASID associated to a SVA handle
> * @page_response: handle page request response
> * @cache_invalidate: invalidate translation caches
> @@ -652,7 +652,6 @@ void iommu_aux_detach_device(struct iommu_domain *domain, struct device *dev);
> int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev);
>
> struct iommu_sva *iommu_sva_bind_device(struct device *dev,
> - struct mm_struct *mm,
> unsigned int flags);
> void iommu_sva_unbind_device(struct iommu_sva *handle);
> u32 iommu_sva_get_pasid(struct iommu_sva *handle);
> @@ -1028,7 +1027,7 @@ iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
> }
>
> static inline struct iommu_sva *
> -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, unsigned int flags)
> +iommu_sva_bind_device(struct device *dev, unsigned int flags)
> {
> return NULL;
> }
> --
> 2.7.4
>

2021-04-09 10:24:06

by Jean-Philippe Brucker

[permalink] [raw]
Subject: Re: [PATCH 1/2] iommu/sva: Tighten SVA bind API with explicit flags

On Thu, Apr 08, 2021 at 10:08:55AM -0700, Jacob Pan wrote:
> The void* drvdata parameter isn't really used in iommu_sva_bind_device()
> API,

Right, it used to be a cookie passed to the device driver in the exit_mm()
callback, but that went away with edcc40d2ab5f ("iommu: Remove
iommu_sva_ops::mm_exit()")

> the current IDXD code "borrows" the drvdata for a VT-d private flag
> for supervisor SVA usage.
>
> Supervisor/Privileged mode request is a generic feature. It should be
> promoted from the VT-d vendor driver to the generic code.
>
> This patch replaces void* drvdata with a unsigned int flags parameter
> and adjusts callers accordingly.

Thanks for cleaning this up. Making flags unsigned long seems more common
(I suggested int without thinking). But it doesn't matter much, we won't
get to 32 flags.

>
> Link: https://lore.kernel.org/linux-iommu/YFhiMLR35WWMW%2FHu@myrica/
> Suggested-by: Jean-Philippe Brucker <[email protected]>
> Signed-off-by: Jacob Pan <[email protected]>
> ---
> drivers/dma/idxd/cdev.c | 2 +-
> drivers/dma/idxd/init.c | 6 +++---
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 2 +-
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 4 ++--
> drivers/iommu/intel/Kconfig | 1 +
> drivers/iommu/intel/svm.c | 18 ++++++------------
> drivers/iommu/iommu.c | 9 ++++++---
> drivers/misc/uacce/uacce.c | 2 +-
> include/linux/intel-iommu.h | 2 +-
> include/linux/intel-svm.h | 17 ++---------------
> include/linux/iommu.h | 19 ++++++++++++++++---
> 11 files changed, 40 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
> index 0db9b82..21ec82b 100644
> --- a/drivers/dma/idxd/cdev.c
> +++ b/drivers/dma/idxd/cdev.c
> @@ -103,7 +103,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
> filp->private_data = ctx;
>
> if (device_pasid_enabled(idxd)) {
> - sva = iommu_sva_bind_device(dev, current->mm, NULL);
> + sva = iommu_sva_bind_device(dev, current->mm, 0);
> if (IS_ERR(sva)) {
> rc = PTR_ERR(sva);
> dev_err(dev, "pasid allocation failed: %d\n", rc);
> diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
> index 085a0c3..cdc85f1 100644
> --- a/drivers/dma/idxd/init.c
> +++ b/drivers/dma/idxd/init.c
> @@ -300,13 +300,13 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev)
>
> static int idxd_enable_system_pasid(struct idxd_device *idxd)
> {
> - int flags;
> + unsigned int flags;
> unsigned int pasid;
> struct iommu_sva *sva;
>
> - flags = SVM_FLAG_SUPERVISOR_MODE;
> + flags = IOMMU_SVA_BIND_SUPERVISOR;
>
> - sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags);
> + sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, flags);
> if (IS_ERR(sva)) {
> dev_warn(&idxd->pdev->dev,
> "iommu sva bind failed: %ld\n", PTR_ERR(sva));
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> index bb251ca..23e287e 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> @@ -354,7 +354,7 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
> }
>
> struct iommu_sva *
> -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
> +arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, unsigned int flags)

Could you add a check on flags:

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index bb251cab61f3..145ceb2fc5da 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -354,12 +354,15 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
}

struct iommu_sva *
-arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, unsigned int flags)
{
struct iommu_sva *handle;
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

+ if (flags)
+ return ERR_PTR(-EINVAL);
+
if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
return ERR_PTR(-EINVAL);



> {
> struct iommu_sva *handle;
> struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index f985817..b971d4d 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -711,7 +711,7 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master);
> int arm_smmu_master_enable_sva(struct arm_smmu_master *master);
> int arm_smmu_master_disable_sva(struct arm_smmu_master *master);
> struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
> - void *drvdata);
> + unsigned int flags);
> void arm_smmu_sva_unbind(struct iommu_sva *handle);
> u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle);
> void arm_smmu_sva_notifier_synchronize(void);
> @@ -742,7 +742,7 @@ static inline int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
> }
>
> static inline struct iommu_sva *
> -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
> +arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, unsigned int flags)
> {
> return ERR_PTR(-ENODEV);
> }
> diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
> index 28a3d15..5415052 100644
> --- a/drivers/iommu/intel/Kconfig
> +++ b/drivers/iommu/intel/Kconfig
> @@ -41,6 +41,7 @@ config INTEL_IOMMU_SVM
> select PCI_PRI
> select MMU_NOTIFIER
> select IOASID
> + select IOMMU_SVA_LIB

Not needed here?

> help
> Shared Virtual Memory (SVM) provides a facility for devices
> to access DMA resources through process address space by
> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> index 574a7e6..4b5f8b0 100644
> --- a/drivers/iommu/intel/svm.c
> +++ b/drivers/iommu/intel/svm.c
> @@ -486,12 +486,9 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
> } else
> pasid_max = 1 << 20;
>
> - /* Bind supervisor PASID shuld have mm = NULL */
> - if (flags & SVM_FLAG_SUPERVISOR_MODE) {
> - if (!ecap_srs(iommu->ecap) || mm) {
> - pr_err("Supervisor PASID with user provided mm.\n");
> - return -EINVAL;
> - }
> + if ((flags & IOMMU_SVA_BIND_SUPERVISOR) && !ecap_srs(iommu->ecap)) {
> + pr_err("Supervisor PASID not supported.\n");
> + return -EINVAL;
> }
>
> if (!(flags & SVM_FLAG_PRIVATE_PASID)) {
> @@ -593,7 +590,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
> ret = intel_pasid_setup_first_level(iommu, dev,
> mm ? mm->pgd : init_mm.pgd,
> svm->pasid, FLPT_DEFAULT_DID,
> - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
> + (mm ? 0 : IOMMU_SVA_BIND_SUPERVISOR) |
> (cpu_feature_enabled(X86_FEATURE_LA57) ?
> PASID_FLAG_FL5LP : 0));
> spin_unlock_irqrestore(&iommu->lock, iflags);
> @@ -620,7 +617,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
> ret = intel_pasid_setup_first_level(iommu, dev,
> mm ? mm->pgd : init_mm.pgd,
> svm->pasid, FLPT_DEFAULT_DID,
> - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
> + (mm ? 0 : IOMMU_SVA_BIND_SUPERVISOR) |
> (cpu_feature_enabled(X86_FEATURE_LA57) ?
> PASID_FLAG_FL5LP : 0));
> spin_unlock_irqrestore(&iommu->lock, iflags);
> @@ -1059,11 +1056,10 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>
> #define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
> struct iommu_sva *
> -intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
> +intel_svm_bind(struct device *dev, struct mm_struct *mm, unsigned int flags)
> {
> struct iommu_sva *sva = ERR_PTR(-EINVAL);
> struct intel_svm_dev *sdev = NULL;
> - unsigned int flags = 0;
> int ret;
>
> /*
> @@ -1071,8 +1067,6 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
> * It will require shared SVM data structures, i.e. combine io_mm
> * and intel_svm etc.
> */
> - if (drvdata)
> - flags = *(unsigned int *)drvdata;
> mutex_lock(&pasid_mutex);
> ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev);
> if (ret)
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index d0b0a15..bf0a20f 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -2962,6 +2962,7 @@ EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
> * iommu_sva_bind_device() - Bind a process address space to a device
> * @dev: the device
> * @mm: the mm to bind, caller must hold a reference to it
> + * @flags: options for the bind operation

Could also specify valid flags "IOMMU_SVA_BIND_*"

> *
> * Create a bond between device and address space, allowing the device to access
> * the mm using the returned PASID. If a bond already exists between @device and
> @@ -2974,7 +2975,7 @@ EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
> * On error, returns an ERR_PTR value.
> */
> struct iommu_sva *
> -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
> +iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, unsigned int flags)
> {
> struct iommu_group *group;
> struct iommu_sva *handle = ERR_PTR(-EINVAL);
> @@ -2987,6 +2988,9 @@ iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
> if (!group)
> return ERR_PTR(-ENODEV);
>
> + /* Supervisor SVA does not need the current mm */
> + if ((flags & IOMMU_SVA_BIND_SUPERVISOR) && mm)
> + return ERR_PTR(-EINVAL);
> /* Ensure device count and domain don't change while we're binding */
> mutex_lock(&group->mutex);
>
> @@ -2999,8 +3003,7 @@ iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
> if (iommu_group_device_count(group) != 1)
> goto out_unlock;
>
> - handle = ops->sva_bind(dev, mm, drvdata);
> -
> + handle = ops->sva_bind(dev, mm, flags);
> out_unlock:
> mutex_unlock(&group->mutex);
> iommu_group_put(group);
> diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
> index d07af4e..27e0e04 100644
> --- a/drivers/misc/uacce/uacce.c
> +++ b/drivers/misc/uacce/uacce.c
> @@ -99,7 +99,7 @@ static int uacce_bind_queue(struct uacce_device *uacce, struct uacce_queue *q)
> if (!(uacce->flags & UACCE_DEV_SVA))
> return 0;
>
> - handle = iommu_sva_bind_device(uacce->parent, current->mm, NULL);
> + handle = iommu_sva_bind_device(uacce->parent, current->mm, 0);
> if (IS_ERR(handle))
> return PTR_ERR(handle);
>
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index 1bc46b8..cdff752 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -757,7 +757,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
> struct iommu_gpasid_bind_data *data);
> int intel_svm_unbind_gpasid(struct device *dev, u32 pasid);
> struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm,
> - void *drvdata);
> + unsigned int flags);
> void intel_svm_unbind(struct iommu_sva *handle);
> u32 intel_svm_get_pasid(struct iommu_sva *handle);
> int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
> diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
> index 39d368a..ef6b753 100644
> --- a/include/linux/intel-svm.h
> +++ b/include/linux/intel-svm.h
> @@ -30,30 +30,17 @@ struct svm_dev_ops {
> * if there is no other way to do so. It should be used sparingly, if at all.
> */
> #define SVM_FLAG_PRIVATE_PASID (1<<0)
> -
> -/*
> - * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only
> - * for access to kernel addresses. No IOTLB flushes are automatically done
> - * for kernel mappings; it is valid only for access to the kernel's static
> - * 1:1 mapping of physical memory — not to vmalloc or even module mappings.
> - * A future API addition may permit the use of such ranges, by means of an
> - * explicit IOTLB flush call (akin to the DMA API's unmap method).
> - *
> - * It is unlikely that we will ever hook into flush_tlb_kernel_range() to
> - * do such IOTLB flushes automatically.
> - */
> -#define SVM_FLAG_SUPERVISOR_MODE (1<<1)
> /*
> * The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest
> * processes. Compared to the host bind, the primary differences are:
> * 1. mm life cycle management
> * 2. fault reporting
> */
> -#define SVM_FLAG_GUEST_MODE (1<<2)
> +#define SVM_FLAG_GUEST_MODE (1<<1)
> /*
> * The SVM_FLAG_GUEST_PASID flag is used when a guest has its own PASID space,
> * which requires guest and host PASID translation at both directions.
> */
> -#define SVM_FLAG_GUEST_PASID (1<<3)
> +#define SVM_FLAG_GUEST_PASID (1<<2)
>
> #endif /* __INTEL_SVM_H__ */
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 5e7fe51..a3fbaa2 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -166,6 +166,19 @@ enum iommu_dev_features {
>
> #ifdef CONFIG_IOMMU_API
>
> +/*
> + * The IOMMU_SVA_BIND_SUPERVISOR flag requests a PASID which can be used only
> + * for access to kernel addresses. No IOTLB flushes are automatically done
> + * for kernel mappings; it is valid only for access to the kernel's static
> + * 1:1 mapping of physical memory — not to vmalloc or even module mappings.
> + * A future API addition may permit the use of such ranges, by means of an
> + * explicit IOTLB flush call (akin to the DMA API's unmap method).
> + *
> + * It is unlikely that we will ever hook into flush_tlb_kernel_range() to
> + * do such IOTLB flushes automatically.
> + */
> +#define IOMMU_SVA_BIND_SUPERVISOR BIT(0)
> +

It needs to be defined before the #ifdef CONFIG_IOMMU_API, otherwise
drivers using it won't build with !CONFIG_IOMMU_API

Thanks,
Jean

> /**
> * struct iommu_iotlb_gather - Range information for a pending IOTLB flush
> *
> @@ -287,7 +300,7 @@ struct iommu_ops {
> int (*aux_get_pasid)(struct iommu_domain *domain, struct device *dev);
>
> struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm,
> - void *drvdata);
> + unsigned int flags);
> void (*sva_unbind)(struct iommu_sva *handle);
> u32 (*sva_get_pasid)(struct iommu_sva *handle);
>
> @@ -640,7 +653,7 @@ int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev);
>
> struct iommu_sva *iommu_sva_bind_device(struct device *dev,
> struct mm_struct *mm,
> - void *drvdata);
> + unsigned int flags);
> void iommu_sva_unbind_device(struct iommu_sva *handle);
> u32 iommu_sva_get_pasid(struct iommu_sva *handle);
>
> @@ -1015,7 +1028,7 @@ iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
> }
>
> static inline struct iommu_sva *
> -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
> +iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, unsigned int flags)
> {
> return NULL;
> }
> --
> 2.7.4
>

2021-04-09 12:26:19

by Baolu Lu

[permalink] [raw]
Subject: Re: [PATCH 1/2] iommu/sva: Tighten SVA bind API with explicit flags

Hi Jacob,

On 2021/4/9 1:08, Jacob Pan wrote:
> The void* drvdata parameter isn't really used in iommu_sva_bind_device()
> API, the current IDXD code "borrows" the drvdata for a VT-d private flag
> for supervisor SVA usage.
>
> Supervisor/Privileged mode request is a generic feature. It should be
> promoted from the VT-d vendor driver to the generic code.
>
> This patch replaces void* drvdata with a unsigned int flags parameter
> and adjusts callers accordingly.
>
> Link: https://lore.kernel.org/linux-iommu/YFhiMLR35WWMW%2FHu@myrica/
> Suggested-by: Jean-Philippe Brucker <[email protected]>
> Signed-off-by: Jacob Pan <[email protected]>
> ---
> drivers/dma/idxd/cdev.c | 2 +-
> drivers/dma/idxd/init.c | 6 +++---
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 2 +-
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 4 ++--
> drivers/iommu/intel/Kconfig | 1 +
> drivers/iommu/intel/svm.c | 18 ++++++------------
> drivers/iommu/iommu.c | 9 ++++++---
> drivers/misc/uacce/uacce.c | 2 +-
> include/linux/intel-iommu.h | 2 +-
> include/linux/intel-svm.h | 17 ++---------------
> include/linux/iommu.h | 19 ++++++++++++++++---
> 11 files changed, 40 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
> index 0db9b82..21ec82b 100644
> --- a/drivers/dma/idxd/cdev.c
> +++ b/drivers/dma/idxd/cdev.c
> @@ -103,7 +103,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
> filp->private_data = ctx;
>
> if (device_pasid_enabled(idxd)) {
> - sva = iommu_sva_bind_device(dev, current->mm, NULL);
> + sva = iommu_sva_bind_device(dev, current->mm, 0);
> if (IS_ERR(sva)) {
> rc = PTR_ERR(sva);
> dev_err(dev, "pasid allocation failed: %d\n", rc);
> diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
> index 085a0c3..cdc85f1 100644
> --- a/drivers/dma/idxd/init.c
> +++ b/drivers/dma/idxd/init.c
> @@ -300,13 +300,13 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev)
>
> static int idxd_enable_system_pasid(struct idxd_device *idxd)
> {
> - int flags;
> + unsigned int flags;
> unsigned int pasid;
> struct iommu_sva *sva;
>
> - flags = SVM_FLAG_SUPERVISOR_MODE;
> + flags = IOMMU_SVA_BIND_SUPERVISOR;

With SVM_FLAG_SUPERVISOR_MODE removed, I guess

#include <linux/intel-svm.h>

in this file could be removed as well.

>
> - sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags);
> + sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, flags);
> if (IS_ERR(sva)) {
> dev_warn(&idxd->pdev->dev,
> "iommu sva bind failed: %ld\n", PTR_ERR(sva));
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> index bb251ca..23e287e 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> @@ -354,7 +354,7 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
> }
>
> struct iommu_sva *
> -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
> +arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, unsigned int flags)
> {
> struct iommu_sva *handle;
> struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index f985817..b971d4d 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -711,7 +711,7 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master);
> int arm_smmu_master_enable_sva(struct arm_smmu_master *master);
> int arm_smmu_master_disable_sva(struct arm_smmu_master *master);
> struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
> - void *drvdata);
> + unsigned int flags);
> void arm_smmu_sva_unbind(struct iommu_sva *handle);
> u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle);
> void arm_smmu_sva_notifier_synchronize(void);
> @@ -742,7 +742,7 @@ static inline int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
> }
>
> static inline struct iommu_sva *
> -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
> +arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, unsigned int flags)
> {
> return ERR_PTR(-ENODEV);
> }
> diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
> index 28a3d15..5415052 100644
> --- a/drivers/iommu/intel/Kconfig
> +++ b/drivers/iommu/intel/Kconfig
> @@ -41,6 +41,7 @@ config INTEL_IOMMU_SVM
> select PCI_PRI
> select MMU_NOTIFIER
> select IOASID
> + select IOMMU_SVA_LIB

Why?

> help
> Shared Virtual Memory (SVM) provides a facility for devices
> to access DMA resources through process address space by
> diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> index 574a7e6..4b5f8b0 100644
> --- a/drivers/iommu/intel/svm.c
> +++ b/drivers/iommu/intel/svm.c
> @@ -486,12 +486,9 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
> } else
> pasid_max = 1 << 20;
>
> - /* Bind supervisor PASID shuld have mm = NULL */
> - if (flags & SVM_FLAG_SUPERVISOR_MODE) {
> - if (!ecap_srs(iommu->ecap) || mm) {
> - pr_err("Supervisor PASID with user provided mm.\n");
> - return -EINVAL;
> - }
> + if ((flags & IOMMU_SVA_BIND_SUPERVISOR) && !ecap_srs(iommu->ecap)) {
> + pr_err("Supervisor PASID not supported.\n");
> + return -EINVAL;
> }
>
> if (!(flags & SVM_FLAG_PRIVATE_PASID)) {
> @@ -593,7 +590,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
> ret = intel_pasid_setup_first_level(iommu, dev,
> mm ? mm->pgd : init_mm.pgd,
> svm->pasid, FLPT_DEFAULT_DID,
> - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
> + (mm ? 0 : IOMMU_SVA_BIND_SUPERVISOR) |

NO. PASID_FLAG_SUPERVISOR_MODE is an internal flag to tell the helper
whether the SRE bit should be set in the PASID entry.

> (cpu_feature_enabled(X86_FEATURE_LA57) ?
> PASID_FLAG_FL5LP : 0));
> spin_unlock_irqrestore(&iommu->lock, iflags);
> @@ -620,7 +617,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
> ret = intel_pasid_setup_first_level(iommu, dev,
> mm ? mm->pgd : init_mm.pgd,
> svm->pasid, FLPT_DEFAULT_DID,
> - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
> + (mm ? 0 : IOMMU_SVA_BIND_SUPERVISOR) |

The same as above.

> (cpu_feature_enabled(X86_FEATURE_LA57) ?
> PASID_FLAG_FL5LP : 0));
> spin_unlock_irqrestore(&iommu->lock, iflags);
> @@ -1059,11 +1056,10 @@ static irqreturn_t prq_event_thread(int irq, void *d)
>
> #define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
> struct iommu_sva *
> -intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
> +intel_svm_bind(struct device *dev, struct mm_struct *mm, unsigned int flags)
> {
> struct iommu_sva *sva = ERR_PTR(-EINVAL);
> struct intel_svm_dev *sdev = NULL;
> - unsigned int flags = 0;
> int ret;
>
> /*
> @@ -1071,8 +1067,6 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
> * It will require shared SVM data structures, i.e. combine io_mm
> * and intel_svm etc.
> */
> - if (drvdata)
> - flags = *(unsigned int *)drvdata;
> mutex_lock(&pasid_mutex);
> ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev);
> if (ret)
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index d0b0a15..bf0a20f 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -2962,6 +2962,7 @@ EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
> * iommu_sva_bind_device() - Bind a process address space to a device
> * @dev: the device
> * @mm: the mm to bind, caller must hold a reference to it
> + * @flags: options for the bind operation
> *
> * Create a bond between device and address space, allowing the device to access
> * the mm using the returned PASID. If a bond already exists between @device and
> @@ -2974,7 +2975,7 @@ EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
> * On error, returns an ERR_PTR value.
> */
> struct iommu_sva *
> -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
> +iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, unsigned int flags)
> {
> struct iommu_group *group;
> struct iommu_sva *handle = ERR_PTR(-EINVAL);
> @@ -2987,6 +2988,9 @@ iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
> if (!group)
> return ERR_PTR(-ENODEV);
>
> + /* Supervisor SVA does not need the current mm */
> + if ((flags & IOMMU_SVA_BIND_SUPERVISOR) && mm)
> + return ERR_PTR(-EINVAL);
> /* Ensure device count and domain don't change while we're binding */
> mutex_lock(&group->mutex);
>
> @@ -2999,8 +3003,7 @@ iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
> if (iommu_group_device_count(group) != 1)
> goto out_unlock;
>
> - handle = ops->sva_bind(dev, mm, drvdata);
> -
> + handle = ops->sva_bind(dev, mm, flags);
> out_unlock:
> mutex_unlock(&group->mutex);
> iommu_group_put(group);
> diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
> index d07af4e..27e0e04 100644
> --- a/drivers/misc/uacce/uacce.c
> +++ b/drivers/misc/uacce/uacce.c
> @@ -99,7 +99,7 @@ static int uacce_bind_queue(struct uacce_device *uacce, struct uacce_queue *q)
> if (!(uacce->flags & UACCE_DEV_SVA))
> return 0;
>
> - handle = iommu_sva_bind_device(uacce->parent, current->mm, NULL);
> + handle = iommu_sva_bind_device(uacce->parent, current->mm, 0);
> if (IS_ERR(handle))
> return PTR_ERR(handle);
>
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index 1bc46b8..cdff752 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -757,7 +757,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
> struct iommu_gpasid_bind_data *data);
> int intel_svm_unbind_gpasid(struct device *dev, u32 pasid);
> struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm,
> - void *drvdata);
> + unsigned int flags);
> void intel_svm_unbind(struct iommu_sva *handle);
> u32 intel_svm_get_pasid(struct iommu_sva *handle);
> int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
> diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
> index 39d368a..ef6b753 100644
> --- a/include/linux/intel-svm.h
> +++ b/include/linux/intel-svm.h
> @@ -30,30 +30,17 @@ struct svm_dev_ops {
> * if there is no other way to do so. It should be used sparingly, if at all.
> */
> #define SVM_FLAG_PRIVATE_PASID (1<<0)
> -
> -/*
> - * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only
> - * for access to kernel addresses. No IOTLB flushes are automatically done
> - * for kernel mappings; it is valid only for access to the kernel's static
> - * 1:1 mapping of physical memory — not to vmalloc or even module mappings.
> - * A future API addition may permit the use of such ranges, by means of an
> - * explicit IOTLB flush call (akin to the DMA API's unmap method).
> - *
> - * It is unlikely that we will ever hook into flush_tlb_kernel_range() to
> - * do such IOTLB flushes automatically.
> - */
> -#define SVM_FLAG_SUPERVISOR_MODE (1<<1)
> /*
> * The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest
> * processes. Compared to the host bind, the primary differences are:
> * 1. mm life cycle management
> * 2. fault reporting
> */
> -#define SVM_FLAG_GUEST_MODE (1<<2)
> +#define SVM_FLAG_GUEST_MODE (1<<1)
> /*
> * The SVM_FLAG_GUEST_PASID flag is used when a guest has its own PASID space,
> * which requires guest and host PASID translation at both directions.
> */
> -#define SVM_FLAG_GUEST_PASID (1<<3)
> +#define SVM_FLAG_GUEST_PASID (1<<2)
>
> #endif /* __INTEL_SVM_H__ */
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 5e7fe51..a3fbaa2 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -166,6 +166,19 @@ enum iommu_dev_features {
>
> #ifdef CONFIG_IOMMU_API
>
> +/*
> + * The IOMMU_SVA_BIND_SUPERVISOR flag requests a PASID which can be used only
> + * for access to kernel addresses. No IOTLB flushes are automatically done
> + * for kernel mappings; it is valid only for access to the kernel's static
> + * 1:1 mapping of physical memory — not to vmalloc or even module mappings.
> + * A future API addition may permit the use of such ranges, by means of an
> + * explicit IOTLB flush call (akin to the DMA API's unmap method).
> + *
> + * It is unlikely that we will ever hook into flush_tlb_kernel_range() to
> + * do such IOTLB flushes automatically.
> + */
> +#define IOMMU_SVA_BIND_SUPERVISOR BIT(0)
> +
> /**
> * struct iommu_iotlb_gather - Range information for a pending IOTLB flush
> *
> @@ -287,7 +300,7 @@ struct iommu_ops {
> int (*aux_get_pasid)(struct iommu_domain *domain, struct device *dev);
>
> struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm,
> - void *drvdata);
> + unsigned int flags);
> void (*sva_unbind)(struct iommu_sva *handle);
> u32 (*sva_get_pasid)(struct iommu_sva *handle);
>
> @@ -640,7 +653,7 @@ int iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev);
>
> struct iommu_sva *iommu_sva_bind_device(struct device *dev,
> struct mm_struct *mm,
> - void *drvdata);
> + unsigned int flags);
> void iommu_sva_unbind_device(struct iommu_sva *handle);
> u32 iommu_sva_get_pasid(struct iommu_sva *handle);
>
> @@ -1015,7 +1028,7 @@ iommu_aux_get_pasid(struct iommu_domain *domain, struct device *dev)
> }
>
> static inline struct iommu_sva *
> -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
> +iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, unsigned int flags)
> {
> return NULL;
> }
>

Best regards,
baolu

2021-04-09 12:46:33

by Baolu Lu

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu/sva: Remove mm parameter from SVA bind API

Hi,

On 2021/4/9 1:08, Jacob Pan wrote:
> /**
> * iommu_sva_alloc_pasid - Allocate a PASID for the mm
> - * @mm: the mm
> * @min: minimum PASID value (inclusive)
> * @max: maximum PASID value (inclusive)
> *
> - * Try to allocate a PASID for this mm, or take a reference to the existing one
> - * provided it fits within the [@min, @max] range. On success the PASID is
> - * available in mm->pasid, and must be released with iommu_sva_free_pasid().
> + * Try to allocate a PASID for the current mm, or take a reference to the
> + * existing one provided it fits within the [@min, @max] range. On success
> + * the PASID is available in the current mm->pasid, and must be released with
> + * iommu_sva_free_pasid().
> * @min must be greater than 0, because 0 indicates an unused mm->pasid.
> *
> * Returns 0 on success and < 0 on error.
> */
> -int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
> +int iommu_sva_alloc_pasid(ioasid_t min, ioasid_t max)
> {
> int ret = 0;
> ioasid_t pasid;
> + struct mm_struct *mm;
>
> if (min == INVALID_IOASID || max == INVALID_IOASID ||
> min == 0 || max < min)
> return -EINVAL;
>
> mutex_lock(&iommu_sva_lock);
> + mm = get_task_mm(current);

How could we allocate a supervisor PASID through iommu_sva_alloc_pasid()
if we always use current->mm here?

> + if (!mm) {
> + ret = -EINVAL;
> + goto out_unlock;
> + }
> if (mm->pasid) {
> if (mm->pasid >= min && mm->pasid <= max)
> ioasid_get(mm->pasid);
> @@ -45,22 +51,32 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
> else
> mm->pasid = pasid;
> }
> + mmput(mm);
> +out_unlock:
> mutex_unlock(&iommu_sva_lock);
> return ret;
> }
> EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid);

Best regards,
baolu

2021-04-09 18:03:14

by Jacob Pan

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu/sva: Remove mm parameter from SVA bind API

Hi Jean-Philippe,

On Fri, 9 Apr 2021 12:11:47 +0200, Jean-Philippe Brucker
<[email protected]> wrote:

> On Thu, Apr 08, 2021 at 10:08:56AM -0700, Jacob Pan wrote:
> > diff --git a/drivers/iommu/iommu-sva-lib.c
> > b/drivers/iommu/iommu-sva-lib.c index bd41405..bd99f6b 100644
> > --- a/drivers/iommu/iommu-sva-lib.c
> > +++ b/drivers/iommu/iommu-sva-lib.c
> > @@ -12,27 +12,33 @@ static DECLARE_IOASID_SET(iommu_sva_pasid);
> >
> > /**
> > * iommu_sva_alloc_pasid - Allocate a PASID for the mm
> > - * @mm: the mm
> > * @min: minimum PASID value (inclusive)
> > * @max: maximum PASID value (inclusive)
> > *
> > - * Try to allocate a PASID for this mm, or take a reference to the
> > existing one
> > - * provided it fits within the [@min, @max] range. On success the
> > PASID is
> > - * available in mm->pasid, and must be released with
> > iommu_sva_free_pasid().
> > + * Try to allocate a PASID for the current mm, or take a reference to
> > the
> > + * existing one provided it fits within the [@min, @max] range. On
> > success
> > + * the PASID is available in the current mm->pasid, and must be
> > released with
> > + * iommu_sva_free_pasid().
> > * @min must be greater than 0, because 0 indicates an unused
> > mm->pasid. *
> > * Returns 0 on success and < 0 on error.
> > */
> > -int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t
> > max) +int iommu_sva_alloc_pasid(ioasid_t min, ioasid_t max)
> > {
> > int ret = 0;
> > ioasid_t pasid;
> > + struct mm_struct *mm;
> >
> > if (min == INVALID_IOASID || max == INVALID_IOASID ||
> > min == 0 || max < min)
> > return -EINVAL;
> >
> > mutex_lock(&iommu_sva_lock);
> > + mm = get_task_mm(current);
> > + if (!mm) {
> > + ret = -EINVAL;
> > + goto out_unlock;
> > + }
>
> I still think it would be more elegant to keep the choice of context in
> iommu_sva_bind_device() and pass it down to leaf functions such as
> iommu_sva_alloc_pasid(). The patch is trying to solve two separate

I agree if iommu_sva_alloc_pasid() is a leaf function, but it is a public
function, e.g. called by smmu code:
/* Allocate a PASID for this mm if necessary */
ret = iommu_sva_alloc_pasid(1, (1U << master->ssid_bits) - 1);
If we give mm as parameter, it will give callers the illusion that this
mm doesn't have to be current->mm.

Should we make it into a leaf function by splitting iommu_sva_alloc_pasid()
into two parts?
1. iommu_sva_assign_pasid() //a new leaf helper function does mm->pasid
assignment
2. ioasid_alloc()

in iommu_sva_bind_device(), we do:
1. handle = driver ops->sva_bind(dev, mm, flags);
2. pasid = sva_get_pasid(handle);
3. iommu_sva_assign_pasid(mm, pasid)

In vendor driver sva_bind(), it just use ioasid_alloc directly with custom
range. e.g. arm-smmu-v3-sva.c
- ret = iommu_sva_alloc_pasid(1, (1U << master->ssid_bits) - 1);
+ ret = ioasid_alloc(&iommu_sva_pasid, 1, (1U << master->ssid_bits);

> problems:
>
> * We don't have a use-case for binding the mm of a remote process (and
> it's supposedly difficult for device drivers to do it securely). So OK,
> we remove the mm argument from iommu_sva_bind_device() and use the
> current mm. But the IOMMU driver isn't going to do get_task_mm(current)
> every time it needs the mm being bound, it will take it from
> iommu_sva_bind_device(). Likewise iommu_sva_alloc_pasid() shouldn't need
> to bother with get_task_mm().
>
> * cgroup accounting for IOASIDs needs to be on the current task. Removing
> the mm parameter from iommu_sva_alloc_pasid() doesn't help with that.
> Sure it indicates that iommu_sva_alloc_pasid() needs a specific task
> context but that's only for cgroup purpose, and I'd rather pass the
> cgroup down from iommu_sva_bind_device() anyway (but am fine with
> keeping it within ioasid_alloc() for now). Plus it's an internal helper,
> easy for us to check that the callers are doing the right thing.
>
With the above split, we really just have one allocation function:
ioasid_alloc(), so it can manage current cgroup accounting within. Would
this work?

> > if (mm->pasid) {
> > if (mm->pasid >= min && mm->pasid <= max)
> > ioasid_get(mm->pasid);
> > @@ -45,22 +51,32 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm,
> > ioasid_t min, ioasid_t max) else
> > mm->pasid = pasid;
> > }
> > + mmput(mm);
> > +out_unlock:
> > mutex_unlock(&iommu_sva_lock);
> > return ret;
> > }
> > EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid);
> >
> > /**
> > - * iommu_sva_free_pasid - Release the mm's PASID
> > + * iommu_sva_free_pasid - Release the current mm's PASID
> > * @mm: the mm
> > *
> > * Drop one reference to a PASID allocated with iommu_sva_alloc_pasid()
> > */
> > -void iommu_sva_free_pasid(struct mm_struct *mm)
> > +void iommu_sva_free_pasid(void)
> > {
> > + struct mm_struct *mm;
> > +
> > mutex_lock(&iommu_sva_lock);
> > + mm = get_task_mm(current);
> > + if (!mm)
> > + goto out_unlock;
> > +
>
> More importantly, could we at least dissociate free_pasid() from the
> current process? Otherwise drivers can't clean up from a workqueue (as
> amdkfd does) or from an rcu callback. Given that iommu_sva_unbind_device()
> takes the SVA handle owned by whomever did bind(), there shouldn't be any
> security issue. For the cgroup problem, ioasid.c could internally keep
> track of the cgroup used during allocation rather than assuming the
> context of ioasid_put() is the same as ioasid_get()
>
Good point, you are right cgroup uncharge does not have to be on the
current. I will keep the mm parameter here.

> > if (ioasid_put(mm->pasid))
> > mm->pasid = 0;
> > + mmput(mm);
> > +out_unlock:
> > mutex_unlock(&iommu_sva_lock);
> > }
> > EXPORT_SYMBOL_GPL(iommu_sva_free_pasid);
> > diff --git a/drivers/iommu/iommu-sva-lib.h
> > b/drivers/iommu/iommu-sva-lib.h index b40990a..278b8b4 100644
> > --- a/drivers/iommu/iommu-sva-lib.h
> > +++ b/drivers/iommu/iommu-sva-lib.h
> > @@ -8,8 +8,8 @@
> > #include <linux/ioasid.h>
> > #include <linux/mm_types.h>
> >
> > -int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t
> > max); -void iommu_sva_free_pasid(struct mm_struct *mm);
> > +int iommu_sva_alloc_pasid(ioasid_t min, ioasid_t max);
> > +void iommu_sva_free_pasid(void);
> > struct mm_struct *iommu_sva_find(ioasid_t pasid);
> >
> > #endif /* _IOMMU_SVA_LIB_H */
> > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > index bf0a20f..25840e6 100644
> > --- a/drivers/iommu/iommu.c
> > +++ b/drivers/iommu/iommu.c
> > @@ -23,6 +23,7 @@
> > #include <linux/property.h>
> > #include <linux/fsl/mc.h>
> > #include <linux/module.h>
> > +#include <linux/sched/mm.h>
> > #include <trace/events/iommu.h>
> >
> > static struct kset *iommu_group_kset;
> > @@ -2959,9 +2960,8 @@ int iommu_aux_get_pasid(struct iommu_domain
> > *domain, struct device *dev) EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
> >
> > /**
> > - * iommu_sva_bind_device() - Bind a process address space to a device
> > + * iommu_sva_bind_device() - Bind the current process address space to
> > a device
> > * @dev: the device
> > - * @mm: the mm to bind, caller must hold a reference to it
> > * @flags: options for the bind operation
> > *
> > * Create a bond between device and address space, allowing the device
> > to access
>
> There is another reference to @mm to remove in the function description
>
will do

> > @@ -2975,9 +2975,10 @@ EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
> > * On error, returns an ERR_PTR value.
> > */
> > struct iommu_sva *
> > -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm,
> > unsigned int flags) +iommu_sva_bind_device(struct device *dev, unsigned
> > int flags) {
> > struct iommu_group *group;
> > + struct mm_struct *mm = NULL;
> > struct iommu_sva *handle = ERR_PTR(-EINVAL);
> > const struct iommu_ops *ops = dev->bus->iommu_ops;
> >
> > @@ -2989,8 +2990,11 @@ iommu_sva_bind_device(struct device *dev, struct
> > mm_struct *mm, unsigned int fla return ERR_PTR(-ENODEV);
> >
> > /* Supervisor SVA does not need the current mm */
> > - if ((flags & IOMMU_SVA_BIND_SUPERVISOR) && mm)
> > - return ERR_PTR(-EINVAL);
> > + if (!(flags & IOMMU_SVA_BIND_SUPERVISOR)) {
> > + mm = get_task_mm(current);
> > + if (!mm)
> > + return ERR_PTR(-EINVAL);
> > + }
> > /* Ensure device count and domain don't change while we're
> > binding */ mutex_lock(&group->mutex);
> >
> > @@ -3004,6 +3008,8 @@ iommu_sva_bind_device(struct device *dev, struct
> > mm_struct *mm, unsigned int fla goto out_unlock;
> >
> > handle = ops->sva_bind(dev, mm, flags);
> > + if (mm)
> > + mmput(mm);
> > out_unlock:
> > mutex_unlock(&group->mutex);
> > iommu_group_put(group);
> > diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
> > index 27e0e04..da4401a 100644
> > --- a/drivers/misc/uacce/uacce.c
> > +++ b/drivers/misc/uacce/uacce.c
> > @@ -99,7 +99,7 @@ static int uacce_bind_queue(struct uacce_device
> > *uacce, struct uacce_queue *q) if (!(uacce->flags & UACCE_DEV_SVA))
> > return 0;
> >
> > - handle = iommu_sva_bind_device(uacce->parent, current->mm, 0);
> > + handle = iommu_sva_bind_device(uacce->parent, 0);
> > if (IS_ERR(handle))
> > return PTR_ERR(handle);
> >
> > diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> > index a3fbaa2..cf752f3 100644
> > --- a/include/linux/iommu.h
> > +++ b/include/linux/iommu.h
> > @@ -231,8 +231,8 @@ struct iommu_iotlb_gather {
> > * @dev_feat_enabled: check enabled feature
> > * @aux_attach/detach_dev: aux-domain specific attach/detach entries.
> > * @aux_get_pasid: get the pasid given an aux-domain
> > - * @sva_bind: Bind process address space to device
> > - * @sva_unbind: Unbind process address space from device
> > + * @sva_bind: Bind the current process address space to device
> > + * @sva_unbind: Unbind the current process address space from device
>
> These don't need changing since we're still passing the mm down to the
> drivers
>
Right, I struggled between two options :)

> Thanks,
> Jean
>
> > * @sva_get_pasid: Get PASID associated to a SVA handle
> > * @page_response: handle page request response
> > * @cache_invalidate: invalidate translation caches
> > @@ -652,7 +652,6 @@ void iommu_aux_detach_device(struct iommu_domain
> > *domain, struct device *dev); int iommu_aux_get_pasid(struct
> > iommu_domain *domain, struct device *dev);
> > struct iommu_sva *iommu_sva_bind_device(struct device *dev,
> > - struct mm_struct *mm,
> > unsigned int flags);
> > void iommu_sva_unbind_device(struct iommu_sva *handle);
> > u32 iommu_sva_get_pasid(struct iommu_sva *handle);
> > @@ -1028,7 +1027,7 @@ iommu_aux_get_pasid(struct iommu_domain *domain,
> > struct device *dev) }
> >
> > static inline struct iommu_sva *
> > -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm,
> > unsigned int flags) +iommu_sva_bind_device(struct device *dev, unsigned
> > int flags) {
> > return NULL;
> > }
> > --
> > 2.7.4
> >


Thanks,

Jacob

2021-04-09 18:06:49

by Jacob Pan

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu/sva: Remove mm parameter from SVA bind API

Hi Lu,

On Fri, 9 Apr 2021 20:45:22 +0800, Lu Baolu <[email protected]>
wrote:

> > -int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t
> > max) +int iommu_sva_alloc_pasid(ioasid_t min, ioasid_t max)
> > {
> > int ret = 0;
> > ioasid_t pasid;
> > + struct mm_struct *mm;
> >
> > if (min == INVALID_IOASID || max == INVALID_IOASID ||
> > min == 0 || max < min)
> > return -EINVAL;
> >
> > mutex_lock(&iommu_sva_lock);
> > + mm = get_task_mm(current);
>
> How could we allocate a supervisor PASID through iommu_sva_alloc_pasid()
> if we always use current->mm here?
I don't think you can. But I guess the current callers of this function do
not need supervisor PASID.
In reply to Jean, I suggest we split this function into mm->pasid
assignment and keep using ioasid_alloc() directly, then supervisor PASID is
caller's bind choice.

Thanks,

Jacob

2021-04-09 21:56:25

by Jacob Pan

[permalink] [raw]
Subject: Re: [PATCH 1/2] iommu/sva: Tighten SVA bind API with explicit flags

Hi Jean-Philippe,

On Fri, 9 Apr 2021 12:22:21 +0200, Jean-Philippe Brucker
<[email protected]> wrote:

> On Thu, Apr 08, 2021 at 10:08:55AM -0700, Jacob Pan wrote:
> > The void* drvdata parameter isn't really used in iommu_sva_bind_device()
> > API,
>
> Right, it used to be a cookie passed to the device driver in the exit_mm()
> callback, but that went away with edcc40d2ab5f ("iommu: Remove
> iommu_sva_ops::mm_exit()")
>
> > the current IDXD code "borrows" the drvdata for a VT-d private flag
> > for supervisor SVA usage.
> >
> > Supervisor/Privileged mode request is a generic feature. It should be
> > promoted from the VT-d vendor driver to the generic code.
> >
> > This patch replaces void* drvdata with a unsigned int flags parameter
> > and adjusts callers accordingly.
>
> Thanks for cleaning this up. Making flags unsigned long seems more common
> (I suggested int without thinking). But it doesn't matter much, we won't
> get to 32 flags.
>
I was just thinking unsigned int is 32 bit for both 32 and 64 bit machine.

> >
> > Link: https://lore.kernel.org/linux-iommu/YFhiMLR35WWMW%2FHu@myrica/
> > Suggested-by: Jean-Philippe Brucker <[email protected]>
> > Signed-off-by: Jacob Pan <[email protected]>
> > ---
> > drivers/dma/idxd/cdev.c | 2 +-
> > drivers/dma/idxd/init.c | 6 +++---
> > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 2 +-
> > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 4 ++--
> > drivers/iommu/intel/Kconfig | 1 +
> > drivers/iommu/intel/svm.c | 18 ++++++------------
> > drivers/iommu/iommu.c | 9 ++++++---
> > drivers/misc/uacce/uacce.c | 2 +-
> > include/linux/intel-iommu.h | 2 +-
> > include/linux/intel-svm.h | 17 ++---------------
> > include/linux/iommu.h | 19
> > ++++++++++++++++--- 11 files changed, 40 insertions(+), 42 deletions(-)
> >
> > diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
> > index 0db9b82..21ec82b 100644
> > --- a/drivers/dma/idxd/cdev.c
> > +++ b/drivers/dma/idxd/cdev.c
> > @@ -103,7 +103,7 @@ static int idxd_cdev_open(struct inode *inode,
> > struct file *filp) filp->private_data = ctx;
> >
> > if (device_pasid_enabled(idxd)) {
> > - sva = iommu_sva_bind_device(dev, current->mm, NULL);
> > + sva = iommu_sva_bind_device(dev, current->mm, 0);
> > if (IS_ERR(sva)) {
> > rc = PTR_ERR(sva);
> > dev_err(dev, "pasid allocation failed: %d\n",
> > rc); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
> > index 085a0c3..cdc85f1 100644
> > --- a/drivers/dma/idxd/init.c
> > +++ b/drivers/dma/idxd/init.c
> > @@ -300,13 +300,13 @@ static struct idxd_device *idxd_alloc(struct
> > pci_dev *pdev)
> > static int idxd_enable_system_pasid(struct idxd_device *idxd)
> > {
> > - int flags;
> > + unsigned int flags;
> > unsigned int pasid;
> > struct iommu_sva *sva;
> >
> > - flags = SVM_FLAG_SUPERVISOR_MODE;
> > + flags = IOMMU_SVA_BIND_SUPERVISOR;
> >
> > - sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags);
> > + sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, flags);
> > if (IS_ERR(sva)) {
> > dev_warn(&idxd->pdev->dev,
> > "iommu sva bind failed: %ld\n", PTR_ERR(sva));
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index
> > bb251ca..23e287e 100644 ---
> > a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++
> > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -354,7 +354,7 @@
> > __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) }
> >
> > struct iommu_sva *
> > -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void
> > *drvdata) +arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
> > unsigned int flags)
>
> Could you add a check on flags:
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index
> bb251cab61f3..145ceb2fc5da 100644 ---
> a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -354,12 +354,15 @@
> __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) }
>
> struct iommu_sva *
> -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void
> *drvdata) +arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
> unsigned int flags) {
> struct iommu_sva *handle;
> struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>
> + if (flags)
> + return ERR_PTR(-EINVAL);
> +
yes, will do.

> if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> return ERR_PTR(-EINVAL);
>
>
>
> > {
> > struct iommu_sva *handle;
> > struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index f985817..b971d4d
> > 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > @@ -711,7 +711,7 @@ bool arm_smmu_master_sva_enabled(struct
> > arm_smmu_master *master); int arm_smmu_master_enable_sva(struct
> > arm_smmu_master *master); int arm_smmu_master_disable_sva(struct
> > arm_smmu_master *master); struct iommu_sva *arm_smmu_sva_bind(struct
> > device *dev, struct mm_struct *mm,
> > - void *drvdata);
> > + unsigned int flags);
> > void arm_smmu_sva_unbind(struct iommu_sva *handle);
> > u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle);
> > void arm_smmu_sva_notifier_synchronize(void);
> > @@ -742,7 +742,7 @@ static inline int
> > arm_smmu_master_disable_sva(struct arm_smmu_master *master) }
> >
> > static inline struct iommu_sva *
> > -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void
> > *drvdata) +arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
> > unsigned int flags) {
> > return ERR_PTR(-ENODEV);
> > }
> > diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
> > index 28a3d15..5415052 100644
> > --- a/drivers/iommu/intel/Kconfig
> > +++ b/drivers/iommu/intel/Kconfig
> > @@ -41,6 +41,7 @@ config INTEL_IOMMU_SVM
> > select PCI_PRI
> > select MMU_NOTIFIER
> > select IOASID
> > + select IOMMU_SVA_LIB
>
> Not needed here?
intel/svm.c is registered to sva-lib and use the IOMMU_SVA_BIND_SUPERVISOR
flag.

>
> > help
> > Shared Virtual Memory (SVM) provides a facility for devices
> > to access DMA resources through process address space by
> > diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> > index 574a7e6..4b5f8b0 100644
> > --- a/drivers/iommu/intel/svm.c
> > +++ b/drivers/iommu/intel/svm.c
> > @@ -486,12 +486,9 @@ intel_svm_bind_mm(struct device *dev, unsigned int
> > flags, } else
> > pasid_max = 1 << 20;
> >
> > - /* Bind supervisor PASID shuld have mm = NULL */
> > - if (flags & SVM_FLAG_SUPERVISOR_MODE) {
> > - if (!ecap_srs(iommu->ecap) || mm) {
> > - pr_err("Supervisor PASID with user provided
> > mm.\n");
> > - return -EINVAL;
> > - }
> > + if ((flags & IOMMU_SVA_BIND_SUPERVISOR) &&
> > !ecap_srs(iommu->ecap)) {
> > + pr_err("Supervisor PASID not supported.\n");
> > + return -EINVAL;
> > }
> >
> > if (!(flags & SVM_FLAG_PRIVATE_PASID)) {
> > @@ -593,7 +590,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int
> > flags, ret = intel_pasid_setup_first_level(iommu, dev,
> > mm ? mm->pgd : init_mm.pgd,
> > svm->pasid, FLPT_DEFAULT_DID,
> > - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
> > + (mm ? 0 : IOMMU_SVA_BIND_SUPERVISOR) |
> > (cpu_feature_enabled(X86_FEATURE_LA57)
> > ? PASID_FLAG_FL5LP : 0));
> > spin_unlock_irqrestore(&iommu->lock, iflags);
> > @@ -620,7 +617,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int
> > flags, ret = intel_pasid_setup_first_level(iommu, dev,
> > mm ? mm->pgd :
> > init_mm.pgd, svm->pasid, FLPT_DEFAULT_DID,
> > - (mm ? 0 :
> > PASID_FLAG_SUPERVISOR_MODE) |
> > + (mm ? 0 :
> > IOMMU_SVA_BIND_SUPERVISOR) | (cpu_feature_enabled(X86_FEATURE_LA57) ?
> > PASID_FLAG_FL5LP : 0));
> > spin_unlock_irqrestore(&iommu->lock, iflags);
> > @@ -1059,11 +1056,10 @@ static irqreturn_t prq_event_thread(int irq,
> > void *d)
> > #define to_intel_svm_dev(handle) container_of(handle, struct
> > intel_svm_dev, sva) struct iommu_sva *
> > -intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
> > +intel_svm_bind(struct device *dev, struct mm_struct *mm, unsigned int
> > flags) {
> > struct iommu_sva *sva = ERR_PTR(-EINVAL);
> > struct intel_svm_dev *sdev = NULL;
> > - unsigned int flags = 0;
> > int ret;
> >
> > /*
> > @@ -1071,8 +1067,6 @@ intel_svm_bind(struct device *dev, struct
> > mm_struct *mm, void *drvdata)
> > * It will require shared SVM data structures, i.e. combine
> > io_mm
> > * and intel_svm etc.
> > */
> > - if (drvdata)
> > - flags = *(unsigned int *)drvdata;
> > mutex_lock(&pasid_mutex);
> > ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev);
> > if (ret)
> > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > index d0b0a15..bf0a20f 100644
> > --- a/drivers/iommu/iommu.c
> > +++ b/drivers/iommu/iommu.c
> > @@ -2962,6 +2962,7 @@ EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
> > * iommu_sva_bind_device() - Bind a process address space to a device
> > * @dev: the device
> > * @mm: the mm to bind, caller must hold a reference to it
> > + * @flags: options for the bind operation
>
> Could also specify valid flags "IOMMU_SVA_BIND_*"
>
Good point! will do.

> > *
> > * Create a bond between device and address space, allowing the device
> > to access
> > * the mm using the returned PASID. If a bond already exists between
> > @device and @@ -2974,7 +2975,7 @@
> > EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
> > * On error, returns an ERR_PTR value.
> > */
> > struct iommu_sva *
> > -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void
> > *drvdata) +iommu_sva_bind_device(struct device *dev, struct mm_struct
> > *mm, unsigned int flags) {
> > struct iommu_group *group;
> > struct iommu_sva *handle = ERR_PTR(-EINVAL);
> > @@ -2987,6 +2988,9 @@ iommu_sva_bind_device(struct device *dev, struct
> > mm_struct *mm, void *drvdata) if (!group)
> > return ERR_PTR(-ENODEV);
> >
> > + /* Supervisor SVA does not need the current mm */
> > + if ((flags & IOMMU_SVA_BIND_SUPERVISOR) && mm)
> > + return ERR_PTR(-EINVAL);
> > /* Ensure device count and domain don't change while we're
> > binding */ mutex_lock(&group->mutex);
> >
> > @@ -2999,8 +3003,7 @@ iommu_sva_bind_device(struct device *dev, struct
> > mm_struct *mm, void *drvdata) if (iommu_group_device_count(group) != 1)
> > goto out_unlock;
> >
> > - handle = ops->sva_bind(dev, mm, drvdata);
> > -
> > + handle = ops->sva_bind(dev, mm, flags);
> > out_unlock:
> > mutex_unlock(&group->mutex);
> > iommu_group_put(group);
> > diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
> > index d07af4e..27e0e04 100644
> > --- a/drivers/misc/uacce/uacce.c
> > +++ b/drivers/misc/uacce/uacce.c
> > @@ -99,7 +99,7 @@ static int uacce_bind_queue(struct uacce_device
> > *uacce, struct uacce_queue *q) if (!(uacce->flags & UACCE_DEV_SVA))
> > return 0;
> >
> > - handle = iommu_sva_bind_device(uacce->parent, current->mm,
> > NULL);
> > + handle = iommu_sva_bind_device(uacce->parent, current->mm, 0);
> > if (IS_ERR(handle))
> > return PTR_ERR(handle);
> >
> > diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> > index 1bc46b8..cdff752 100644
> > --- a/include/linux/intel-iommu.h
> > +++ b/include/linux/intel-iommu.h
> > @@ -757,7 +757,7 @@ int intel_svm_bind_gpasid(struct iommu_domain
> > *domain, struct device *dev, struct iommu_gpasid_bind_data *data);
> > int intel_svm_unbind_gpasid(struct device *dev, u32 pasid);
> > struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct
> > *mm,
> > - void *drvdata);
> > + unsigned int flags);
> > void intel_svm_unbind(struct iommu_sva *handle);
> > u32 intel_svm_get_pasid(struct iommu_sva *handle);
> > int intel_svm_page_response(struct device *dev, struct
> > iommu_fault_event *evt, diff --git a/include/linux/intel-svm.h
> > b/include/linux/intel-svm.h index 39d368a..ef6b753 100644
> > --- a/include/linux/intel-svm.h
> > +++ b/include/linux/intel-svm.h
> > @@ -30,30 +30,17 @@ struct svm_dev_ops {
> > * if there is no other way to do so. It should be used sparingly, if
> > at all. */
> > #define SVM_FLAG_PRIVATE_PASID (1<<0)
> > -
> > -/*
> > - * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be
> > used only
> > - * for access to kernel addresses. No IOTLB flushes are automatically
> > done
> > - * for kernel mappings; it is valid only for access to the kernel's
> > static
> > - * 1:1 mapping of physical memory — not to vmalloc or even module
> > mappings.
> > - * A future API addition may permit the use of such ranges, by means
> > of an
> > - * explicit IOTLB flush call (akin to the DMA API's unmap method).
> > - *
> > - * It is unlikely that we will ever hook into flush_tlb_kernel_range()
> > to
> > - * do such IOTLB flushes automatically.
> > - */
> > -#define SVM_FLAG_SUPERVISOR_MODE (1<<1)
> > /*
> > * The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest
> > * processes. Compared to the host bind, the primary differences are:
> > * 1. mm life cycle management
> > * 2. fault reporting
> > */
> > -#define SVM_FLAG_GUEST_MODE (1<<2)
> > +#define SVM_FLAG_GUEST_MODE (1<<1)
> > /*
> > * The SVM_FLAG_GUEST_PASID flag is used when a guest has its own
> > PASID space,
> > * which requires guest and host PASID translation at both directions.
> > */
> > -#define SVM_FLAG_GUEST_PASID (1<<3)
> > +#define SVM_FLAG_GUEST_PASID (1<<2)
> >
> > #endif /* __INTEL_SVM_H__ */
> > diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> > index 5e7fe51..a3fbaa2 100644
> > --- a/include/linux/iommu.h
> > +++ b/include/linux/iommu.h
> > @@ -166,6 +166,19 @@ enum iommu_dev_features {
> >
> > #ifdef CONFIG_IOMMU_API
> >
> > +/*
> > + * The IOMMU_SVA_BIND_SUPERVISOR flag requests a PASID which can be
> > used only
> > + * for access to kernel addresses. No IOTLB flushes are automatically
> > done
> > + * for kernel mappings; it is valid only for access to the kernel's
> > static
> > + * 1:1 mapping of physical memory — not to vmalloc or even module
> > mappings.
> > + * A future API addition may permit the use of such ranges, by means
> > of an
> > + * explicit IOTLB flush call (akin to the DMA API's unmap method).
> > + *
> > + * It is unlikely that we will ever hook into flush_tlb_kernel_range()
> > to
> > + * do such IOTLB flushes automatically.
> > + */
> > +#define IOMMU_SVA_BIND_SUPERVISOR BIT(0)
> > +
>
> It needs to be defined before the #ifdef CONFIG_IOMMU_API, otherwise
> drivers using it won't build with !CONFIG_IOMMU_API
>
Good point, I was counting on driver that uses SVA must depend on
CONFIG_IOMMU_API. But SVA could be an option of the driver itself.

> Thanks,
> Jean
>
> > /**
> > * struct iommu_iotlb_gather - Range information for a pending IOTLB
> > flush *
> > @@ -287,7 +300,7 @@ struct iommu_ops {
> > int (*aux_get_pasid)(struct iommu_domain *domain, struct
> > device *dev);
> > struct iommu_sva *(*sva_bind)(struct device *dev, struct
> > mm_struct *mm,
> > - void *drvdata);
> > + unsigned int flags);
> > void (*sva_unbind)(struct iommu_sva *handle);
> > u32 (*sva_get_pasid)(struct iommu_sva *handle);
> >
> > @@ -640,7 +653,7 @@ int iommu_aux_get_pasid(struct iommu_domain
> > *domain, struct device *dev);
> > struct iommu_sva *iommu_sva_bind_device(struct device *dev,
> > struct mm_struct *mm,
> > - void *drvdata);
> > + unsigned int flags);
> > void iommu_sva_unbind_device(struct iommu_sva *handle);
> > u32 iommu_sva_get_pasid(struct iommu_sva *handle);
> >
> > @@ -1015,7 +1028,7 @@ iommu_aux_get_pasid(struct iommu_domain *domain,
> > struct device *dev) }
> >
> > static inline struct iommu_sva *
> > -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void
> > *drvdata) +iommu_sva_bind_device(struct device *dev, struct mm_struct
> > *mm, unsigned int flags) {
> > return NULL;
> > }
> > --
> > 2.7.4
> >


Thanks,

Jacob

2021-04-13 15:23:57

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH 1/2] iommu/sva: Tighten SVA bind API with explicit flags

Hi Jacob,

I love your patch! Yet something to improve:

[auto build test ERROR on vkoul-dmaengine/next]
[also build test ERROR on char-misc/char-misc-testing v5.12-rc7]
[cannot apply to iommu/next next-20210413]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url: https://github.com/0day-ci/linux/commits/Jacob-Pan/iommu-sva-Tighten-SVA-bind-API-with-explicit-flags/20210409-094521
base: https://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine.git next
config: x86_64-randconfig-a016-20200531 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
# https://github.com/0day-ci/linux/commit/3b009446e2f451c401cff7a6d4766424acbcc890
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Jacob-Pan/iommu-sva-Tighten-SVA-bind-API-with-explicit-flags/20210409-094521
git checkout 3b009446e2f451c401cff7a6d4766424acbcc890
# save the attached .config to linux build tree
make W=1 ARCH=x86_64

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

drivers/dma/idxd/init.c: In function 'idxd_enable_system_pasid':
>> drivers/dma/idxd/init.c:307:10: error: 'IOMMU_SVA_BIND_SUPERVISOR' undeclared (first use in this function)
307 | flags = IOMMU_SVA_BIND_SUPERVISOR;
| ^~~~~~~~~~~~~~~~~~~~~~~~~
drivers/dma/idxd/init.c:307:10: note: each undeclared identifier is reported only once for each function it appears in


vim +/IOMMU_SVA_BIND_SUPERVISOR +307 drivers/dma/idxd/init.c

300
301 static int idxd_enable_system_pasid(struct idxd_device *idxd)
302 {
303 unsigned int flags;
304 unsigned int pasid;
305 struct iommu_sva *sva;
306
> 307 flags = IOMMU_SVA_BIND_SUPERVISOR;
308
309 sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, flags);
310 if (IS_ERR(sva)) {
311 dev_warn(&idxd->pdev->dev,
312 "iommu sva bind failed: %ld\n", PTR_ERR(sva));
313 return PTR_ERR(sva);
314 }
315
316 pasid = iommu_sva_get_pasid(sva);
317 if (pasid == IOMMU_PASID_INVALID) {
318 iommu_sva_unbind_device(sva);
319 return -ENODEV;
320 }
321
322 idxd->sva = sva;
323 idxd->pasid = pasid;
324 dev_dbg(&idxd->pdev->dev, "system pasid: %u\n", pasid);
325 return 0;
326 }
327

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]


Attachments:
(No filename) (2.68 kB)
.config.gz (36.65 kB)
Download all attachments

2021-04-14 07:15:24

by Jacob Pan

[permalink] [raw]
Subject: Re: [PATCH 1/2] iommu/sva: Tighten SVA bind API with explicit flags

Hi Baolu,
Thanks for the view.

On Fri, 9 Apr 2021 20:24:22 +0800, Lu Baolu <[email protected]>
wrote:

> Hi Jacob,
>
> On 2021/4/9 1:08, Jacob Pan wrote:
> > The void* drvdata parameter isn't really used in iommu_sva_bind_device()
> > API, the current IDXD code "borrows" the drvdata for a VT-d private flag
> > for supervisor SVA usage.
> >
> > Supervisor/Privileged mode request is a generic feature. It should be
> > promoted from the VT-d vendor driver to the generic code.
> >
> > This patch replaces void* drvdata with a unsigned int flags parameter
> > and adjusts callers accordingly.
> >
> > Link: https://lore.kernel.org/linux-iommu/YFhiMLR35WWMW%2FHu@myrica/
> > Suggested-by: Jean-Philippe Brucker <[email protected]>
> > Signed-off-by: Jacob Pan <[email protected]>
> > ---
> > drivers/dma/idxd/cdev.c | 2 +-
> > drivers/dma/idxd/init.c | 6 +++---
> > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 2 +-
> > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 4 ++--
> > drivers/iommu/intel/Kconfig | 1 +
> > drivers/iommu/intel/svm.c | 18
> > ++++++------------ drivers/iommu/iommu.c | 9
> > ++++++--- drivers/misc/uacce/uacce.c | 2 +-
> > include/linux/intel-iommu.h | 2 +-
> > include/linux/intel-svm.h | 17 ++---------------
> > include/linux/iommu.h | 19
> > ++++++++++++++++--- 11 files changed, 40 insertions(+), 42 deletions(-)
> >
> > diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
> > index 0db9b82..21ec82b 100644
> > --- a/drivers/dma/idxd/cdev.c
> > +++ b/drivers/dma/idxd/cdev.c
> > @@ -103,7 +103,7 @@ static int idxd_cdev_open(struct inode *inode,
> > struct file *filp) filp->private_data = ctx;
> >
> > if (device_pasid_enabled(idxd)) {
> > - sva = iommu_sva_bind_device(dev, current->mm, NULL);
> > + sva = iommu_sva_bind_device(dev, current->mm, 0);
> > if (IS_ERR(sva)) {
> > rc = PTR_ERR(sva);
> > dev_err(dev, "pasid allocation failed: %d\n",
> > rc); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
> > index 085a0c3..cdc85f1 100644
> > --- a/drivers/dma/idxd/init.c
> > +++ b/drivers/dma/idxd/init.c
> > @@ -300,13 +300,13 @@ static struct idxd_device *idxd_alloc(struct
> > pci_dev *pdev)
> > static int idxd_enable_system_pasid(struct idxd_device *idxd)
> > {
> > - int flags;
> > + unsigned int flags;
> > unsigned int pasid;
> > struct iommu_sva *sva;
> >
> > - flags = SVM_FLAG_SUPERVISOR_MODE;
> > + flags = IOMMU_SVA_BIND_SUPERVISOR;
>
> With SVM_FLAG_SUPERVISOR_MODE removed, I guess
>
> #include <linux/intel-svm.h>
>
> in this file could be removed as well.
yes, good point.

>
> >
> > - sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags);
> > + sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, flags);
> > if (IS_ERR(sva)) {
> > dev_warn(&idxd->pdev->dev,
> > "iommu sva bind failed: %ld\n",
> > PTR_ERR(sva)); diff --git
> > a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index
> > bb251ca..23e287e 100644 ---
> > a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++
> > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -354,7 +354,7 @@
> > __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) }
> > struct iommu_sva *
> > -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void
> > *drvdata) +arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
> > unsigned int flags) {
> > struct iommu_sva *handle;
> > struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index f985817..b971d4d
> > 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> > @@ -711,7 +711,7 @@ bool arm_smmu_master_sva_enabled(struct
> > arm_smmu_master *master); int arm_smmu_master_enable_sva(struct
> > arm_smmu_master *master); int arm_smmu_master_disable_sva(struct
> > arm_smmu_master *master); struct iommu_sva *arm_smmu_sva_bind(struct
> > device *dev, struct mm_struct *mm,
> > - void *drvdata);
> > + unsigned int flags);
> > void arm_smmu_sva_unbind(struct iommu_sva *handle);
> > u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle);
> > void arm_smmu_sva_notifier_synchronize(void);
> > @@ -742,7 +742,7 @@ static inline int
> > arm_smmu_master_disable_sva(struct arm_smmu_master *master) }
> >
> > static inline struct iommu_sva *
> > -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void
> > *drvdata) +arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
> > unsigned int flags) {
> > return ERR_PTR(-ENODEV);
> > }
> > diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
> > index 28a3d15..5415052 100644
> > --- a/drivers/iommu/intel/Kconfig
> > +++ b/drivers/iommu/intel/Kconfig
> > @@ -41,6 +41,7 @@ config INTEL_IOMMU_SVM
> > select PCI_PRI
> > select MMU_NOTIFIER
> > select IOASID
> > + select IOMMU_SVA_LIB
>
> Why?
Right, not needed. VT-d is not using iommu_sva_alloc/free_pasid() yet. I was
under the wrong impression that iommu_sva_bind_device is part of the
sva-lib.

>
> > help
> > Shared Virtual Memory (SVM) provides a facility for devices
> > to access DMA resources through process address space by
> > diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
> > index 574a7e6..4b5f8b0 100644
> > --- a/drivers/iommu/intel/svm.c
> > +++ b/drivers/iommu/intel/svm.c
> > @@ -486,12 +486,9 @@ intel_svm_bind_mm(struct device *dev, unsigned int
> > flags, } else
> > pasid_max = 1 << 20;
> >
> > - /* Bind supervisor PASID shuld have mm = NULL */
> > - if (flags & SVM_FLAG_SUPERVISOR_MODE) {
> > - if (!ecap_srs(iommu->ecap) || mm) {
> > - pr_err("Supervisor PASID with user provided
> > mm.\n");
> > - return -EINVAL;
> > - }
> > + if ((flags & IOMMU_SVA_BIND_SUPERVISOR) &&
> > !ecap_srs(iommu->ecap)) {
> > + pr_err("Supervisor PASID not supported.\n");
> > + return -EINVAL;
> > }
> >
> > if (!(flags & SVM_FLAG_PRIVATE_PASID)) {
> > @@ -593,7 +590,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int
> > flags, ret = intel_pasid_setup_first_level(iommu, dev,
> > mm ? mm->pgd : init_mm.pgd,
> > svm->pasid, FLPT_DEFAULT_DID,
> > - (mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
> > + (mm ? 0 : IOMMU_SVA_BIND_SUPERVISOR) |
> >
>
> NO. PASID_FLAG_SUPERVISOR_MODE is an internal flag to tell the helper
> whether the SRE bit should be set in the PASID entry.
>
OK, will keep them separate.

> > (cpu_feature_enabled(X86_FEATURE_LA57)
> > ? PASID_FLAG_FL5LP : 0));
> > spin_unlock_irqrestore(&iommu->lock, iflags);
> > @@ -620,7 +617,7 @@ intel_svm_bind_mm(struct device *dev, unsigned int
> > flags, ret = intel_pasid_setup_first_level(iommu, dev,
> > mm ? mm->pgd :
> > init_mm.pgd, svm->pasid, FLPT_DEFAULT_DID,
> > - (mm ? 0 :
> > PASID_FLAG_SUPERVISOR_MODE) |
> > + (mm ? 0 :
> > IOMMU_SVA_BIND_SUPERVISOR) |
>
> The same as above.
>
got it.

> > (cpu_feature_enabled(X86_FEATURE_LA57)
> > ? PASID_FLAG_FL5LP : 0));
> > spin_unlock_irqrestore(&iommu->lock, iflags);
> > @@ -1059,11 +1056,10 @@ static irqreturn_t prq_event_thread(int irq,
> > void *d)
> > #define to_intel_svm_dev(handle) container_of(handle, struct
> > intel_svm_dev, sva) struct iommu_sva *
> > -intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
> > +intel_svm_bind(struct device *dev, struct mm_struct *mm, unsigned int
> > flags) {
> > struct iommu_sva *sva = ERR_PTR(-EINVAL);
> > struct intel_svm_dev *sdev = NULL;
> > - unsigned int flags = 0;
> > int ret;
> >
> > /*
> > @@ -1071,8 +1067,6 @@ intel_svm_bind(struct device *dev, struct
> > mm_struct *mm, void *drvdata)
> > * It will require shared SVM data structures, i.e. combine
> > io_mm
> > * and intel_svm etc.
> > */
> > - if (drvdata)
> > - flags = *(unsigned int *)drvdata;
> > mutex_lock(&pasid_mutex);
> > ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev);
> > if (ret)
> > diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> > index d0b0a15..bf0a20f 100644
> > --- a/drivers/iommu/iommu.c
> > +++ b/drivers/iommu/iommu.c
> > @@ -2962,6 +2962,7 @@ EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
> > * iommu_sva_bind_device() - Bind a process address space to a device
> > * @dev: the device
> > * @mm: the mm to bind, caller must hold a reference to it
> > + * @flags: options for the bind operation
> > *
> > * Create a bond between device and address space, allowing the
> > device to access
> > * the mm using the returned PASID. If a bond already exists between
> > @device and @@ -2974,7 +2975,7 @@
> > EXPORT_SYMBOL_GPL(iommu_aux_get_pasid);
> > * On error, returns an ERR_PTR value.
> > */
> > struct iommu_sva *
> > -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void
> > *drvdata) +iommu_sva_bind_device(struct device *dev, struct mm_struct
> > *mm, unsigned int flags) {
> > struct iommu_group *group;
> > struct iommu_sva *handle = ERR_PTR(-EINVAL);
> > @@ -2987,6 +2988,9 @@ iommu_sva_bind_device(struct device *dev, struct
> > mm_struct *mm, void *drvdata) if (!group)
> > return ERR_PTR(-ENODEV);
> >
> > + /* Supervisor SVA does not need the current mm */
> > + if ((flags & IOMMU_SVA_BIND_SUPERVISOR) && mm)
> > + return ERR_PTR(-EINVAL);
> > /* Ensure device count and domain don't change while we're
> > binding */ mutex_lock(&group->mutex);
> >
> > @@ -2999,8 +3003,7 @@ iommu_sva_bind_device(struct device *dev, struct
> > mm_struct *mm, void *drvdata) if (iommu_group_device_count(group) != 1)
> > goto out_unlock;
> >
> > - handle = ops->sva_bind(dev, mm, drvdata);
> > -
> > + handle = ops->sva_bind(dev, mm, flags);
> > out_unlock:
> > mutex_unlock(&group->mutex);
> > iommu_group_put(group);
> > diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
> > index d07af4e..27e0e04 100644
> > --- a/drivers/misc/uacce/uacce.c
> > +++ b/drivers/misc/uacce/uacce.c
> > @@ -99,7 +99,7 @@ static int uacce_bind_queue(struct uacce_device
> > *uacce, struct uacce_queue *q) if (!(uacce->flags & UACCE_DEV_SVA))
> > return 0;
> >
> > - handle = iommu_sva_bind_device(uacce->parent, current->mm,
> > NULL);
> > + handle = iommu_sva_bind_device(uacce->parent, current->mm, 0);
> > if (IS_ERR(handle))
> > return PTR_ERR(handle);
> >
> > diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> > index 1bc46b8..cdff752 100644
> > --- a/include/linux/intel-iommu.h
> > +++ b/include/linux/intel-iommu.h
> > @@ -757,7 +757,7 @@ int intel_svm_bind_gpasid(struct iommu_domain
> > *domain, struct device *dev, struct iommu_gpasid_bind_data *data);
> > int intel_svm_unbind_gpasid(struct device *dev, u32 pasid);
> > struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct
> > *mm,
> > - void *drvdata);
> > + unsigned int flags);
> > void intel_svm_unbind(struct iommu_sva *handle);
> > u32 intel_svm_get_pasid(struct iommu_sva *handle);
> > int intel_svm_page_response(struct device *dev, struct
> > iommu_fault_event *evt, diff --git a/include/linux/intel-svm.h
> > b/include/linux/intel-svm.h index 39d368a..ef6b753 100644
> > --- a/include/linux/intel-svm.h
> > +++ b/include/linux/intel-svm.h
> > @@ -30,30 +30,17 @@ struct svm_dev_ops {
> > * if there is no other way to do so. It should be used sparingly, if
> > at all. */
> > #define SVM_FLAG_PRIVATE_PASID (1<<0)
> > -
> > -/*
> > - * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be
> > used only
> > - * for access to kernel addresses. No IOTLB flushes are automatically
> > done
> > - * for kernel mappings; it is valid only for access to the kernel's
> > static
> > - * 1:1 mapping of physical memory — not to vmalloc or even module
> > mappings.
> > - * A future API addition may permit the use of such ranges, by means
> > of an
> > - * explicit IOTLB flush call (akin to the DMA API's unmap method).
> > - *
> > - * It is unlikely that we will ever hook into flush_tlb_kernel_range()
> > to
> > - * do such IOTLB flushes automatically.
> > - */
> > -#define SVM_FLAG_SUPERVISOR_MODE (1<<1)
> > /*
> > * The SVM_FLAG_GUEST_MODE flag is used when a PASID bind is for guest
> > * processes. Compared to the host bind, the primary differences are:
> > * 1. mm life cycle management
> > * 2. fault reporting
> > */
> > -#define SVM_FLAG_GUEST_MODE (1<<2)
> > +#define SVM_FLAG_GUEST_MODE (1<<1)
> > /*
> > * The SVM_FLAG_GUEST_PASID flag is used when a guest has its own
> > PASID space,
> > * which requires guest and host PASID translation at both directions.
> > */
> > -#define SVM_FLAG_GUEST_PASID (1<<3)
> > +#define SVM_FLAG_GUEST_PASID (1<<2)
> >
> > #endif /* __INTEL_SVM_H__ */
> > diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> > index 5e7fe51..a3fbaa2 100644
> > --- a/include/linux/iommu.h
> > +++ b/include/linux/iommu.h
> > @@ -166,6 +166,19 @@ enum iommu_dev_features {
> >
> > #ifdef CONFIG_IOMMU_API
> >
> > +/*
> > + * The IOMMU_SVA_BIND_SUPERVISOR flag requests a PASID which can be
> > used only
> > + * for access to kernel addresses. No IOTLB flushes are automatically
> > done
> > + * for kernel mappings; it is valid only for access to the kernel's
> > static
> > + * 1:1 mapping of physical memory — not to vmalloc or even module
> > mappings.
> > + * A future API addition may permit the use of such ranges, by means
> > of an
> > + * explicit IOTLB flush call (akin to the DMA API's unmap method).
> > + *
> > + * It is unlikely that we will ever hook into flush_tlb_kernel_range()
> > to
> > + * do such IOTLB flushes automatically.
> > + */
> > +#define IOMMU_SVA_BIND_SUPERVISOR BIT(0)
> > +
> > /**
> > * struct iommu_iotlb_gather - Range information for a pending IOTLB
> > flush *
> > @@ -287,7 +300,7 @@ struct iommu_ops {
> > int (*aux_get_pasid)(struct iommu_domain *domain, struct
> > device *dev);
> > struct iommu_sva *(*sva_bind)(struct device *dev, struct
> > mm_struct *mm,
> > - void *drvdata);
> > + unsigned int flags);
> > void (*sva_unbind)(struct iommu_sva *handle);
> > u32 (*sva_get_pasid)(struct iommu_sva *handle);
> >
> > @@ -640,7 +653,7 @@ int iommu_aux_get_pasid(struct iommu_domain
> > *domain, struct device *dev);
> > struct iommu_sva *iommu_sva_bind_device(struct device *dev,
> > struct mm_struct *mm,
> > - void *drvdata);
> > + unsigned int flags);
> > void iommu_sva_unbind_device(struct iommu_sva *handle);
> > u32 iommu_sva_get_pasid(struct iommu_sva *handle);
> >
> > @@ -1015,7 +1028,7 @@ iommu_aux_get_pasid(struct iommu_domain *domain,
> > struct device *dev) }
> >
> > static inline struct iommu_sva *
> > -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void
> > *drvdata) +iommu_sva_bind_device(struct device *dev, struct mm_struct
> > *mm, unsigned int flags) {
> > return NULL;
> > }
> >
>
> Best regards,
> baolu


Thanks,

Jacob

2021-04-14 09:28:20

by Jacob Pan

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu/sva: Remove mm parameter from SVA bind API

Hi Jean,

On Fri, 9 Apr 2021 11:03:05 -0700, Jacob Pan
<[email protected]> wrote:

> > problems:
> >
> > * We don't have a use-case for binding the mm of a remote process (and
> > it's supposedly difficult for device drivers to do it securely). So
> > OK, we remove the mm argument from iommu_sva_bind_device() and use the
> > current mm. But the IOMMU driver isn't going to do
> > get_task_mm(current) every time it needs the mm being bound, it will
> > take it from iommu_sva_bind_device(). Likewise iommu_sva_alloc_pasid()
> > shouldn't need to bother with get_task_mm().
> >
> > * cgroup accounting for IOASIDs needs to be on the current task.
> > Removing the mm parameter from iommu_sva_alloc_pasid() doesn't help
> > with that. Sure it indicates that iommu_sva_alloc_pasid() needs a
> > specific task context but that's only for cgroup purpose, and I'd
> > rather pass the cgroup down from iommu_sva_bind_device() anyway (but am
> > fine with keeping it within ioasid_alloc() for now). Plus it's an
> > internal helper, easy for us to check that the callers are doing the
> > right thing.
> With the above split, we really just have one allocation function:
> ioasid_alloc(), so it can manage current cgroup accounting within. Would
> this work?
After a few attempts, I don't think the split can work better. I will
restore the mm parameter and add a warning if mm != current->mm.

Thanks,

Jacob

2021-04-14 13:40:19

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH 1/2] iommu/sva: Tighten SVA bind API with explicit flags

Hi Jacob,

I love your patch! Yet something to improve:

[auto build test ERROR on vkoul-dmaengine/next]
[also build test ERROR on char-misc/char-misc-testing v5.12-rc7]
[cannot apply to iommu/next next-20210413]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url: https://github.com/0day-ci/linux/commits/Jacob-Pan/iommu-sva-Tighten-SVA-bind-API-with-explicit-flags/20210409-094521
base: https://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine.git next
config: x86_64-randconfig-a016-20210413 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 9829f5e6b1bca9b61efc629770d28bb9014dec45)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install x86_64 cross compiling tool for clang build
# apt-get install binutils-x86-64-linux-gnu
# https://github.com/0day-ci/linux/commit/3b009446e2f451c401cff7a6d4766424acbcc890
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Jacob-Pan/iommu-sva-Tighten-SVA-bind-API-with-explicit-flags/20210409-094521
git checkout 3b009446e2f451c401cff7a6d4766424acbcc890
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

>> drivers/dma/idxd/init.c:307:10: error: use of undeclared identifier 'IOMMU_SVA_BIND_SUPERVISOR'
flags = IOMMU_SVA_BIND_SUPERVISOR;
^
drivers/dma/idxd/init.c:422:30: warning: shift count >= width of type [-Wshift-count-overflow]
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
^~~~~~~~~~~~~~~~
include/linux/dma-mapping.h:76:54: note: expanded from macro 'DMA_BIT_MASK'
#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
^ ~~~
drivers/dma/idxd/init.c:428:41: warning: shift count >= width of type [-Wshift-count-overflow]
rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
^~~~~~~~~~~~~~~~
include/linux/dma-mapping.h:76:54: note: expanded from macro 'DMA_BIT_MASK'
#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
^ ~~~
2 warnings and 1 error generated.


vim +/IOMMU_SVA_BIND_SUPERVISOR +307 drivers/dma/idxd/init.c

300
301 static int idxd_enable_system_pasid(struct idxd_device *idxd)
302 {
303 unsigned int flags;
304 unsigned int pasid;
305 struct iommu_sva *sva;
306
> 307 flags = IOMMU_SVA_BIND_SUPERVISOR;
308
309 sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, flags);
310 if (IS_ERR(sva)) {
311 dev_warn(&idxd->pdev->dev,
312 "iommu sva bind failed: %ld\n", PTR_ERR(sva));
313 return PTR_ERR(sva);
314 }
315
316 pasid = iommu_sva_get_pasid(sva);
317 if (pasid == IOMMU_PASID_INVALID) {
318 iommu_sva_unbind_device(sva);
319 return -ENODEV;
320 }
321
322 idxd->sva = sva;
323 idxd->pasid = pasid;
324 dev_dbg(&idxd->pdev->dev, "system pasid: %u\n", pasid);
325 return 0;
326 }
327

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]


Attachments:
(No filename) (3.72 kB)
.config.gz (37.53 kB)
Download all attachments

2021-04-14 15:32:28

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu/sva: Remove mm parameter from SVA bind API

On Wed, Apr 14, 2021 at 02:22:09PM +0800, Lu Baolu wrote:

> I still worry about supervisor pasid allocation.
>
> If we use iommu_sva_alloc_pasid() to allocate a supervisor pasid, which
> mm should the pasid be set? I've ever thought about passing &init_mm to
> iommu_sva_alloc_pasid(). But if you add "mm != current->mm", this seems
> not to work. Or do you prefer a separated interface for supervisor pasid
> allocation/free?

Without a mm_struct it is not SVA, so don't use SVA APIs for whatever
a 'supervisor pasid' is

Jason

2021-04-14 19:21:09

by Baolu Lu

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu/sva: Remove mm parameter from SVA bind API

Hi Jacob,

On 4/14/21 8:09 AM, Jacob Pan wrote:
> Hi Jean,
>
> On Fri, 9 Apr 2021 11:03:05 -0700, Jacob Pan
> <[email protected]> wrote:
>
>>> problems:
>>>
>>> * We don't have a use-case for binding the mm of a remote process (and
>>> it's supposedly difficult for device drivers to do it securely). So
>>> OK, we remove the mm argument from iommu_sva_bind_device() and use the
>>> current mm. But the IOMMU driver isn't going to do
>>> get_task_mm(current) every time it needs the mm being bound, it will
>>> take it from iommu_sva_bind_device(). Likewise iommu_sva_alloc_pasid()
>>> shouldn't need to bother with get_task_mm().
>>>
>>> * cgroup accounting for IOASIDs needs to be on the current task.
>>> Removing the mm parameter from iommu_sva_alloc_pasid() doesn't help
>>> with that. Sure it indicates that iommu_sva_alloc_pasid() needs a
>>> specific task context but that's only for cgroup purpose, and I'd
>>> rather pass the cgroup down from iommu_sva_bind_device() anyway (but am
>>> fine with keeping it within ioasid_alloc() for now). Plus it's an
>>> internal helper, easy for us to check that the callers are doing the
>>> right thing.
>> With the above split, we really just have one allocation function:
>> ioasid_alloc(), so it can manage current cgroup accounting within. Would
>> this work?
> After a few attempts, I don't think the split can work better. I will
> restore the mm parameter and add a warning if mm != current->mm.

I still worry about supervisor pasid allocation.

If we use iommu_sva_alloc_pasid() to allocate a supervisor pasid, which
mm should the pasid be set? I've ever thought about passing &init_mm to
iommu_sva_alloc_pasid(). But if you add "mm != current->mm", this seems
not to work. Or do you prefer a separated interface for supervisor pasid
allocation/free?

Best regards,
baolu

>
> Thanks,
>
> Jacob
>

2021-04-15 05:43:52

by Baolu Lu

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu/sva: Remove mm parameter from SVA bind API

Hi Jason,

On 4/14/21 7:26 PM, Jason Gunthorpe wrote:
> On Wed, Apr 14, 2021 at 02:22:09PM +0800, Lu Baolu wrote:
>
>> I still worry about supervisor pasid allocation.
>>
>> If we use iommu_sva_alloc_pasid() to allocate a supervisor pasid, which
>> mm should the pasid be set? I've ever thought about passing &init_mm to
>> iommu_sva_alloc_pasid(). But if you add "mm != current->mm", this seems
>> not to work. Or do you prefer a separated interface for supervisor pasid
>> allocation/free?
>
> Without a mm_struct it is not SVA, so don't use SVA APIs for whatever
> a 'supervisor pasid' is

The supervisor PASID has its mm_struct. The only difference is that the
device will set priv=1 in its DMA transactions with the PASID.

Best regards,
baolu

2021-04-15 12:00:38

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH 2/2] iommu/sva: Remove mm parameter from SVA bind API

On Thu, Apr 15, 2021 at 01:33:24PM +0800, Lu Baolu wrote:
> Hi Jason,
>
> On 4/14/21 7:26 PM, Jason Gunthorpe wrote:
> > On Wed, Apr 14, 2021 at 02:22:09PM +0800, Lu Baolu wrote:
> >
> > > I still worry about supervisor pasid allocation.
> > >
> > > If we use iommu_sva_alloc_pasid() to allocate a supervisor pasid, which
> > > mm should the pasid be set? I've ever thought about passing &init_mm to
> > > iommu_sva_alloc_pasid(). But if you add "mm != current->mm", this seems
> > > not to work. Or do you prefer a separated interface for supervisor pasid
> > > allocation/free?
> >
> > Without a mm_struct it is not SVA, so don't use SVA APIs for whatever
> > a 'supervisor pasid' is
>
> The supervisor PASID has its mm_struct. The only difference is that the
> device will set priv=1 in its DMA transactions with the PASID.

Soemthing like init_mm should not be used with 'SVA'

Jason