LinuxLists.cc - [PATCH 5/7] x86/mmu: Allocate/free PASID

2020-03-30 20:38:37

Subject: [PATCH 5/7] x86/mmu: Allocate/free PASID

PASID is shared by all threads in a process. So the logical place to keep
track of it is in the "mm". Add the field to the architecture specific
mm_context_t structure.

A PASID is allocated for an "mm" the first time any thread attaches
to an SVM capable device. Later device atatches (whether to the same
device or another SVM device) will re-use the same PASID.

The PASID is freed when the process exits (so no need to keep
reference counts on how many SVM devices are sharing the PASID).

Signed-off-by: Fenghua Yu <[email protected]>
Reviewed-by: Tony Luck <[email protected]>
---
arch/x86/include/asm/iommu.h | 2 +
arch/x86/include/asm/mmu.h | 4 ++
arch/x86/include/asm/mmu_context.h | 14 +++++
drivers/iommu/intel-svm.c | 82 +++++++++++++++++++++++++++---
4 files changed, 94 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index bf1ed2ddc74b..ed41259fe7ac 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -26,4 +26,6 @@ arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr)
return -EINVAL;
}

+void __free_pasid(struct mm_struct *mm);
+
#endif /* _ASM_X86_IOMMU_H */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index bdeae9291e5c..137bf51f19e6 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -50,6 +50,10 @@ typedef struct {
u16 pkey_allocation_map;
s16 execute_only_pkey;
#endif
+
+#ifdef CONFIG_INTEL_IOMMU_SVM
+ int pasid;
+#endif
} mm_context_t;

#define INIT_MM_CONTEXT(mm) \
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index b538d9ddee9c..1c020c7955e6 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -13,6 +13,7 @@
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/debugreg.h>
+#include <asm/iommu.h>

extern atomic64_t last_mm_ctx_id;

@@ -129,9 +130,22 @@ static inline int init_new_context(struct task_struct *tsk,
init_new_context_ldt(mm);
return 0;
}
+
+static inline void free_pasid(struct mm_struct *mm)
+{
+ if (!IS_ENABLED(CONFIG_INTEL_IOMMU_SVM))
+ return;
+
+ if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
+ return;
+
+ __free_pasid(mm);
+}
+
static inline void destroy_context(struct mm_struct *mm)
{
destroy_context_ldt(mm);
+ free_pasid(mm);
}

extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index d7f2a5358900..da718a49e91e 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -226,6 +226,45 @@ static LIST_HEAD(global_svm_list);
list_for_each_entry((sdev), &(svm)->devs, list) \
if ((d) != (sdev)->dev) {} else

+/*
+ * If this mm already has a PASID we can use it. Otherwise allocate a new one.
+ * Let the caller know if we did an allocation via 'new_pasid'.
+ */
+static int alloc_pasid(struct intel_svm *svm, struct mm_struct *mm,
+ int pasid_max, bool *new_pasid, int flags)
+{
+ int pasid;
+
+ /*
+ * Reuse the PASID if the mm already has a PASID and not a private
+ * PASID is requested.
+ */
+ if (mm && mm->context.pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
+ /*
+ * Once a PASID is allocated for this mm, the PASID
+ * stays with the mm until the mm is dropped. Reuse
+ * the PASID which has been already allocated for the
+ * mm instead of allocating a new one.
+ */
+ ioasid_set_data(mm->context.pasid, svm);
+ *new_pasid = false;
+
+ return mm->context.pasid;
+ }
+
+ /*
+ * Allocate a new pasid. Do not use PASID 0, reserved for RID to
+ * PASID.
+ */
+ pasid = ioasid_alloc(NULL, PASID_MIN, pasid_max - 1, svm);
+ if (pasid == INVALID_IOASID)
+ return -ENOSPC;
+
+ *new_pasid = true;
+
+ return pasid;
+}
+
int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
{
struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
@@ -324,6 +363,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
init_rcu_head(&sdev->rcu);

if (!svm) {
+ bool new_pasid;
+
svm = kzalloc(sizeof(*svm), GFP_KERNEL);
if (!svm) {
ret = -ENOMEM;
@@ -335,15 +376,13 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
if (pasid_max > intel_pasid_max_id)
pasid_max = intel_pasid_max_id;

- /* Do not use PASID 0, reserved for RID to PASID */
- svm->pasid = ioasid_alloc(NULL, PASID_MIN,
- pasid_max - 1, svm);
- if (svm->pasid == INVALID_IOASID) {
+ svm->pasid = alloc_pasid(svm, mm, pasid_max, &new_pasid, flags);
+ if (svm->pasid < 0) {
kfree(svm);
kfree(sdev);
- ret = -ENOSPC;
goto out;
}
+
svm->notifier.ops = &intel_mmuops;
svm->mm = mm;
svm->flags = flags;
@@ -353,7 +392,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
if (mm) {
ret = mmu_notifier_register(&svm->notifier, mm);
if (ret) {
- ioasid_free(svm->pasid);
+ if (new_pasid)
+ ioasid_free(svm->pasid);
kfree(svm);
kfree(sdev);
goto out;
@@ -371,12 +411,21 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
if (ret) {
if (mm)
mmu_notifier_unregister(&svm->notifier, mm);
- ioasid_free(svm->pasid);
+ if (new_pasid)
+ ioasid_free(svm->pasid);
kfree(svm);
kfree(sdev);
goto out;
}

+ if (mm && new_pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
+ /*
+ * Track the new pasid in the mm. The pasid will be
+ * freed at process exit. Don't track requested
+ * private PASID in the mm.
+ */
+ mm->context.pasid = svm->pasid;
+ }
list_add_tail(&svm->list, &global_svm_list);
} else {
/*
@@ -447,7 +496,8 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
kfree_rcu(sdev, rcu);

if (list_empty(&svm->devs)) {
- ioasid_free(svm->pasid);
+ /* Clear data in the pasid. */
+ ioasid_set_data(pasid, NULL);
if (svm->mm)
mmu_notifier_unregister(&svm->notifier, svm->mm);
list_del(&svm->list);
@@ -693,3 +743,19 @@ static irqreturn_t prq_event_thread(int irq, void *d)

return IRQ_RETVAL(handled);
}
+
+/* On process exit free the PASID (if one was allocated). */
+void __free_pasid(struct mm_struct *mm)
+{
+ int pasid = mm->context.pasid;
+
+ if (!pasid)
+ return;
+
+ /*
+ * Since the pasid is not bound to any svm by now, there is no race
+ * here with binding/unbinding and no need to protect the free
+ * operation by pasid_mutex.
+ */
+ ioasid_free(pasid);
+}
--
2.19.1

2020-04-26 14:57:55

by Thomas Gleixner

[permalink] [raw]

Subject: Re: [PATCH 5/7] x86/mmu: Allocate/free PASID

Fenghua Yu <[email protected]> writes:

> PASID is shared by all threads in a process. So the logical place to keep
> track of it is in the "mm". Add the field to the architecture specific
> mm_context_t structure.
>
> A PASID is allocated for an "mm" the first time any thread attaches
> to an SVM capable device. Later device atatches (whether to the same

atatches?

> device or another SVM device) will re-use the same PASID.
>
> The PASID is freed when the process exits (so no need to keep
> reference counts on how many SVM devices are sharing the PASID).

I'm not buying that. If there is an outstanding request with the PASID
of a process then tearing down the process address space and freeing the
PASID (which might be reused) is fundamentally broken.

> +void __free_pasid(struct mm_struct *mm);
> +
> #endif /* _ASM_X86_IOMMU_H */
> diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> index bdeae9291e5c..137bf51f19e6 100644
> --- a/arch/x86/include/asm/mmu.h
> +++ b/arch/x86/include/asm/mmu.h
> @@ -50,6 +50,10 @@ typedef struct {
> u16 pkey_allocation_map;
> s16 execute_only_pkey;
> #endif
> +
> +#ifdef CONFIG_INTEL_IOMMU_SVM
> + int pasid;

int? It's a value which gets programmed into the MSR along with the
valid bit (bit 31) set.

> extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
> index d7f2a5358900..da718a49e91e 100644
> --- a/drivers/iommu/intel-svm.c
> +++ b/drivers/iommu/intel-svm.c
> @@ -226,6 +226,45 @@ static LIST_HEAD(global_svm_list);
> list_for_each_entry((sdev), &(svm)->devs, list) \
> if ((d) != (sdev)->dev) {} else
>
> +/*
> + * If this mm already has a PASID we can use it. Otherwise allocate a new one.
> + * Let the caller know if we did an allocation via 'new_pasid'.
> + */
> +static int alloc_pasid(struct intel_svm *svm, struct mm_struct *mm,
> + int pasid_max, bool *new_pasid, int flags)

Again, data types please. flags are generally unsigned and not plain
int. Also pasid_max is certainly not plain int either.

> +{
> + int pasid;
> +
> + /*
> + * Reuse the PASID if the mm already has a PASID and not a private
> + * PASID is requested.
> + */
> + if (mm && mm->context.pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
> + /*
> + * Once a PASID is allocated for this mm, the PASID
> + * stays with the mm until the mm is dropped. Reuse
> + * the PASID which has been already allocated for the
> + * mm instead of allocating a new one.
> + */
> + ioasid_set_data(mm->context.pasid, svm);

So if the PASID is reused several times for different SVMs then every
time ioasid_data->private is set to a different SVM. How is that
supposed to work?

> + *new_pasid = false;
> +
> + return mm->context.pasid;
> + }
> +
> + /*
> + * Allocate a new pasid. Do not use PASID 0, reserved for RID to
> + * PASID.
> + */
> + pasid = ioasid_alloc(NULL, PASID_MIN, pasid_max - 1, svm);

ioasid_alloc() uses ioasid_t which is

typedef unsigned int ioasid_t;

Can we please have consistent types and behaviour all over the place?

> + if (pasid == INVALID_IOASID)
> + return -ENOSPC;
> +
> + *new_pasid = true;
> +
> + return pasid;
> +}
> +
> int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
> {
> struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
> @@ -324,6 +363,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
> init_rcu_head(&sdev->rcu);
>
> if (!svm) {
> + bool new_pasid;
> +
> svm = kzalloc(sizeof(*svm), GFP_KERNEL);
> if (!svm) {
> ret = -ENOMEM;
> @@ -335,15 +376,13 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
> if (pasid_max > intel_pasid_max_id)
> pasid_max = intel_pasid_max_id;
>
> - /* Do not use PASID 0, reserved for RID to PASID */
> - svm->pasid = ioasid_alloc(NULL, PASID_MIN,
> - pasid_max - 1, svm);
> - if (svm->pasid == INVALID_IOASID) {
> + svm->pasid = alloc_pasid(svm, mm, pasid_max, &new_pasid, flags);
> + if (svm->pasid < 0) {
> kfree(svm);
> kfree(sdev);
> - ret = -ENOSPC;

ret gets magically initialized to an error return value, right?

> goto out;
> }
> +
> svm->notifier.ops = &intel_mmuops;
> svm->mm = mm;
> svm->flags = flags;
> @@ -353,7 +392,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
> if (mm) {
> ret = mmu_notifier_register(&svm->notifier, mm);
> if (ret) {
> - ioasid_free(svm->pasid);
> + if (new_pasid)
> + ioasid_free(svm->pasid);
> kfree(svm);
> kfree(sdev);
> goto out;
> @@ -371,12 +411,21 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
> if (ret) {
> if (mm)
> mmu_notifier_unregister(&svm->notifier, mm);
> - ioasid_free(svm->pasid);
> + if (new_pasid)
> + ioasid_free(svm->pasid);
> kfree(svm);
> kfree(sdev);

So there are 3 places now freeing svm and sdev and 2 of them
conditionally free svm->pasid. Can you please rewrite that to have a
proper error exit path instead of glueing that stuff into the existing
mess?

> goto out;
> }
>
> + if (mm && new_pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
> + /*
> + * Track the new pasid in the mm. The pasid will be
> + * freed at process exit. Don't track requested
> + * private PASID in the mm.

What happens to private PASIDs?

Thanks,

tglx

2020-04-27 22:21:35

by Fenghua Yu

[permalink] [raw]

Subject: Re: [PATCH 5/7] x86/mmu: Allocate/free PASID

On Sun, Apr 26, 2020 at 04:55:25PM +0200, Thomas Gleixner wrote:
> Fenghua Yu <[email protected]> writes:
> > +++ b/arch/x86/include/asm/mmu.h @@ -50,6 +50,10 @@ typedef struct {
> > u16 pkey_allocation_map; s16 execute_only_pkey;
> > #endif
> > + +#ifdef CONFIG_INTEL_IOMMU_SVM + int pasid;
>
> int? It's a value which gets programmed into the MSR along with the valid
> bit (bit 31) set.

The pasid is defined as "int" in struct intel_svm and in
intel_svm_bind_mm() and intel_svm_unbind_mm(). So the pasid defined in this
patch follows the same type defined in those places.

But as you pointed out below, ioasid_t is defined as "unsigned int".

>
> > extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
> > diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
> > index d7f2a5358900..da718a49e91e 100644 --- a/drivers/iommu/intel-svm.c
> > +++ b/drivers/iommu/intel-svm.c @@ -226,6 +226,45 @@ static
> > LIST_HEAD(global_svm_list);
> > list_for_each_entry((sdev), &(svm)->devs, list) \
> > if ((d) != (sdev)->dev) {} else
> >
> > +/* + * If this mm already has a PASID we can use it. Otherwise
> > allocate a new one. + * Let the caller know if we did an allocation via
> > 'new_pasid'. + */ +static int alloc_pasid(struct intel_svm *svm, struct
> > mm_struct *mm, + int pasid_max, bool *new_pasid, int flags)
>
> Again, data types please. flags are generally unsigned and not plain int.
> Also pasid_max is certainly not plain int either.

The caller defines pasid_max and flags as "int". This function just follows
the caller's definitions.

But I will change their definitions to "unsigned int" here.

>
> > + *new_pasid = false; + + return mm->context.pasid; + } + + /* + *
> > Allocate a new pasid. Do not use PASID 0, reserved for RID to + *
> > PASID. + */ + pasid = ioasid_alloc(NULL, PASID_MIN, pasid_max - 1,
> > svm);
>
> ioasid_alloc() uses ioasid_t which is
>
> typedef unsigned int ioasid_t;
>
> Can we please have consistent types and behaviour all over the place?

Should I just define "pasid", "pasid_max", "flags" as "unsigned int" for
the new functions/code?

Or should I also change their types to "unsigned int" in the original
svm code (struct intel_svm, ...bind_mm(), etc)? I'm afraid that will be
a lot of changes and should be in a separate preparation patch.

Thanks.

-Fenghua

2020-04-27 23:46:36

by Thomas Gleixner

[permalink] [raw]

Subject: Re: [PATCH 5/7] x86/mmu: Allocate/free PASID

Fenghua Yu <[email protected]> writes:
> On Sun, Apr 26, 2020 at 04:55:25PM +0200, Thomas Gleixner wrote:
>> Fenghua Yu <[email protected]> writes:
>> > + +#ifdef CONFIG_INTEL_IOMMU_SVM + int pasid;
>>
>> int? It's a value which gets programmed into the MSR along with the valid
>> bit (bit 31) set.
>
> The pasid is defined as "int" in struct intel_svm and in
> intel_svm_bind_mm() and intel_svm_unbind_mm(). So the pasid defined in this
> patch follows the same type defined in those places.

Which are wrong to begin with.

>> ioasid_alloc() uses ioasid_t which is
>>
>> typedef unsigned int ioasid_t;
>>
>> Can we please have consistent types and behaviour all over the place?
>
> Should I just define "pasid", "pasid_max", "flags" as "unsigned int" for
> the new functions/code?
>
> Or should I also change their types to "unsigned int" in the original
> svm code (struct intel_svm, ...bind_mm(), etc)? I'm afraid that will be
> a lot of changes and should be in a separate preparation patch.

Yes, please. The existence of non-sensical code is not an excuse to
proliferate it.

Thanks,

tglx

2020-04-28 18:25:01

by Jacob Pan

[permalink] [raw]

Subject: Re: [PATCH 5/7] x86/mmu: Allocate/free PASID

On Sun, 26 Apr 2020 16:55:25 +0200
Thomas Gleixner <[email protected]> wrote:

> Fenghua Yu <[email protected]> writes:
>
> > PASID is shared by all threads in a process. So the logical place
> > to keep track of it is in the "mm". Add the field to the
> > architecture specific mm_context_t structure.
> >
> > A PASID is allocated for an "mm" the first time any thread attaches
> > to an SVM capable device. Later device atatches (whether to the
> > same
>
> atatches?
>
> > device or another SVM device) will re-use the same PASID.
> >
> > The PASID is freed when the process exits (so no need to keep
> > reference counts on how many SVM devices are sharing the PASID).
>
> I'm not buying that. If there is an outstanding request with the PASID
> of a process then tearing down the process address space and freeing
> the PASID (which might be reused) is fundamentally broken.
>
Device driver unbind PASID is tied to FD release. So when a process
exits, FD close causes driver to do the following:
1. stops DMA
2. unbind PASID (clears the PASID entry in IOMMU, flush all TLBs, drain
in flight page requests)

For bare metal SVM, if the last mmdrop always happens after FD release,
we can ensure no outstanding requests at the point of ioasid_free().
Perhaps this is a wrong assumption?

For guest SVM, there will be more users of a PASID. I am also
working on adding refcounting to ioasid. ioasid_free() will not release
the PASID back to the pool until all references are dropped.

> > +void __free_pasid(struct mm_struct *mm);
> > +
> > #endif /* _ASM_X86_IOMMU_H */
> > diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> > index bdeae9291e5c..137bf51f19e6 100644
> > --- a/arch/x86/include/asm/mmu.h
> > +++ b/arch/x86/include/asm/mmu.h
> > @@ -50,6 +50,10 @@ typedef struct {
> > u16 pkey_allocation_map;
> > s16 execute_only_pkey;
> > #endif
> > +
> > +#ifdef CONFIG_INTEL_IOMMU_SVM
> > + int pasid;
>
> int? It's a value which gets programmed into the MSR along with the
> valid bit (bit 31) set.
>
> > extern void switch_mm(struct mm_struct *prev, struct mm_struct
> > *next, diff --git a/drivers/iommu/intel-svm.c
> > b/drivers/iommu/intel-svm.c index d7f2a5358900..da718a49e91e 100644
> > --- a/drivers/iommu/intel-svm.c
> > +++ b/drivers/iommu/intel-svm.c
> > @@ -226,6 +226,45 @@ static LIST_HEAD(global_svm_list);
> > list_for_each_entry((sdev), &(svm)->devs, list) \
> > if ((d) != (sdev)->dev) {} else
> >
> > +/*
> > + * If this mm already has a PASID we can use it. Otherwise
> > allocate a new one.
> > + * Let the caller know if we did an allocation via 'new_pasid'.
> > + */
> > +static int alloc_pasid(struct intel_svm *svm, struct mm_struct *mm,
> > + int pasid_max, bool *new_pasid, int
> > flags)
>
> Again, data types please. flags are generally unsigned and not plain
> int. Also pasid_max is certainly not plain int either.
>
> > +{
> > + int pasid;
> > +
> > + /*
> > + * Reuse the PASID if the mm already has a PASID and not a
> > private
> > + * PASID is requested.
> > + */
> > + if (mm && mm->context.pasid && !(flags &
> > SVM_FLAG_PRIVATE_PASID)) {
> > + /*
> > + * Once a PASID is allocated for this mm, the PASID
> > + * stays with the mm until the mm is dropped. Reuse
> > + * the PASID which has been already allocated for
> > the
> > + * mm instead of allocating a new one.
> > + */
> > + ioasid_set_data(mm->context.pasid, svm);
>
> So if the PASID is reused several times for different SVMs then every
> time ioasid_data->private is set to a different SVM. How is that
> supposed to work?
>
For the lifetime of the mm, there is only one PASID. svm_bind/unbind_mm
could happen many times with different private data: intel_svm.
Multiple devices can bind to the same PASID as well. But private data
don't change within the first bind and last unbind.

> > + *new_pasid = false;
> > +
> > + return mm->context.pasid;
> > + }
> > +
> > + /*
> > + * Allocate a new pasid. Do not use PASID 0, reserved for
> > RID to
> > + * PASID.
> > + */
> > + pasid = ioasid_alloc(NULL, PASID_MIN, pasid_max - 1,
> > svm);
>
> ioasid_alloc() uses ioasid_t which is
>
> typedef unsigned int ioasid_t;
>
> Can we please have consistent types and behaviour all over the place?
>
> > + if (pasid == INVALID_IOASID)
> > + return -ENOSPC;
> > +
> > + *new_pasid = true;
> > +
> > + return pasid;
> > +}
> > +
> > int intel_svm_bind_mm(struct device *dev, int *pasid, int flags,
> > struct svm_dev_ops *ops) {
> > struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
> > @@ -324,6 +363,8 @@ int intel_svm_bind_mm(struct device *dev, int
> > *pasid, int flags, struct svm_dev_ init_rcu_head(&sdev->rcu);
> >
> > if (!svm) {
> > + bool new_pasid;
> > +
> > svm = kzalloc(sizeof(*svm), GFP_KERNEL);
> > if (!svm) {
> > ret = -ENOMEM;
> > @@ -335,15 +376,13 @@ int intel_svm_bind_mm(struct device *dev, int
> > *pasid, int flags, struct svm_dev_ if (pasid_max >
> > intel_pasid_max_id) pasid_max = intel_pasid_max_id;
> >
> > - /* Do not use PASID 0, reserved for RID to PASID */
> > - svm->pasid = ioasid_alloc(NULL, PASID_MIN,
> > - pasid_max - 1, svm);
> > - if (svm->pasid == INVALID_IOASID) {
> > + svm->pasid = alloc_pasid(svm, mm, pasid_max,
> > &new_pasid, flags);
> > + if (svm->pasid < 0) {
> > kfree(svm);
> > kfree(sdev);
> > - ret = -ENOSPC;
>
> ret gets magically initialized to an error return value, right?
>
> > goto out;
> > }
> > +
> > svm->notifier.ops = &intel_mmuops;
> > svm->mm = mm;
> > svm->flags = flags;
> > @@ -353,7 +392,8 @@ int intel_svm_bind_mm(struct device *dev, int
> > *pasid, int flags, struct svm_dev_ if (mm) {
> > ret =
> > mmu_notifier_register(&svm->notifier, mm); if (ret) {
> > - ioasid_free(svm->pasid);
> > + if (new_pasid)
> > + ioasid_free(svm->pasid);
> > kfree(svm);
> > kfree(sdev);
> > goto out;
> > @@ -371,12 +411,21 @@ int intel_svm_bind_mm(struct device *dev, int
> > *pasid, int flags, struct svm_dev_ if (ret) {
> > if (mm)
> > mmu_notifier_unregister(&svm->notifier,
> > mm);
> > - ioasid_free(svm->pasid);
> > + if (new_pasid)
> > + ioasid_free(svm->pasid);
> > kfree(svm);
> > kfree(sdev);
>
> So there are 3 places now freeing svm and sdev and 2 of them
> conditionally free svm->pasid. Can you please rewrite that to have a
> proper error exit path instead of glueing that stuff into the existing
> mess?
>
> > goto out;
> > }
> >
> > + if (mm && new_pasid && !(flags &
> > SVM_FLAG_PRIVATE_PASID)) {
> > + /*
> > + * Track the new pasid in the mm. The
> > pasid will be
> > + * freed at process exit. Don't track
> > requested
> > + * private PASID in the mm.
>
> What happens to private PASIDs?
>
Private PASID feature will be removed. We are in the process of
converting from intel_svm_bind_mm to generic sva_bind_device API.
https://lkml.org/lkml/2020/3/23/1022

Thanks,

Jacob

> Thanks,
>
> tglx

2020-04-28 18:58:09

by Thomas Gleixner

[permalink] [raw]

Subject: Re: [PATCH 5/7] x86/mmu: Allocate/free PASID

"Jacob Pan (Jun)" <[email protected]> writes:
> On Sun, 26 Apr 2020 16:55:25 +0200
> Thomas Gleixner <[email protected]> wrote:
>> Fenghua Yu <[email protected]> writes:
>> > The PASID is freed when the process exits (so no need to keep
>> > reference counts on how many SVM devices are sharing the PASID).
>>
>> I'm not buying that. If there is an outstanding request with the PASID
>> of a process then tearing down the process address space and freeing
>> the PASID (which might be reused) is fundamentally broken.
>>
> Device driver unbind PASID is tied to FD release. So when a process
> exits, FD close causes driver to do the following:
>
> 1. stops DMA
> 2. unbind PASID (clears the PASID entry in IOMMU, flush all TLBs, drain
> in flight page requests)

Fair enough. Explaining that somewhere might be helpful.

> For bare metal SVM, if the last mmdrop always happens after FD release,
> we can ensure no outstanding requests at the point of ioasid_free().
> Perhaps this is a wrong assumption?

If fd release cleans up then how should there be something in flight at
the final mmdrop?

> For guest SVM, there will be more users of a PASID. I am also
> working on adding refcounting to ioasid. ioasid_free() will not release
> the PASID back to the pool until all references are dropped.

What does more users mean?

>> > + if (mm && mm->context.pasid && !(flags &
>> > SVM_FLAG_PRIVATE_PASID)) {
>> > + /*
>> > + * Once a PASID is allocated for this mm, the PASID
>> > + * stays with the mm until the mm is dropped. Reuse
>> > + * the PASID which has been already allocated for
>> > the
>> > + * mm instead of allocating a new one.
>> > + */
>> > + ioasid_set_data(mm->context.pasid, svm);
>>
>> So if the PASID is reused several times for different SVMs then every
>> time ioasid_data->private is set to a different SVM. How is that
>> supposed to work?
>>
> For the lifetime of the mm, there is only one PASID. svm_bind/unbind_mm
> could happen many times with different private data: intel_svm.
> Multiple devices can bind to the same PASID as well. But private data
> don't change within the first bind and last unbind.

Ok. I read through that spaghetti of intel_svm_bind_mm() again and now I
start to get an idea how that is supposed to work. What a mess.

That function really wants to be restructured in a way so it is
understandable to mere mortals.

Thanks,

tglx

2020-04-28 19:09:44

by Tony Luck

[permalink] [raw]

Subject: RE: [PATCH 5/7] x86/mmu: Allocate/free PASID

> If fd release cleans up then how should there be something in flight at
> the final mmdrop?

ENQCMD from the user is only synchronous in that it lets the user know their
request has been added to a queue (or not). Execution of the request may happen
later (if the device is busy working on requests for other users). The request will
take some time to complete. Someone told me the theoretical worst case once,
which I've since forgotten, but it can be a long time.

So the driver needs to use flush/drain operations to make sure all the in-flight
work has completed before releasing/re-using the PASID.

-Tony

2020-04-28 20:42:52

by Jacob Pan

[permalink] [raw]

Subject: Re: [PATCH 5/7] x86/mmu: Allocate/free PASID

On Tue, 28 Apr 2020 20:54:01 +0200
Thomas Gleixner <[email protected]> wrote:

> "Jacob Pan (Jun)" <[email protected]> writes:
> > On Sun, 26 Apr 2020 16:55:25 +0200
> > Thomas Gleixner <[email protected]> wrote:
> >> Fenghua Yu <[email protected]> writes:
> >> > The PASID is freed when the process exits (so no need to keep
> >> > reference counts on how many SVM devices are sharing the
> >> > PASID).
> >>
> >> I'm not buying that. If there is an outstanding request with the
> >> PASID of a process then tearing down the process address space and
> >> freeing the PASID (which might be reused) is fundamentally broken.
> >>
> > Device driver unbind PASID is tied to FD release. So when a process
> > exits, FD close causes driver to do the following:
> >
> > 1. stops DMA
> > 2. unbind PASID (clears the PASID entry in IOMMU, flush all TLBs,
> > drain in flight page requests)
>
> Fair enough. Explaining that somewhere might be helpful.
>
Will do. I plan to document this in a kernel doc for IOASID/PASID
lifecycle management.

> > For bare metal SVM, if the last mmdrop always happens after FD
> > release, we can ensure no outstanding requests at the point of
> > ioasid_free(). Perhaps this is a wrong assumption?
>
> If fd release cleans up then how should there be something in flight
> at the final mmdrop?
>
> > For guest SVM, there will be more users of a PASID. I am also
> > working on adding refcounting to ioasid. ioasid_free() will not
> > release the PASID back to the pool until all references are
> > dropped.
>
> What does more users mean?
For VT-d, a PASID can be used by VFIO, IOMMU driver, KVM, and Virtual
Device Composition Module (VDCM*) at the same time.

*https://software.intel.com/en-us/download/intel-data-streaming-accelerator-preliminary-architecture-specification

There are HW context associated with the PASID in IOMMU, KVM, and VDCM.
So before the lifetime of the PASID is over, clean up must be done in
all of the above. PASID cannot be reclaimed until the last user drops
its reference. Our plan is to do notification and refcounting.

>
> >> > + if (mm && mm->context.pasid && !(flags &
> >> > SVM_FLAG_PRIVATE_PASID)) {
> >> > + /*
> >> > + * Once a PASID is allocated for this mm, the
> >> > PASID
> >> > + * stays with the mm until the mm is dropped.
> >> > Reuse
> >> > + * the PASID which has been already allocated
> >> > for the
> >> > + * mm instead of allocating a new one.
> >> > + */
> >> > + ioasid_set_data(mm->context.pasid, svm);
> >>
> >> So if the PASID is reused several times for different SVMs then
> >> every time ioasid_data->private is set to a different SVM. How is
> >> that supposed to work?
> >>
> > For the lifetime of the mm, there is only one PASID.
> > svm_bind/unbind_mm could happen many times with different private
> > data: intel_svm. Multiple devices can bind to the same PASID as
> > well. But private data don't change within the first bind and last
> > unbind.
>
> Ok. I read through that spaghetti of intel_svm_bind_mm() again and
> now I start to get an idea how that is supposed to work. What a mess.
>
> That function really wants to be restructured in a way so it is
> understandable to mere mortals.
>

Agreed. We are adding many new features and converging with generic
sva_bind_device. Things will get more clear after we have fewer moving
pieces.

> Thanks,
>
> tglx

2020-04-28 20:44:09

by Jacob Pan

[permalink] [raw]

Subject: Re: [PATCH 5/7] x86/mmu: Allocate/free PASID

On Tue, 28 Apr 2020 12:07:25 -0700
"Luck, Tony" <[email protected]> wrote:

> > If fd release cleans up then how should there be something in
> > flight at the final mmdrop?
>
> ENQCMD from the user is only synchronous in that it lets the user
> know their request has been added to a queue (or not). Execution of
> the request may happen later (if the device is busy working on
> requests for other users). The request will take some time to
> complete. Someone told me the theoretical worst case once, which I've
> since forgotten, but it can be a long time.
>
> So the driver needs to use flush/drain operations to make sure all
> the in-flight work has completed before releasing/re-using the PASID.
>
Are you suggesting we should let driver also hold a reference of the
PASID?

> -Tony

2020-04-28 21:00:13

by Fenghua Yu

[permalink] [raw]

Subject: Re: [PATCH 5/7] x86/mmu: Allocate/free PASID

On Sun, Apr 26, 2020 at 04:55:25PM +0200, Thomas Gleixner wrote:
> Fenghua Yu <[email protected]> writes:
> > diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> > index bdeae9291e5c..137bf51f19e6 100644
> > --- a/arch/x86/include/asm/mmu.h
> > +++ b/arch/x86/include/asm/mmu.h
> > @@ -50,6 +50,10 @@ typedef struct {
> > u16 pkey_allocation_map;
> > s16 execute_only_pkey;
> > #endif
> > +
> > +#ifdef CONFIG_INTEL_IOMMU_SVM
> > + int pasid;
>
> int? It's a value which gets programmed into the MSR along with the
> valid bit (bit 31) set.

BTW, ARM is working on PASID as well. Christoph suggested that the PASID
should be defined in mm_struct instead of mm->context so that both ARM and X86
can access it:
https://lore.kernel.org/linux-iommu/[email protected]/T/#mb57110ffe1aaa24750eeea4f93b611f0d1913911

So I will define "pasid" to mm_struct in a separate patch in the next version.

Thanks.

-Fenghua

2020-04-28 21:03:43

by Tony Luck

[permalink] [raw]

Subject: RE: [PATCH 5/7] x86/mmu: Allocate/free PASID

>> So the driver needs to use flush/drain operations to make sure all
>> the in-flight work has completed before releasing/re-using the PASID.
>>
> Are you suggesting we should let driver also hold a reference of the
> PASID?

The sequence for bare metal is:

process is queuing requests to DSA
process exits (either deliberately, or crashes, or is killed)
kernel does exit processing
DSA driver is called as part of tear down of "mm"
issues drain/flush commands to ensure that all
queued operations on the PASID for this mm have
completed
PASID can be freed

There's a 1:1 map from "mm" to PASID ... so reference counting seems
like overkill. Once the kernel is in the "exit" path, we know that no more
work can be queued using this PASID.

-Tony

2020-04-28 22:15:13

by Jacob Pan

[permalink] [raw]

Subject: Re: [PATCH 5/7] x86/mmu: Allocate/free PASID

On Tue, 28 Apr 2020 13:59:43 -0700
"Luck, Tony" <[email protected]> wrote:

> >> So the driver needs to use flush/drain operations to make sure all
> >> the in-flight work has completed before releasing/re-using the
> >> PASID.
> > Are you suggesting we should let driver also hold a reference of the
> > PASID?
>
> The sequence for bare metal is:
>
> process is queuing requests to DSA
> process exits (either deliberately, or crashes, or is killed)
> kernel does exit processing
> DSA driver is called as part of tear down of "mm"
> issues drain/flush commands to ensure that all
> queued operations on the PASID for this mm have
> completed
> PASID can be freed
>
> There's a 1:1 map from "mm" to PASID ... so reference counting seems
> like overkill. Once the kernel is in the "exit" path, we know that no
> more work can be queued using this PASID.
>
There are two users of a PASID, mm and device driver(FD). If
either one is not done with the PASID, it cannot be reclaimed. As you
mentioned, it could take a long time for the driver to abort. If the
abort ends *after* mmdrop, we are in trouble.
If driver drops reference after abort/drain PASID is done, then we are
safe.

> -Tony

2020-04-28 22:37:18

by Tony Luck

[permalink] [raw]

Subject: RE: [PATCH 5/7] x86/mmu: Allocate/free PASID

> There are two users of a PASID, mm and device driver(FD). If
> either one is not done with the PASID, it cannot be reclaimed. As you
> mentioned, it could take a long time for the driver to abort. If the
> abort ends *after* mmdrop, we are in trouble.
> If driver drops reference after abort/drain PASID is done, then we are
> safe.

I don't think there should be an abort ... suppose the application requested
the DSA to copy some large block of important results from DDR4 to
persistent memory. Driver should wait for that copy operation to complete.

Note that for the operation to succeed, the kernel should still be processing
and fixing page faults for the "mm" (some parts of the data that the user wanted
to save to persistent memory may have been paged out).

The wait by the DSA driver needs to be synchronous ... the "mm" cannot be
freed until DSA says all the pending operations have completed.

Even without persistent memory, there are cases where you want the operations
to complete (mmap'd files, shared memory with other processes).

-Tony