2022-10-06 14:55:26

by Niklas Schnelle

[permalink] [raw]
Subject: [PATCH v5 3/6] iommu/s390: Fix potential s390_domain aperture shrinking

The s390 IOMMU driver currently sets the IOMMU domain's aperture to
match the device specific DMA address range of the device that is first
attached. This is not ideal. For one, if the domain has no device
attached in the meantime, the aperture could be shrunk, allowing
translations outside the aperture to exist in the translation tables.
Also this is a bit of a misuse of the aperture which really should
describe what addresses can be translated and not some device specific
limitations.

Instead of misusing the aperture like this, we can create
reserved ranges for the ranges inaccessible to the attached devices,
allowing devices with overlapping ranges to still share an IOMMU domain.
This also significantly simplifies s390_iommu_attach_device() allowing
us to move the aperture check to the beginning of the function and
removing the need to hold the device list's lock to check the aperture.

As we then use the same aperture for all domains and it only depends on
the table properties we can already check zdev->start_dma/end_dma at
probe time and turn the check on attach into a WARN_ON().

Suggested-by: Jason Gunthorpe <[email protected]>
Signed-off-by: Niklas Schnelle <[email protected]>
---
v4->v5:
- Make aperture check in attach a WARN_ON() and fail in probe if
zdev->start_dma/end_dma doesn't fit in aperture (Jason)

drivers/iommu/s390-iommu.c | 65 +++++++++++++++++++++++++-------------
1 file changed, 43 insertions(+), 22 deletions(-)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 9b3ae4b14636..1f6c9bee9a80 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -62,6 +62,9 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
kfree(s390_domain);
return NULL;
}
+ s390_domain->domain.geometry.force_aperture = true;
+ s390_domain->domain.geometry.aperture_start = 0;
+ s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;

spin_lock_init(&s390_domain->dma_table_lock);
spin_lock_init(&s390_domain->list_lock);
@@ -102,46 +105,32 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
struct s390_domain *s390_domain = to_s390_domain(domain);
struct zpci_dev *zdev = to_zpci_dev(dev);
unsigned long flags;
- int cc, rc = 0;
+ int cc;

if (!zdev)
return -ENODEV;

+ WARN_ON(domain->geometry.aperture_start > zdev->end_dma ||
+ domain->geometry.aperture_end < zdev->start_dma);
+
if (zdev->s390_domain)
__s390_iommu_detach_device(zdev);
else if (zdev->dma_table)
zpci_dma_exit_device(zdev);

- zdev->dma_table = s390_domain->dma_table;
cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table));
+ virt_to_phys(s390_domain->dma_table));
if (cc)
return -EIO;

- spin_lock_irqsave(&s390_domain->list_lock, flags);
- /* First device defines the DMA range limits */
- if (list_empty(&s390_domain->devices)) {
- domain->geometry.aperture_start = zdev->start_dma;
- domain->geometry.aperture_end = zdev->end_dma;
- domain->geometry.force_aperture = true;
- /* Allow only devices with identical DMA range limits */
- } else if (domain->geometry.aperture_start != zdev->start_dma ||
- domain->geometry.aperture_end != zdev->end_dma) {
- spin_unlock_irqrestore(&s390_domain->list_lock, flags);
- rc = -EINVAL;
- goto out_unregister;
- }
+ zdev->dma_table = s390_domain->dma_table;
zdev->s390_domain = s390_domain;
+
+ spin_lock_irqsave(&s390_domain->list_lock, flags);
list_add(&zdev->iommu_list, &s390_domain->devices);
spin_unlock_irqrestore(&s390_domain->list_lock, flags);

return 0;
-
-out_unregister:
- zpci_unregister_ioat(zdev, 0);
- zdev->dma_table = NULL;
-
- return rc;
}

static void s390_iommu_detach_device(struct iommu_domain *domain,
@@ -155,10 +144,41 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
zpci_dma_init_device(zdev);
}

+static void s390_iommu_get_resv_regions(struct device *dev,
+ struct list_head *list)
+{
+ struct zpci_dev *zdev = to_zpci_dev(dev);
+ struct iommu_resv_region *region;
+
+ if (zdev->start_dma) {
+ region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
+ IOMMU_RESV_RESERVED);
+ if (!region)
+ return;
+ list_add_tail(&region->list, list);
+ }
+
+ if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
+ region = iommu_alloc_resv_region(zdev->end_dma + 1,
+ ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
+ 0, IOMMU_RESV_RESERVED);
+ if (!region)
+ return;
+ list_add_tail(&region->list, list);
+ }
+}
+
static struct iommu_device *s390_iommu_probe_device(struct device *dev)
{
struct zpci_dev *zdev = to_zpci_dev(dev);

+ if (zdev->start_dma > zdev->end_dma ||
+ zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
+ return ERR_PTR(-EINVAL);
+
+ if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
+ zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
+
return &zdev->iommu_dev;
}

@@ -337,6 +357,7 @@ static const struct iommu_ops s390_iommu_ops = {
.release_device = s390_iommu_release_device,
.device_group = generic_device_group,
.pgsize_bitmap = S390_IOMMU_PGSIZES,
+ .get_resv_regions = s390_iommu_get_resv_regions,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = s390_iommu_attach_device,
.detach_dev = s390_iommu_detach_device,
--
2.34.1


2022-10-06 16:03:03

by Niklas Schnelle

[permalink] [raw]
Subject: Re: [PATCH v5 3/6] iommu/s390: Fix potential s390_domain aperture shrinking

On Thu, 2022-10-06 at 16:46 +0200, Niklas Schnelle wrote:
> The s390 IOMMU driver currently sets the IOMMU domain's aperture to
> match the device specific DMA address range of the device that is first
> attached. This is not ideal. For one if the domain has no device
> attached in the meantime the aperture could be shrunk allowing
> translations outside the aperture to exist in the translation tables.
> Also this is a bit of a misuse of the aperture which really should
> describe what addresses can be translated and not some device specific
> limitations.
>
> Instead of misusing the aperture like this we can instead create
> reserved ranges for the ranges inaccessible to the attached devices
> allowing devices with overlapping ranges to still share an IOMMU domain.
> This also significantly simplifies s390_iommu_attach_device() allowing
> us to move the aperture check to the beginning of the function and
> removing the need to hold the device list's lock to check the aperture.
>
> As we then use the same aperture for all domains and it only depends on
> the table properties we can already check zdev->start_dma/end_dma at
> probe time and turn the check on attach into a WARN_ON().
>
> Suggested-by: Jason Gunthorpe <[email protected]>
> Signed-off-by: Niklas Schnelle <[email protected]>

@Matt, @Jason I did drop the R-b's here because the change Jason
suggested of changing the aperture check on attach to a WARN_ON() and
checking zdev->start_dma/end_dma on probe is a behavioral change.

> ---
> v4->v5:
> - Make aperture check in attach a WARN_ON() and fail in probe if
> zdev->start_dma/end_dma doesn't fit in aperture (Jason)
>
> drivers/iommu/s390-iommu.c | 65 +++++++++++++++++++++++++-------------
> 1 file changed, 43 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index 9b3ae4b14636..1f6c9bee9a80 100644
> --- a/drivers/iommu/s390-iommu.c
---8<---

2022-10-06 21:55:36

by Matthew Rosato

[permalink] [raw]
Subject: Re: [PATCH v5 3/6] iommu/s390: Fix potential s390_domain aperture shrinking

On 10/6/22 10:46 AM, Niklas Schnelle wrote:
> The s390 IOMMU driver currently sets the IOMMU domain's aperture to
> match the device specific DMA address range of the device that is first
> attached. This is not ideal. For one if the domain has no device
> attached in the meantime the aperture could be shrunk allowing
> translations outside the aperture to exist in the translation tables.
> Also this is a bit of a misuse of the aperture which really should
> describe what addresses can be translated and not some device specific
> limitations.
>
> Instead of misusing the aperture like this we can instead create
> reserved ranges for the ranges inaccessible to the attached devices
> allowing devices with overlapping ranges to still share an IOMMU domain.
> This also significantly simplifies s390_iommu_attach_device() allowing
> us to move the aperture check to the beginning of the function and
> removing the need to hold the device list's lock to check the aperture.
>
> As we then use the same aperture for all domains and it only depends on
> the table properties we can already check zdev->start_dma/end_dma at
> probe time and turn the check on attach into a WARN_ON().
>
> Suggested-by: Jason Gunthorpe <[email protected]>
> Signed-off-by: Niklas Schnelle <[email protected]>

Reviewed-by: Matthew Rosato <[email protected]>

> ---
> v4->v5:
> - Make aperture check in attach a WARN_ON() and fail in probe if
> zdev->start_dma/end_dma doesn't fit in aperture (Jason)
>
> drivers/iommu/s390-iommu.c | 65 +++++++++++++++++++++++++-------------
> 1 file changed, 43 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index 9b3ae4b14636..1f6c9bee9a80 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -62,6 +62,9 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
> kfree(s390_domain);
> return NULL;
> }
> + s390_domain->domain.geometry.force_aperture = true;
> + s390_domain->domain.geometry.aperture_start = 0;
> + s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
>
> spin_lock_init(&s390_domain->dma_table_lock);
> spin_lock_init(&s390_domain->list_lock);
> @@ -102,46 +105,32 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
> struct s390_domain *s390_domain = to_s390_domain(domain);
> struct zpci_dev *zdev = to_zpci_dev(dev);
> unsigned long flags;
> - int cc, rc = 0;
> + int cc;
>
> if (!zdev)
> return -ENODEV;
>
> + WARN_ON(domain->geometry.aperture_start > zdev->end_dma ||
> + domain->geometry.aperture_end < zdev->start_dma);
> +
> if (zdev->s390_domain)
> __s390_iommu_detach_device(zdev);
> else if (zdev->dma_table)
> zpci_dma_exit_device(zdev);
>
> - zdev->dma_table = s390_domain->dma_table;
> cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
> - virt_to_phys(zdev->dma_table));
> + virt_to_phys(s390_domain->dma_table));
> if (cc)
> return -EIO;
>
> - spin_lock_irqsave(&s390_domain->list_lock, flags);
> - /* First device defines the DMA range limits */
> - if (list_empty(&s390_domain->devices)) {
> - domain->geometry.aperture_start = zdev->start_dma;
> - domain->geometry.aperture_end = zdev->end_dma;
> - domain->geometry.force_aperture = true;
> - /* Allow only devices with identical DMA range limits */
> - } else if (domain->geometry.aperture_start != zdev->start_dma ||
> - domain->geometry.aperture_end != zdev->end_dma) {
> - spin_unlock_irqrestore(&s390_domain->list_lock, flags);
> - rc = -EINVAL;
> - goto out_unregister;
> - }
> + zdev->dma_table = s390_domain->dma_table;
> zdev->s390_domain = s390_domain;
> +
> + spin_lock_irqsave(&s390_domain->list_lock, flags);
> list_add(&zdev->iommu_list, &s390_domain->devices);
> spin_unlock_irqrestore(&s390_domain->list_lock, flags);
>
> return 0;
> -
> -out_unregister:
> - zpci_unregister_ioat(zdev, 0);
> - zdev->dma_table = NULL;
> -
> - return rc;
> }
>
> static void s390_iommu_detach_device(struct iommu_domain *domain,
> @@ -155,10 +144,41 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
> zpci_dma_init_device(zdev);
> }
>
> +static void s390_iommu_get_resv_regions(struct device *dev,
> + struct list_head *list)
> +{
> + struct zpci_dev *zdev = to_zpci_dev(dev);
> + struct iommu_resv_region *region;
> +
> + if (zdev->start_dma) {
> + region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
> + IOMMU_RESV_RESERVED);
> + if (!region)
> + return;
> + list_add_tail(&region->list, list);
> + }
> +
> + if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
> + region = iommu_alloc_resv_region(zdev->end_dma + 1,
> + ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
> + 0, IOMMU_RESV_RESERVED);
> + if (!region)
> + return;
> + list_add_tail(&region->list, list);
> + }
> +}
> +
> static struct iommu_device *s390_iommu_probe_device(struct device *dev)
> {
> struct zpci_dev *zdev = to_zpci_dev(dev);
>
> + if (zdev->start_dma > zdev->end_dma ||
> + zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
> + return ERR_PTR(-EINVAL);
> +
> + if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
> + zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
> +
> return &zdev->iommu_dev;
> }
>
> @@ -337,6 +357,7 @@ static const struct iommu_ops s390_iommu_ops = {
> .release_device = s390_iommu_release_device,
> .device_group = generic_device_group,
> .pgsize_bitmap = S390_IOMMU_PGSIZES,
> + .get_resv_regions = s390_iommu_get_resv_regions,
> .default_domain_ops = &(const struct iommu_domain_ops) {
> .attach_dev = s390_iommu_attach_device,
> .detach_dev = s390_iommu_detach_device,

2022-10-07 07:57:21

by Niklas Schnelle

[permalink] [raw]
Subject: Re: [PATCH v5 3/6] iommu/s390: Fix potential s390_domain aperture shrinking

On Thu, 2022-10-06 at 17:02 -0400, Matthew Rosato wrote:
> On 10/6/22 10:46 AM, Niklas Schnelle wrote:
> > The s390 IOMMU driver currently sets the IOMMU domain's aperture to
> > match the device specific DMA address range of the device that is first
> > attached. This is not ideal. For one if the domain has no device
> > attached in the meantime the aperture could be shrunk allowing
> > translations outside the aperture to exist in the translation tables.
> > Also this is a bit of a misuse of the aperture which really should
> > describe what addresses can be translated and not some device specific
> > limitations.
> >
> > Instead of misusing the aperture like this we can instead create
> > reserved ranges for the ranges inaccessible to the attached devices
> > allowing devices with overlapping ranges to still share an IOMMU domain.
> > This also significantly simplifies s390_iommu_attach_device() allowing
> > us to move the aperture check to the beginning of the function and
> > removing the need to hold the device list's lock to check the aperture.
> >
> > As we then use the same aperture for all domains and it only depends on
> > the table properties we can already check zdev->start_dma/end_dma at
> > probe time and turn the check on attach into a WARN_ON().
> >
> > Suggested-by: Jason Gunthorpe <[email protected]>
> > Signed-off-by: Niklas Schnelle <[email protected]>
>
> Reviewed-by: Matthew Rosato <[email protected]>
>
> > ---
> > v4->v5:
> > - Make aperture check in attach a WARN_ON() and fail in probe if
> > > zdev->start_dma/end_dma doesn't fit in aperture (Jason)
> >
> > drivers/iommu/s390-iommu.c | 65 +++++++++++++++++++++++++-------------
> > 1 file changed, 43 insertions(+), 22 deletions(-)
> >
> > diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> > index 9b3ae4b14636..1f6c9bee9a80 100644
> > --- a/drivers/iommu/s390-iommu.c
> > +++ b/drivers/iommu/s390-iommu.c
> > @@ -62,6 +62,9 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
> > kfree(s390_domain);
> > return NULL;
> > }
> > + s390_domain->domain.geometry.force_aperture = true;
> > + s390_domain->domain.geometry.aperture_start = 0;
> > + s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
> >
> > spin_lock_init(&s390_domain->dma_table_lock);
> > spin_lock_init(&s390_domain->list_lock);
> > @@ -102,46 +105,32 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
> > struct s390_domain *s390_domain = to_s390_domain(domain);
> > struct zpci_dev *zdev = to_zpci_dev(dev);
> > unsigned long flags;
> > - int cc, rc = 0;
> > + int cc;
> >
> > if (!zdev)
> > return -ENODEV;
> >
> > + WARN_ON(domain->geometry.aperture_start > zdev->end_dma ||
> > + domain->geometry.aperture_end < zdev->start_dma);
> > +

I think this check should still return -EINVAL when the WARN_ON() triggers.

> > if (zdev->s390_domain)
> > __s390_iommu_detach_device(zdev);
> > else if (zdev->dma_table)
> > zpci_dma_exit_device(zdev);
> >
> > - zdev->dma_table = s390_domain->dma_table;
> > cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
> > - virt_to_phys(zdev->dma_table));
> > + virt_to_phys(s390_domain->dma_table));
> > if (cc)
> > return -EIO;
> >
> > - spin_lock_irqsave(&s390_domain->list_lock, flags);
> > - /* First device defines the DMA range limits */
> > - if (list_empty(&s390_domain->devices)) {
> > - domain->geometry.aperture_start = zdev->start_dma;
> > - domain->geometry.aperture_end = zdev->end_dma;
> > - domain->geometry.force_aperture = true;
> > - /* Allow only devices with identical DMA range limits */
> > - } else if (domain->geometry.aperture_start != zdev->start_dma ||
> > - domain->geometry.aperture_end != zdev->end_dma) {
> > - spin_unlock_irqrestore(&s390_domain->list_lock, flags);
> > - rc = -EINVAL;
> > - goto out_unregister;
> > - }
> > + zdev->dma_table = s390_domain->dma_table;
> > zdev->s390_domain = s390_domain;
> > +
> > + spin_lock_irqsave(&s390_domain->list_lock, flags);
> > list_add(&zdev->iommu_list, &s390_domain->devices);
> > spin_unlock_irqrestore(&s390_domain->list_lock, flags);
> >
> > return 0;
> > -
> > -out_unregister:
> > - zpci_unregister_ioat(zdev, 0);
> > - zdev->dma_table = NULL;
> > -
> > - return rc;
> > }
> >
> >
---8<---