2013-07-26 11:27:17

by Cho KyongHo

[permalink] [raw]
Subject: [PATCH v8 03/12] iommu/exynos: fix page table maintenance

This prevents allocating an lv2 page table for an lv1 page table entry
that already has a 1MB page mapping. In addition, some BUG_ON() calls
are changed to WARN_ON().

Signed-off-by: Cho KyongHo <[email protected]>
---
drivers/iommu/exynos-iommu.c | 52 +++++++++++++++++++++++++++++------------
1 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index e3be3e5..6c4ecce 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -52,11 +52,11 @@
#define lv2ent_large(pent) ((*(pent) & 3) == 1)

#define section_phys(sent) (*(sent) & SECT_MASK)
-#define section_offs(iova) ((iova) & 0xFFFFF)
+#define section_offs(iova) ((iova) & ~SECT_MASK)
#define lpage_phys(pent) (*(pent) & LPAGE_MASK)
-#define lpage_offs(iova) ((iova) & 0xFFFF)
+#define lpage_offs(iova) ((iova) & ~LPAGE_MASK)
#define spage_phys(pent) (*(pent) & SPAGE_MASK)
-#define spage_offs(iova) ((iova) & 0xFFF)
+#define spage_offs(iova) ((iova) & ~SPAGE_MASK)

#define lv1ent_offset(iova) ((iova) >> SECT_ORDER)
#define lv2ent_offset(iova) (((iova) & 0xFF000) >> SPAGE_ORDER)
@@ -862,12 +862,14 @@ static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned long iova,
pent = kzalloc(LV2TABLE_SIZE, GFP_ATOMIC);
BUG_ON((unsigned long)pent & (LV2TABLE_SIZE - 1));
if (!pent)
- return NULL;
+ return ERR_PTR(-ENOMEM);

*sent = mk_lv1ent_page(__pa(pent));
*pgcounter = NUM_LV2ENTRIES;
pgtable_flush(pent, pent + NUM_LV2ENTRIES);
pgtable_flush(sent, sent + 1);
+ } else if (lv1ent_section(sent)) {
+ return ERR_PTR(-EADDRINUSE);
}

return page_entry(sent, iova);
@@ -894,6 +896,12 @@ static int lv1set_section(unsigned long *sent, phys_addr_t paddr, short *pgcnt)
return 0;
}

+static void clear_page_table(unsigned long *ent, int n)
+{
+ if (n > 0)
+ memset(ent, 0, sizeof(*ent) * n);
+}
+
static int lv2set_page(unsigned long *pent, phys_addr_t paddr, size_t size,
short *pgcnt)
{
@@ -908,7 +916,7 @@ static int lv2set_page(unsigned long *pent, phys_addr_t paddr, size_t size,
int i;
for (i = 0; i < SPAGES_PER_LPAGE; i++, pent++) {
if (!lv2ent_fault(pent)) {
- memset(pent, 0, sizeof(*pent) * i);
+ clear_page_table(pent - i, i);
return -EADDRINUSE;
}

@@ -944,17 +952,16 @@ static int exynos_iommu_map(struct iommu_domain *domain, unsigned long iova,
pent = alloc_lv2entry(entry, iova,
&priv->lv2entcnt[lv1ent_offset(iova)]);

- if (!pent)
- ret = -ENOMEM;
+ if (IS_ERR(pent))
+ ret = PTR_ERR(pent);
else
ret = lv2set_page(pent, paddr, size,
&priv->lv2entcnt[lv1ent_offset(iova)]);
}

- if (ret) {
- pr_debug("%s: Failed to map iova 0x%lx/0x%x bytes\n",
- __func__, iova, size);
- }
+ if (ret)
+ pr_err("%s: Failed(%d) to map 0x%#x bytes @ %#lx\n",
+ __func__, ret, size, iova);

spin_unlock_irqrestore(&priv->pgtablelock, flags);

@@ -968,6 +975,7 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain,
struct sysmmu_drvdata *data;
unsigned long flags;
unsigned long *ent;
+ size_t err_pgsize;

BUG_ON(priv->pgtable == NULL);

@@ -976,7 +984,10 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain,
ent = section_entry(priv->pgtable, iova);

if (lv1ent_section(ent)) {
- BUG_ON(size < SECT_SIZE);
+ if (WARN_ON(size < SECT_SIZE)) {
+ err_pgsize = SECT_SIZE;
+ goto err;
+ }

*ent = 0;
pgtable_flush(ent, ent + 1);
@@ -1008,9 +1019,12 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain,
}

/* lv1ent_large(ent) == true here */
- BUG_ON(size < LPAGE_SIZE);
+ if (WARN_ON(size < LPAGE_SIZE)) {
+ err_pgsize = LPAGE_SIZE;
+ goto err;
+ }

- memset(ent, 0, sizeof(*ent) * SPAGES_PER_LPAGE);
+ clear_page_table(ent, SPAGES_PER_LPAGE);
pgtable_flush(ent, ent + SPAGES_PER_LPAGE);

size = LPAGE_SIZE;
@@ -1023,8 +1037,16 @@ done:
sysmmu_tlb_invalidate_entry(data->dev, iova);
spin_unlock_irqrestore(&priv->lock, flags);

-
return size;
+err:
+ spin_unlock_irqrestore(&priv->pgtablelock, flags);
+
+ pr_err("%s: Failed due to size(%#x) @ %#lx is"\
+ " smaller than page size %#x\n",
+ __func__, size, iova, err_pgsize);
+
+ return 0;
+
}

static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *domain,
--
1.7.2.5


2013-07-26 16:17:06

by Grant Grundler

[permalink] [raw]
Subject: Re: [PATCH v8 03/12] iommu/exynos: fix page table maintenance

On Fri, Jul 26, 2013 at 4:27 AM, Cho KyongHo <[email protected]> wrote:
> This prevents allocating lv2 page table for the lv1 page table entry
> that already has 1MB page mapping. In addition some BUG_ON() is
> changed to WARN_ON().
>
> Signed-off-by: Cho KyongHo <[email protected]>

Reviewed-by: Grant Grundler <[email protected]>

In reviewing this, I noticed another issue that is related, but not
caused by this patch. See below.

> ---
> drivers/iommu/exynos-iommu.c | 52 +++++++++++++++++++++++++++++------------
> 1 files changed, 37 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
> index e3be3e5..6c4ecce 100644
> --- a/drivers/iommu/exynos-iommu.c
> +++ b/drivers/iommu/exynos-iommu.c
> @@ -52,11 +52,11 @@
> #define lv2ent_large(pent) ((*(pent) & 3) == 1)
>
> #define section_phys(sent) (*(sent) & SECT_MASK)
> -#define section_offs(iova) ((iova) & 0xFFFFF)
> +#define section_offs(iova) ((iova) & ~SECT_MASK)
> #define lpage_phys(pent) (*(pent) & LPAGE_MASK)
> -#define lpage_offs(iova) ((iova) & 0xFFFF)
> +#define lpage_offs(iova) ((iova) & ~LPAGE_MASK)
> #define spage_phys(pent) (*(pent) & SPAGE_MASK)
> -#define spage_offs(iova) ((iova) & 0xFFF)
> +#define spage_offs(iova) ((iova) & ~SPAGE_MASK)
>
> #define lv1ent_offset(iova) ((iova) >> SECT_ORDER)
> #define lv2ent_offset(iova) (((iova) & 0xFF000) >> SPAGE_ORDER)
> @@ -862,12 +862,14 @@ static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned long iova,
> pent = kzalloc(LV2TABLE_SIZE, GFP_ATOMIC);
> BUG_ON((unsigned long)pent & (LV2TABLE_SIZE - 1));
> if (!pent)
> - return NULL;
> + return ERR_PTR(-ENOMEM);
>
> *sent = mk_lv1ent_page(__pa(pent));
> *pgcounter = NUM_LV2ENTRIES;
> pgtable_flush(pent, pent + NUM_LV2ENTRIES);
> pgtable_flush(sent, sent + 1);
> + } else if (lv1ent_section(sent)) {
> + return ERR_PTR(-EADDRINUSE);
> }
>
> return page_entry(sent, iova);
> @@ -894,6 +896,12 @@ static int lv1set_section(unsigned long *sent, phys_addr_t paddr, short *pgcnt)
> return 0;
> }
>
> +static void clear_page_table(unsigned long *ent, int n)
> +{
> + if (n > 0)
> + memset(ent, 0, sizeof(*ent) * n);
> +}
> +
> static int lv2set_page(unsigned long *pent, phys_addr_t paddr, size_t size,
> short *pgcnt)
> {
> @@ -908,7 +916,7 @@ static int lv2set_page(unsigned long *pent, phys_addr_t paddr, size_t size,
> int i;
> for (i = 0; i < SPAGES_PER_LPAGE; i++, pent++) {
> if (!lv2ent_fault(pent)) {
> - memset(pent, 0, sizeof(*pent) * i);
> + clear_page_table(pent - i, i);
> return -EADDRINUSE;

I am wondering about two issues with this error handling:
1) we don't call pgtable_flush() in this case - I think just for
consistency we should - don't rush to add since my next comment is to
change this error handling completely.

2) If -EADDRINUSE is correct, why does the code clear the IO Page
table entries?

I think this error path should either
(a) BUG_ON (ie panic) since this is an inconsistency between
generic IOMMU page allocation and chip specific IOMMU mapping code OR
(b) WARN_ON, not clear the entries, and hope whoever was using it
can finish using the system before crashing or gracefully shutting
down.

In any case, I'm pretty sure this code needs to change and it should
be in a follow up to this series.

thanks,
grant

> }
>
> @@ -944,17 +952,16 @@ static int exynos_iommu_map(struct iommu_domain *domain, unsigned long iova,
> pent = alloc_lv2entry(entry, iova,
> &priv->lv2entcnt[lv1ent_offset(iova)]);
>
> - if (!pent)
> - ret = -ENOMEM;
> + if (IS_ERR(pent))
> + ret = PTR_ERR(pent);
> else
> ret = lv2set_page(pent, paddr, size,
> &priv->lv2entcnt[lv1ent_offset(iova)]);
> }
>
> - if (ret) {
> - pr_debug("%s: Failed to map iova 0x%lx/0x%x bytes\n",
> - __func__, iova, size);
> - }
> + if (ret)
> + pr_err("%s: Failed(%d) to map 0x%#x bytes @ %#lx\n",
> + __func__, ret, size, iova);
>
> spin_unlock_irqrestore(&priv->pgtablelock, flags);
>
> @@ -968,6 +975,7 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain,
> struct sysmmu_drvdata *data;
> unsigned long flags;
> unsigned long *ent;
> + size_t err_pgsize;
>
> BUG_ON(priv->pgtable == NULL);
>
> @@ -976,7 +984,10 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain,
> ent = section_entry(priv->pgtable, iova);
>
> if (lv1ent_section(ent)) {
> - BUG_ON(size < SECT_SIZE);
> + if (WARN_ON(size < SECT_SIZE)) {
> + err_pgsize = SECT_SIZE;
> + goto err;
> + }
>
> *ent = 0;
> pgtable_flush(ent, ent + 1);
> @@ -1008,9 +1019,12 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain,
> }
>
> /* lv1ent_large(ent) == true here */
> - BUG_ON(size < LPAGE_SIZE);
> + if (WARN_ON(size < LPAGE_SIZE)) {
> + err_pgsize = LPAGE_SIZE;
> + goto err;
> + }
>
> - memset(ent, 0, sizeof(*ent) * SPAGES_PER_LPAGE);
> + clear_page_table(ent, SPAGES_PER_LPAGE);
> pgtable_flush(ent, ent + SPAGES_PER_LPAGE);
>
> size = LPAGE_SIZE;
> @@ -1023,8 +1037,16 @@ done:
> sysmmu_tlb_invalidate_entry(data->dev, iova);
> spin_unlock_irqrestore(&priv->lock, flags);
>
> -
> return size;
> +err:
> + spin_unlock_irqrestore(&priv->pgtablelock, flags);
> +
> + pr_err("%s: Failed due to size(%#x) @ %#lx is"\
> + " smaller than page size %#x\n",
> + __func__, size, iova, err_pgsize);
> +
> + return 0;
> +
> }
>
> static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *domain,
> --
> 1.7.2.5
>
>

2013-07-29 09:18:30

by Cho KyongHo

[permalink] [raw]
Subject: RE: [PATCH v8 03/12] iommu/exynos: fix page table maintenance

> -----Original Message-----
> From: [email protected] [mailto:[email protected]] On Behalf Of Grant Grundler
> Sent: Saturday, July 27, 2013 1:17 AM
> To: Cho KyongHo
>
> On Fri, Jul 26, 2013 at 4:27 AM, Cho KyongHo <[email protected]> wrote:
> > This prevents allocating lv2 page table for the lv1 page table entry
> > that already has 1MB page mapping. In addition some BUG_ON() is
> > changed to WARN_ON().
> >
> > Signed-off-by: Cho KyongHo <[email protected]>
>
> Reviewed-by: Grant Grundler <[email protected]>
>
> In reviewing this, I noticed another issue that is related, but not
> caused by this patch. See below.
>
> > ---
> > drivers/iommu/exynos-iommu.c | 52 +++++++++++++++++++++++++++++------------
> > 1 files changed, 37 insertions(+), 15 deletions(-)
> >
> > diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
> > index e3be3e5..6c4ecce 100644
> > --- a/drivers/iommu/exynos-iommu.c
> > +++ b/drivers/iommu/exynos-iommu.c
> > @@ -52,11 +52,11 @@
> > #define lv2ent_large(pent) ((*(pent) & 3) == 1)
> >
> > #define section_phys(sent) (*(sent) & SECT_MASK)
> > -#define section_offs(iova) ((iova) & 0xFFFFF)
> > +#define section_offs(iova) ((iova) & ~SECT_MASK)
> > #define lpage_phys(pent) (*(pent) & LPAGE_MASK)
> > -#define lpage_offs(iova) ((iova) & 0xFFFF)
> > +#define lpage_offs(iova) ((iova) & ~LPAGE_MASK)
> > #define spage_phys(pent) (*(pent) & SPAGE_MASK)
> > -#define spage_offs(iova) ((iova) & 0xFFF)
> > +#define spage_offs(iova) ((iova) & ~SPAGE_MASK)
> >
> > #define lv1ent_offset(iova) ((iova) >> SECT_ORDER)
> > #define lv2ent_offset(iova) (((iova) & 0xFF000) >> SPAGE_ORDER)
> > @@ -862,12 +862,14 @@ static unsigned long *alloc_lv2entry(unsigned long *sent, unsigned long iova,
> > pent = kzalloc(LV2TABLE_SIZE, GFP_ATOMIC);
> > BUG_ON((unsigned long)pent & (LV2TABLE_SIZE - 1));
> > if (!pent)
> > - return NULL;
> > + return ERR_PTR(-ENOMEM);
> >
> > *sent = mk_lv1ent_page(__pa(pent));
> > *pgcounter = NUM_LV2ENTRIES;
> > pgtable_flush(pent, pent + NUM_LV2ENTRIES);
> > pgtable_flush(sent, sent + 1);
> > + } else if (lv1ent_section(sent)) {
> > + return ERR_PTR(-EADDRINUSE);
> > }
> >
> > return page_entry(sent, iova);
> > @@ -894,6 +896,12 @@ static int lv1set_section(unsigned long *sent, phys_addr_t paddr, short *pgcnt)
> > return 0;
> > }
> >
> > +static void clear_page_table(unsigned long *ent, int n)
> > +{
> > + if (n > 0)
> > + memset(ent, 0, sizeof(*ent) * n);
> > +}
> > +
> > static int lv2set_page(unsigned long *pent, phys_addr_t paddr, size_t size,
> > short *pgcnt)
> > {
> > @@ -908,7 +916,7 @@ static int lv2set_page(unsigned long *pent, phys_addr_t paddr, size_t size,
> > int i;
> > for (i = 0; i < SPAGES_PER_LPAGE; i++, pent++) {
> > if (!lv2ent_fault(pent)) {
> > - memset(pent, 0, sizeof(*pent) * i);
> > + clear_page_table(pent - i, i);
> > return -EADDRINUSE;
>
> I am wondering about two issues with this error handling:
> 1) we don't call pgtable_flush() in this case - I think just for
> consistency we should - don't rush to add since my next comment is to
> change this error handling completely.
>
clear_page_table() is called for page table entries that are already
fault pages. That is why it does not include a cache flush.

> 2) If -EADDRINUSE is correct, why does the code clear the IO Page
> table entries?
>
> I think this error path should either
> (a) BUG_ON (ie panic) since this is an inconsistency between
> generic IOMMU page allocation and chip specific IOMMU mapping code OR
> (b) WARN_ON, not clear the entries, and hope whoever was using it
> can finish using the system before crashing or gracefully shutting
> down.
>
> In any case, I'm pretty sure this code needs to change and it should
> be in a follow up to this series.

Yes, you're right. But I was worried about the case where a kernel module calls
IOMMU API functions directly and does not want to cause a kernel panic when it
tries to map a region that is already in use.
I also wonder whether such a kernel module exists.
WARN_ON is also a good idea.

Thank you.

>
> thanks,
> grant
>
> > }
> >
> > @@ -944,17 +952,16 @@ static int exynos_iommu_map(struct iommu_domain *domain, unsigned long iova,
> > pent = alloc_lv2entry(entry, iova,
> > &priv->lv2entcnt[lv1ent_offset(iova)]);
> >
> > - if (!pent)
> > - ret = -ENOMEM;
> > + if (IS_ERR(pent))
> > + ret = PTR_ERR(pent);
> > else
> > ret = lv2set_page(pent, paddr, size,
> > &priv->lv2entcnt[lv1ent_offset(iova)]);
> > }
> >
> > - if (ret) {
> > - pr_debug("%s: Failed to map iova 0x%lx/0x%x bytes\n",
> > - __func__, iova, size);
> > - }
> > + if (ret)
> > + pr_err("%s: Failed(%d) to map 0x%#x bytes @ %#lx\n",
> > + __func__, ret, size, iova);
> >
> > spin_unlock_irqrestore(&priv->pgtablelock, flags);
> >
> > @@ -968,6 +975,7 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain,
> > struct sysmmu_drvdata *data;
> > unsigned long flags;
> > unsigned long *ent;
> > + size_t err_pgsize;
> >
> > BUG_ON(priv->pgtable == NULL);
> >
> > @@ -976,7 +984,10 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain,
> > ent = section_entry(priv->pgtable, iova);
> >
> > if (lv1ent_section(ent)) {
> > - BUG_ON(size < SECT_SIZE);
> > + if (WARN_ON(size < SECT_SIZE)) {
> > + err_pgsize = SECT_SIZE;
> > + goto err;
> > + }
> >
> > *ent = 0;
> > pgtable_flush(ent, ent + 1);
> > @@ -1008,9 +1019,12 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain,
> > }
> >
> > /* lv1ent_large(ent) == true here */
> > - BUG_ON(size < LPAGE_SIZE);
> > + if (WARN_ON(size < LPAGE_SIZE)) {
> > + err_pgsize = LPAGE_SIZE;
> > + goto err;
> > + }
> >
> > - memset(ent, 0, sizeof(*ent) * SPAGES_PER_LPAGE);
> > + clear_page_table(ent, SPAGES_PER_LPAGE);
> > pgtable_flush(ent, ent + SPAGES_PER_LPAGE);
> >
> > size = LPAGE_SIZE;
> > @@ -1023,8 +1037,16 @@ done:
> > sysmmu_tlb_invalidate_entry(data->dev, iova);
> > spin_unlock_irqrestore(&priv->lock, flags);
> >
> > -
> > return size;
> > +err:
> > + spin_unlock_irqrestore(&priv->pgtablelock, flags);
> > +
> > + pr_err("%s: Failed due to size(%#x) @ %#lx is"\
> > + " smaller than page size %#x\n",
> > + __func__, size, iova, err_pgsize);
> > +
> > + return 0;
> > +
> > }
> >
> > static phys_addr_t exynos_iommu_iova_to_phys(struct iommu_domain *domain,
> > --
> > 1.7.2.5
> >
> >

2013-07-29 16:22:05

by Grant Grundler

[permalink] [raw]
Subject: Re: [PATCH v8 03/12] iommu/exynos: fix page table maintenance

On Mon, Jul 29, 2013 at 2:18 AM, Cho KyongHo <[email protected]> wrote:
...
>> > @@ -908,7 +916,7 @@ static int lv2set_page(unsigned long *pent, phys_addr_t paddr, size_t size,
>> > int i;
>> > for (i = 0; i < SPAGES_PER_LPAGE; i++, pent++) {
>> > if (!lv2ent_fault(pent)) {
>> > - memset(pent, 0, sizeof(*pent) * i);
>> > + clear_page_table(pent - i, i);
>> > return -EADDRINUSE;
>>
>> I am wondering about two issues with this error handling:
>> 1) we don't call pgtable_flush() in this case - I think just for
>> consistency we should - don't rush to add since my next comment is to
>> change this error handling completely.
>>
> clear_page_table() is called for the page table entries that are already
> fault pages. That is why it does not contain cache flush.
>
>> 2) If -EADDRINUSE is correct, why does the code clear the IO Page
>> table entries?
>>
>> I think this error path should either
>> (a) BUG_ON (ie panic) since this is an inconsistency between
>> generic IOMMU page allocation and chip specific IOMMU mapping code OR
>> (b) WARN_ON, not clear the entries, and hope whoever was using it
>> can finish using the system before crashing or gracefully shutting
>> down.
>>
>> In any case, I'm pretty sure this code needs to change and it should
>> be in a follow up to this series.
>
> Yes, you're right. But I worried the case that a kernel module calls IOMMU API
> functions directly and does not want to make kernel panic when it tries to map
> a region that is already in use.

Using a DMA address for a different physical page while the current
mapping is still active can only be a bug. I can confidently say there
is no way to map the same DMA address twice (at least not for a single
page table.) We can try to fail the mapping somehow and WARN_ON to
indicate we had a "Re-Use before free" type bug.

> I also wonder if the such kernel module exists.

I believe the kernel will never do this.

> WARN_ON is also a good idea.

After this series goes in, post another patch and I'd be happy to
review that as well.

After thinking about it more, I'm also ok with removing this code.
It's very "defensive" code to catch errors in the generic IOMMU code
that probably no longer exist. Or maybe just make it conditional on
"CONFIG_DEBUG_IOMMU_ALLOC" or something like that.

cheers,
grant