2019-10-09 13:20:07

by Yong Wu (吴勇)

[permalink] [raw]
Subject: [PATCH v2 1/4] iommu/mediatek: Correct the flush_iotlb_all callback

Use the correct tlb_flush_all instead of the original one.

Fixes: 4d689b619445 ("iommu/io-pgtable-arm-v7s: Convert to IOMMU API TLB sync")
Signed-off-by: Yong Wu <[email protected]>
---
1. rebase on v5.4-rc1
2. v1:
https://lore.kernel.org/linux-iommu/CAAFQd5C3U7pZo4SSUJ52Q7E+0FaUoORQFbQC5RhCHBhi=NFYTw@mail.gmail.com/T/#t
---
drivers/iommu/mtk_iommu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 67a483c..76b9388 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -447,7 +447,7 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain,

static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
- mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
+ mtk_iommu_tlb_flush_all(mtk_iommu_get_m4u_data());
}

static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
--
1.9.1


2019-10-09 13:20:31

by Yong Wu (吴勇)

[permalink] [raw]
Subject: [PATCH v2 2/4] iommu/mediatek: Move the tlb_sync into tlb_flush

The commit 4d689b619445 ("iommu/io-pgtable-arm-v7s: Convert to IOMMU API
TLB sync") help move the tlb_sync of unmap from v7s into the iommu
framework. It helps add a new function "mtk_iommu_iotlb_sync", But it
lacked the dom->pgtlock, then it will cause the variable
"tlb_flush_active" may be changed unexpectedly, we could see this warning
log randomly:

mtk-iommu 10205000.iommu: Partial TLB flush timed out, falling back to
full flush

To fix this issue, we can add dom->pgtlock in the "mtk_iommu_iotlb_sync".

In addition, when checking this issue, we find that __arm_v7s_unmap call
io_pgtable_tlb_add_flush consecutively when it is supersection/largepage,
this also is potential unsafe for us. There is no tlb flush queue in the
MediaTek M4U HW. The HW always expect the tlb_flush/tlb_sync one by one.
If v7s don't always gurarantee the sequence, Thus, In this patch I move
the tlb_sync into tlb_flush(also rename the function deleting "_nosync").
and we don't care if it is leaf, rearrange the callback functions. Also,
the tlb flush/sync was already finished in v7s, then iotlb_sync is
unnecessary.

Fixes: 4d689b619445 ("iommu/io-pgtable-arm-v7s: Convert to IOMMU API TLB sync")
Signed-off-by: Chao Hao <[email protected]>
Signed-off-by: Yong Wu <[email protected]>
---
drivers/iommu/mtk_iommu.c | 63 +++++++++++++----------------------------------
drivers/iommu/mtk_iommu.h | 1 -
2 files changed, 17 insertions(+), 47 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 76b9388..24a13a6 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -173,11 +173,12 @@ static void mtk_iommu_tlb_flush_all(void *cookie)
}
}

-static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size,
- size_t granule, bool leaf,
- void *cookie)
+static void mtk_iommu_tlb_add_flush(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
{
struct mtk_iommu_data *data = cookie;
+ int ret;
+ u32 tmp;

for_each_m4u(data) {
writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
@@ -188,21 +189,12 @@ static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size,
data->base + REG_MMU_INVLD_END_A);
writel_relaxed(F_MMU_INV_RANGE,
data->base + REG_MMU_INVALIDATE);
- data->tlb_flush_active = true;
- }
-}
-
-static void mtk_iommu_tlb_sync(void *cookie)
-{
- struct mtk_iommu_data *data = cookie;
- int ret;
- u32 tmp;
-
- for_each_m4u(data) {
- /* Avoid timing out if there's nothing to wait for */
- if (!data->tlb_flush_active)
- return;

+ /*
+ * There is no tlb flush queue in the HW, the HW always expect
+ * tlb_flush and tlb_sync in pair strictly. Here tlb_sync always
+ * follows tlb_flush to avoid break the sequence.
+ */
ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE,
tmp, tmp != 0, 10, 100000);
if (ret) {
@@ -212,36 +204,21 @@ static void mtk_iommu_tlb_sync(void *cookie)
}
/* Clear the CPE status */
writel_relaxed(0, data->base + REG_MMU_CPE_DONE);
- data->tlb_flush_active = false;
}
}

-static void mtk_iommu_tlb_flush_walk(unsigned long iova, size_t size,
- size_t granule, void *cookie)
-{
- mtk_iommu_tlb_add_flush_nosync(iova, size, granule, false, cookie);
- mtk_iommu_tlb_sync(cookie);
-}
-
-static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
- size_t granule, void *cookie)
+static void mtk_iommu_tlb_flush_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
{
- mtk_iommu_tlb_add_flush_nosync(iova, size, granule, true, cookie);
- mtk_iommu_tlb_sync(cookie);
-}
-
-static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather,
- unsigned long iova, size_t granule,
- void *cookie)
-{
- mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie);
+ mtk_iommu_tlb_add_flush(iova, granule, granule, cookie);
}

static const struct iommu_flush_ops mtk_iommu_flush_ops = {
.tlb_flush_all = mtk_iommu_tlb_flush_all,
- .tlb_flush_walk = mtk_iommu_tlb_flush_walk,
- .tlb_flush_leaf = mtk_iommu_tlb_flush_leaf,
- .tlb_add_page = mtk_iommu_tlb_flush_page_nosync,
+ .tlb_flush_walk = mtk_iommu_tlb_add_flush,
+ .tlb_flush_leaf = mtk_iommu_tlb_add_flush,
+ .tlb_add_page = mtk_iommu_tlb_flush_page,
};

static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
@@ -450,12 +427,6 @@ static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
mtk_iommu_tlb_flush_all(mtk_iommu_get_m4u_data());
}

-static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
- struct iommu_iotlb_gather *gather)
-{
- mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
-}
-
static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova)
{
@@ -558,7 +529,7 @@ static int mtk_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
.map = mtk_iommu_map,
.unmap = mtk_iommu_unmap,
.flush_iotlb_all = mtk_iommu_flush_iotlb_all,
- .iotlb_sync = mtk_iommu_iotlb_sync,
+ /* No iotlb_sync here since the tlb_sync always follows the tlb_flush */
.iova_to_phys = mtk_iommu_iova_to_phys,
.add_device = mtk_iommu_add_device,
.remove_device = mtk_iommu_remove_device,
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index fc0f16e..24712f5 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -57,7 +57,6 @@ struct mtk_iommu_data {
struct mtk_iommu_domain *m4u_dom;
struct iommu_group *m4u_group;
bool enable_4GB;
- bool tlb_flush_active;

struct iommu_device iommu;
const struct mtk_iommu_plat_data *plat_data;
--
1.9.1

2019-10-09 13:20:59

by Yong Wu (吴勇)

[permalink] [raw]
Subject: [PATCH v2 3/4] iommu/mediatek: Use writel for TLB range invalidation

Use writel for the register F_MMU_INV_RANGE which is for triggering the
HW work. We expect all the setting(iova_start/iova_end...) have already
been finished before F_MMU_INV_RANGE.

Signed-off-by: Anan.Sun <[email protected]>
Signed-off-by: Yong Wu <[email protected]>
---
This is a improvement rather than fixing a issue.
---
drivers/iommu/mtk_iommu.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 24a13a6..607f92c 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -187,8 +187,7 @@ static void mtk_iommu_tlb_add_flush(unsigned long iova, size_t size,
writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
writel_relaxed(iova + size - 1,
data->base + REG_MMU_INVLD_END_A);
- writel_relaxed(F_MMU_INV_RANGE,
- data->base + REG_MMU_INVALIDATE);
+ writel(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE);

/*
* There is no tlb flush queue in the HW, the HW always expect
--
1.9.1

2019-10-09 13:21:06

by Yong Wu (吴勇)

[permalink] [raw]
Subject: [PATCH v2 4/4] iommu/mediatek: Reduce the tlb flush timeout value

Reduce the tlb timeout value from 100000us to 1000us. the original value
is so long that affect the multimedia performance. This is only a minor
improvement rather than fixing a issue.

Signed-off-by: Yong Wu <[email protected]>
---
drivers/iommu/mtk_iommu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 607f92c..1e9ff25 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -195,7 +195,7 @@ static void mtk_iommu_tlb_add_flush(unsigned long iova, size_t size,
* follows tlb_flush to avoid break the sequence.
*/
ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE,
- tmp, tmp != 0, 10, 100000);
+ tmp, tmp != 0, 10, 1000);
if (ret) {
dev_warn(data->dev,
"Partial TLB flush timed out, falling back to full flush\n");
--
1.9.1

2019-10-11 16:30:51

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v2 3/4] iommu/mediatek: Use writel for TLB range invalidation

On Wed, Oct 09, 2019 at 09:19:02PM +0800, Yong Wu wrote:
> Use writel for the register F_MMU_INV_RANGE which is for triggering the
> HW work. We expect all the setting(iova_start/iova_end...) have already
> been finished before F_MMU_INV_RANGE.
>
> Signed-off-by: Anan.Sun <[email protected]>
> Signed-off-by: Yong Wu <[email protected]>
> ---
> This is a improvement rather than fixing a issue.
> ---
> drivers/iommu/mtk_iommu.c | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> index 24a13a6..607f92c 100644
> --- a/drivers/iommu/mtk_iommu.c
> +++ b/drivers/iommu/mtk_iommu.c
> @@ -187,8 +187,7 @@ static void mtk_iommu_tlb_add_flush(unsigned long iova, size_t size,
> writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
> writel_relaxed(iova + size - 1,
> data->base + REG_MMU_INVLD_END_A);
> - writel_relaxed(F_MMU_INV_RANGE,
> - data->base + REG_MMU_INVALIDATE);
> + writel(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE);

I don't understand this change.

Why is it an "improvement" and which accesses are you ordering with the
writel?

Will

2019-10-12 06:28:18

by Yong Wu (吴勇)

[permalink] [raw]
Subject: Re: [PATCH v2 3/4] iommu/mediatek: Use writel for TLB range invalidation

On Fri, 2019-10-11 at 17:29 +0100, Will Deacon wrote:
> On Wed, Oct 09, 2019 at 09:19:02PM +0800, Yong Wu wrote:
> > Use writel for the register F_MMU_INV_RANGE which is for triggering the
> > HW work. We expect all the setting(iova_start/iova_end...) have already
> > been finished before F_MMU_INV_RANGE.
> >
> > Signed-off-by: Anan.Sun <[email protected]>
> > Signed-off-by: Yong Wu <[email protected]>
> > ---
> > This is a improvement rather than fixing a issue.
> > ---
> > drivers/iommu/mtk_iommu.c | 3 +--
> > 1 file changed, 1 insertion(+), 2 deletions(-)
> >
> > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> > index 24a13a6..607f92c 100644
> > --- a/drivers/iommu/mtk_iommu.c
> > +++ b/drivers/iommu/mtk_iommu.c
> > @@ -187,8 +187,7 @@ static void mtk_iommu_tlb_add_flush(unsigned long iova, size_t size,
> > writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
> > writel_relaxed(iova + size - 1,
> > data->base + REG_MMU_INVLD_END_A);
> > - writel_relaxed(F_MMU_INV_RANGE,
> > - data->base + REG_MMU_INVALIDATE);
> > + writel(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE);
>
> I don't understand this change.
>
> Why is it an "improvement" and which accesses are you ordering with the
> writel?

The register(F_MMU_INV_RANGE) will trigger HW to begin flush range. HW
expect the other register iova_start/end/flush_type always is ready
before trigger. thus I'd like use writel to guarantee the previous
register has been finished.

I didn't see the writel_relaxed cause some error in practice, we only
think writel is necessary here in theory. so call it "improvement".

>
> Will


2019-10-15 00:50:08

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v2 3/4] iommu/mediatek: Use writel for TLB range invalidation

On Sat, Oct 12, 2019 at 02:23:47PM +0800, Yong Wu wrote:
> On Fri, 2019-10-11 at 17:29 +0100, Will Deacon wrote:
> > On Wed, Oct 09, 2019 at 09:19:02PM +0800, Yong Wu wrote:
> > > Use writel for the register F_MMU_INV_RANGE which is for triggering the
> > > HW work. We expect all the setting(iova_start/iova_end...) have already
> > > been finished before F_MMU_INV_RANGE.
> > >
> > > Signed-off-by: Anan.Sun <[email protected]>
> > > Signed-off-by: Yong Wu <[email protected]>
> > > ---
> > > This is a improvement rather than fixing a issue.
> > > ---
> > > drivers/iommu/mtk_iommu.c | 3 +--
> > > 1 file changed, 1 insertion(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> > > index 24a13a6..607f92c 100644
> > > --- a/drivers/iommu/mtk_iommu.c
> > > +++ b/drivers/iommu/mtk_iommu.c
> > > @@ -187,8 +187,7 @@ static void mtk_iommu_tlb_add_flush(unsigned long iova, size_t size,
> > > writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
> > > writel_relaxed(iova + size - 1,
> > > data->base + REG_MMU_INVLD_END_A);
> > > - writel_relaxed(F_MMU_INV_RANGE,
> > > - data->base + REG_MMU_INVALIDATE);
> > > + writel(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE);
> >
> > I don't understand this change.
> >
> > Why is it an "improvement" and which accesses are you ordering with the
> > writel?
>
> The register(F_MMU_INV_RANGE) will trigger HW to begin flush range. HW
> expect the other register iova_start/end/flush_type always is ready
> before trigger. thus I'd like use writel to guarantee the previous
> register has been finished.

Given that these are all MMIO writes to the same device, then
writel_relaxed() should give you the ordering you need. If you look at
memory_barriers.txt, it says:

| they [readX_relaxed() and writeX_relaxed()] are still guaranteed to
| be ordered with respect to other accesses from the same CPU thread
| to the same peripheral when operating on __iomem pointers mapped
| with the default I/O attributes.

> I didn't see the writel_relaxed cause some error in practice, we only
> think writel is necessary here in theory. so call it "improvement".

Ok, but I don't think it's needed in this case.

Will

2019-10-15 05:35:03

by Yong Wu (吴勇)

[permalink] [raw]
Subject: Re: [PATCH v2 3/4] iommu/mediatek: Use writel for TLB range invalidation

On Mon, 2019-10-14 at 22:11 +0100, Will Deacon wrote:
> On Sat, Oct 12, 2019 at 02:23:47PM +0800, Yong Wu wrote:
> > On Fri, 2019-10-11 at 17:29 +0100, Will Deacon wrote:
> > > On Wed, Oct 09, 2019 at 09:19:02PM +0800, Yong Wu wrote:
> > > > Use writel for the register F_MMU_INV_RANGE which is for triggering the
> > > > HW work. We expect all the setting(iova_start/iova_end...) have already
> > > > been finished before F_MMU_INV_RANGE.
> > > >
> > > > Signed-off-by: Anan.Sun <[email protected]>
> > > > Signed-off-by: Yong Wu <[email protected]>
> > > > ---
> > > > This is a improvement rather than fixing a issue.
> > > > ---
> > > > drivers/iommu/mtk_iommu.c | 3 +--
> > > > 1 file changed, 1 insertion(+), 2 deletions(-)
> > > >
> > > > diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> > > > index 24a13a6..607f92c 100644
> > > > --- a/drivers/iommu/mtk_iommu.c
> > > > +++ b/drivers/iommu/mtk_iommu.c
> > > > @@ -187,8 +187,7 @@ static void mtk_iommu_tlb_add_flush(unsigned long iova, size_t size,
> > > > writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
> > > > writel_relaxed(iova + size - 1,
> > > > data->base + REG_MMU_INVLD_END_A);
> > > > - writel_relaxed(F_MMU_INV_RANGE,
> > > > - data->base + REG_MMU_INVALIDATE);
> > > > + writel(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE);
> > >
> > > I don't understand this change.
> > >
> > > Why is it an "improvement" and which accesses are you ordering with the
> > > writel?
> >
> > The register(F_MMU_INV_RANGE) will trigger HW to begin flush range. HW
> > expect the other register iova_start/end/flush_type always is ready
> > before trigger. thus I'd like use writel to guarantee the previous
> > register has been finished.
>
> Given that these are all MMIO writes to the same device, then
> writel_relaxed() should give you the ordering you need. If you look at
> memory_barriers.txt, it says:
>
> | they [readX_relaxed() and writeX_relaxed()] are still guaranteed to
> | be ordered with respect to other accesses from the same CPU thread
> | to the same peripheral when operating on __iomem pointers mapped
> | with the default I/O attributes.

Thanks for this info. See it now. then I will delete this patch in next
version.

>
> > I didn't see the writel_relaxed cause some error in practice, we only
> > think writel is necessary here in theory. so call it "improvement".
>
> Ok, but I don't think it's needed in this case.
>
> Will