2019-11-04 07:04:35

by Yong Wu (吴勇)

[permalink] [raw]
Subject: [PATCH v5 0/7] Improve tlb range flush

This patchset mainly fixes a tlb flush timeout issue and use the new
iommu_gather to re-implement the tlb flush flow. and several clean up
patches about the tlb_flush.

change note:

v5: No code change. Only update the commit message of the last patch[7/7]
suggested from Tomasz in the internal review.

v4: https://lore.kernel.org/linux-iommu/[email protected]/#t
1. Add a new tlb_lock for tlb operations.
2. Delete the pgtlock.
3. Remove the "writel" patch.

v3: https://lore.kernel.org/linux-iommu/[email protected]/T/#t
1. Use the gather to implement the tlb_flush suggested from Tomasz.
2. add some clean up patches.

v2:
https://lore.kernel.org/linux-iommu/[email protected]/T/#t
1. rebase on v5.4-rc1
2. only split to several patches.

v1:
https://lore.kernel.org/linux-iommu/CAAFQd5C3U7pZo4SSUJ52Q7E+0FaUoORQFbQC5RhCHBhi=NFYTw@mail.gmail.com/T/#t

Yong Wu (7):
iommu/mediatek: Correct the flush_iotlb_all callback
iommu/mediatek: Add a new tlb_lock for tlb_flush
iommu/mediatek: Use gather to achieve the tlb range flush
iommu/mediatek: Delete the leaf in the tlb_flush
iommu/mediatek: Move the tlb_sync into tlb_flush
iommu/mediatek: Get rid of the pgtlock
iommu/mediatek: Reduce the tlb flush timeout value

drivers/iommu/mtk_iommu.c | 88 +++++++++++++++--------------------------------
drivers/iommu/mtk_iommu.h | 2 +-
2 files changed, 29 insertions(+), 61 deletions(-)

--
1.9.1


2019-11-04 07:04:43

by Yong Wu (吴勇)

[permalink] [raw]
Subject: [PATCH v5 1/7] iommu/mediatek: Correct the flush_iotlb_all callback

Use the correct tlb_flush_all instead of the original one.

Fixes: 4d689b619445 ("iommu/io-pgtable-arm-v7s: Convert to IOMMU API TLB sync")
Signed-off-by: Yong Wu <[email protected]>
Reviewed-by: Robin Murphy <[email protected]>
---
drivers/iommu/mtk_iommu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 67a483c..76b9388 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -447,7 +447,7 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain,

static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
- mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
+ mtk_iommu_tlb_flush_all(mtk_iommu_get_m4u_data());
}

static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
--
1.9.1

2019-11-04 07:04:53

by Yong Wu (吴勇)

[permalink] [raw]
Subject: [PATCH v5 2/7] iommu/mediatek: Add a new tlb_lock for tlb_flush

The commit 4d689b619445 ("iommu/io-pgtable-arm-v7s: Convert to IOMMU API
TLB sync") help move the tlb_sync of unmap from v7s into the iommu
framework. It helps add a new function "mtk_iommu_iotlb_sync", But it
lacked the lock, then it will cause the variable "tlb_flush_active"
may be changed unexpectedly, we could see this warning log randomly:

mtk-iommu 10205000.iommu: Partial TLB flush timed out, falling back to
full flush

The HW requires tlb_flush/tlb_sync in pairs strictly, this patch adds
a new tlb_lock for tlb operations to fix this issue.

Fixes: 4d689b619445 ("iommu/io-pgtable-arm-v7s: Convert to IOMMU API TLB sync")
Signed-off-by: Yong Wu <[email protected]>
---
drivers/iommu/mtk_iommu.c | 23 ++++++++++++++++++++++-
drivers/iommu/mtk_iommu.h | 1 +
2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 76b9388..c2f6c78 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -219,22 +219,37 @@ static void mtk_iommu_tlb_sync(void *cookie)
static void mtk_iommu_tlb_flush_walk(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
+ struct mtk_iommu_data *data = cookie;
+ unsigned long flags;
+
+ spin_lock_irqsave(&data->tlb_lock, flags);
mtk_iommu_tlb_add_flush_nosync(iova, size, granule, false, cookie);
mtk_iommu_tlb_sync(cookie);
+ spin_unlock_irqrestore(&data->tlb_lock, flags);
}

static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
+ struct mtk_iommu_data *data = cookie;
+ unsigned long flags;
+
+ spin_lock_irqsave(&data->tlb_lock, flags);
mtk_iommu_tlb_add_flush_nosync(iova, size, granule, true, cookie);
mtk_iommu_tlb_sync(cookie);
+ spin_unlock_irqrestore(&data->tlb_lock, flags);
}

static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule,
void *cookie)
{
+ struct mtk_iommu_data *data = cookie;
+ unsigned long flags;
+
+ spin_lock_irqsave(&data->tlb_lock, flags);
mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie);
+ spin_unlock_irqrestore(&data->tlb_lock, flags);
}

static const struct iommu_flush_ops mtk_iommu_flush_ops = {
@@ -453,7 +468,12 @@ static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
- mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
+ struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
+ unsigned long flags;
+
+ spin_lock_irqsave(&data->tlb_lock, flags);
+ mtk_iommu_tlb_sync(data);
+ spin_unlock_irqrestore(&data->tlb_lock, flags);
}

static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -733,6 +753,7 @@ static int mtk_iommu_probe(struct platform_device *pdev)
if (ret)
return ret;

+ spin_lock_init(&data->tlb_lock);
list_add_tail(&data->list, &m4ulist);

if (!iommu_present(&platform_bus_type))
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index fc0f16e..8cae22d 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -58,6 +58,7 @@ struct mtk_iommu_data {
struct iommu_group *m4u_group;
bool enable_4GB;
bool tlb_flush_active;
+ spinlock_t tlb_lock; /* lock for tlb range flush */

struct iommu_device iommu;
const struct mtk_iommu_plat_data *plat_data;
--
1.9.1

2019-11-04 07:05:07

by Yong Wu (吴勇)

[permalink] [raw]
Subject: [PATCH v5 3/7] iommu/mediatek: Use gather to achieve the tlb range flush

Use the iommu_gather mechanism to achieve the tlb range flush.
Gather the iova range in the "tlb_add_page", then flush the merged iova
range in iotlb_sync.

Suggested-by: Tomasz Figa <[email protected]>
Signed-off-by: Yong Wu <[email protected]>
---
drivers/iommu/mtk_iommu.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index c2f6c78..81ac95f 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -245,11 +245,9 @@ static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather,
void *cookie)
{
struct mtk_iommu_data *data = cookie;
- unsigned long flags;
+ struct iommu_domain *domain = &data->m4u_dom->domain;

- spin_lock_irqsave(&data->tlb_lock, flags);
- mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie);
- spin_unlock_irqrestore(&data->tlb_lock, flags);
+ iommu_iotlb_gather_add_page(domain, gather, iova, granule);
}

static const struct iommu_flush_ops mtk_iommu_flush_ops = {
@@ -469,9 +467,15 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
+ size_t length = gather->end - gather->start;
unsigned long flags;

+ if (gather->start == ULONG_MAX)
+ return;
+
spin_lock_irqsave(&data->tlb_lock, flags);
+ mtk_iommu_tlb_add_flush_nosync(gather->start, length, gather->pgsize,
+ false, data);
mtk_iommu_tlb_sync(data);
spin_unlock_irqrestore(&data->tlb_lock, flags);
}
--
1.9.1

2019-11-04 07:05:12

by Yong Wu (吴勇)

[permalink] [raw]
Subject: [PATCH v5 4/7] iommu/mediatek: Delete the leaf in the tlb_flush

In our tlb range flush, we don't care the "leaf". Remove it to simplify
the code. no functional change.

"granule" also is unnecessary for us, Keep it satisfy the format of
tlb_flush_walk.

Signed-off-by: Yong Wu <[email protected]>
Reviewed-by: Robin Murphy <[email protected]>
---
drivers/iommu/mtk_iommu.c | 21 ++++-----------------
1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 81ac95f..1d7254c 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -174,8 +174,7 @@ static void mtk_iommu_tlb_flush_all(void *cookie)
}

static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size,
- size_t granule, bool leaf,
- void *cookie)
+ size_t granule, void *cookie)
{
struct mtk_iommu_data *data = cookie;

@@ -223,19 +222,7 @@ static void mtk_iommu_tlb_flush_walk(unsigned long iova, size_t size,
unsigned long flags;

spin_lock_irqsave(&data->tlb_lock, flags);
- mtk_iommu_tlb_add_flush_nosync(iova, size, granule, false, cookie);
- mtk_iommu_tlb_sync(cookie);
- spin_unlock_irqrestore(&data->tlb_lock, flags);
-}
-
-static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
- size_t granule, void *cookie)
-{
- struct mtk_iommu_data *data = cookie;
- unsigned long flags;
-
- spin_lock_irqsave(&data->tlb_lock, flags);
- mtk_iommu_tlb_add_flush_nosync(iova, size, granule, true, cookie);
+ mtk_iommu_tlb_add_flush_nosync(iova, size, granule, cookie);
mtk_iommu_tlb_sync(cookie);
spin_unlock_irqrestore(&data->tlb_lock, flags);
}
@@ -253,7 +240,7 @@ static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather,
static const struct iommu_flush_ops mtk_iommu_flush_ops = {
.tlb_flush_all = mtk_iommu_tlb_flush_all,
.tlb_flush_walk = mtk_iommu_tlb_flush_walk,
- .tlb_flush_leaf = mtk_iommu_tlb_flush_leaf,
+ .tlb_flush_leaf = mtk_iommu_tlb_flush_walk,
.tlb_add_page = mtk_iommu_tlb_flush_page_nosync,
};

@@ -475,7 +462,7 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,

spin_lock_irqsave(&data->tlb_lock, flags);
mtk_iommu_tlb_add_flush_nosync(gather->start, length, gather->pgsize,
- false, data);
+ data);
mtk_iommu_tlb_sync(data);
spin_unlock_irqrestore(&data->tlb_lock, flags);
}
--
1.9.1

2019-11-04 07:06:12

by Yong Wu (吴勇)

[permalink] [raw]
Subject: [PATCH v5 7/7] iommu/mediatek: Reduce the tlb flush timeout value

Reduce the tlb timeout value from 100000us to 1000us. The original value
would make the kernel stuck for 100 ms with interrupts disabled, which
could have other side effects. The flush is expected to always take much
less than 1 ms, so use that instead.

Signed-off-by: Yong Wu <[email protected]>
---
drivers/iommu/mtk_iommu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index c2b7ed5..8ca2e99 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -192,7 +192,7 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long iova, size_t size,

/* tlb sync */
ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE,
- tmp, tmp != 0, 10, 100000);
+ tmp, tmp != 0, 10, 1000);
if (ret) {
dev_warn(data->dev,
"Partial TLB flush timed out, falling back to full flush\n");
--
1.9.1

2019-11-04 15:21:24

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v5 0/7] Improve tlb range flush

On Mon, Nov 04, 2019 at 03:01:01PM +0800, Yong Wu wrote:
> This patchset mainly fixes a tlb flush timeout issue and use the new
> iommu_gather to re-implement the tlb flush flow. and several clean up
> patches about the tlb_flush.
>
> change note:
>
> v5: No code change. Only update the commit message of the last patch[7/7]
> suggested from Tomasz in the internal review.

I'm assuming Joerg will pick this up for 5.5.

Will

2019-11-11 14:06:49

by Joerg Roedel

[permalink] [raw]
Subject: Re: [PATCH v5 0/7] Improve tlb range flush

On Mon, Nov 04, 2019 at 03:01:01PM +0800, Yong Wu wrote:
> Yong Wu (7):
> iommu/mediatek: Correct the flush_iotlb_all callback
> iommu/mediatek: Add a new tlb_lock for tlb_flush
> iommu/mediatek: Use gather to achieve the tlb range flush
> iommu/mediatek: Delete the leaf in the tlb_flush
> iommu/mediatek: Move the tlb_sync into tlb_flush
> iommu/mediatek: Get rid of the pgtlock
> iommu/mediatek: Reduce the tlb flush timeout value

Applied, thanks.