2019-11-14 18:27:42

by Omer Shpigelman

[permalink] [raw]
Subject: [PATCH 5/8] habanalabs: optimize MMU unmap

Reduce context close time by skipping the hash table lookup where
possible, in order to avoid a hard reset with open contexts.
A reset with open contexts can potentially lead to a kernel crash, as the
generic pool of the MMU hops is destroyed while it is not empty because
some unmap operations have not been done yet.
This commit mainly affects runs on the simulator.

Signed-off-by: Omer Shpigelman <[email protected]>
---
drivers/misc/habanalabs/mmu.c | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c
index 3a7f8ff19eb2..6262b26e2086 100644
--- a/drivers/misc/habanalabs/mmu.c
+++ b/drivers/misc/habanalabs/mmu.c
@@ -25,10 +25,9 @@ static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
return pgt_info;
}

-static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
+static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
{
struct hl_device *hdev = ctx->hdev;
- struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);

gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr,
hdev->asic_prop.mmu_hop_table_size);
@@ -37,6 +36,13 @@ static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
kfree(pgt_info);
}

+static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
+{
+ struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
+
+ _free_hop(ctx, pgt_info);
+}
+
static u64 alloc_hop(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
@@ -159,7 +165,7 @@ static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
*/
num_of_ptes_left = pgt_info->num_of_ptes;
if (!num_of_ptes_left)
- free_hop(ctx, hop_addr);
+ _free_hop(ctx, pgt_info);

return num_of_ptes_left;
}
@@ -516,13 +522,14 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
dram_default_mapping_fini(ctx);

if (!hash_empty(ctx->mmu_shadow_hash))
- dev_err(hdev->dev, "ctx is freed while it has pgts in use\n");
+ dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
+ ctx->asid);

hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
- dev_err(hdev->dev,
+ dev_err_ratelimited(hdev->dev,
"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
- free_hop(ctx, pgt_info->shadow_addr);
+ _free_hop(ctx, pgt_info);
}

mutex_destroy(&ctx->mmu_lock);
--
2.17.1