Update IOMMU M2VCBMT assignments based on new analysis, add
support for the IOMMU for the second 2D graphics core,
improve performance in certain situations, add support
for cache-coherent operations, and generally clean up the
code.
Stepan Moskovchenko (14):
msm: iommu: Increase maximum MID size to 5 bits
msm: iomap: Addresses and IRQs for 2nd GFX core IOMMU
msm: iommu: Use more consistent naming in platform data
msm: iommu: Revise GFX3D IOMMU contexts and M2V mappings
msm: iommu: Revise GFX2D0 IOMMU contexts and M2V mappings
msm: iommu: Support for the 2nd gfx core's IOMMU
msm: iommu: Mark functions with the right section names
msm: iommu: Don't flush page tables if no devices attached
msm: iommu: Kconfig option for cacheable page tables
msm: iommu: Check if device is already attached
msm: iommu: Kconfig dependency for the IOMMU API
msm: iommu: Definitions for extended memory attributes
msm: iommu: Support cache-coherent memory access
msm: iommu: Miscellaneous code cleanup
arch/arm/mach-msm/Kconfig | 19 ++
arch/arm/mach-msm/devices-msm8x60-iommu.c | 243 +++++++++++++----------
arch/arm/mach-msm/include/mach/iommu.h | 13 ++-
arch/arm/mach-msm/include/mach/iommu_hw-8xxx.h | 22 ++-
arch/arm/mach-msm/include/mach/irqs-8x60.h | 7 +-
arch/arm/mach-msm/include/mach/msm_iomap-8x60.h | 3 +
arch/arm/mach-msm/iommu.c | 144 +++++++++++---
arch/arm/mach-msm/iommu_dev.c | 4 +-
8 files changed, 310 insertions(+), 145 deletions(-)
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
Add the platform data and resources needed for the second
2D graphics core's IOMMU.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/devices-msm8x60-iommu.c | 54 +++++++++++++++++++++++++++++
1 files changed, 54 insertions(+), 0 deletions(-)
diff --git a/arch/arm/mach-msm/devices-msm8x60-iommu.c b/arch/arm/mach-msm/devices-msm8x60-iommu.c
index 8cccb26..a6ecd39 100644
--- a/arch/arm/mach-msm/devices-msm8x60-iommu.c
+++ b/arch/arm/mach-msm/devices-msm8x60-iommu.c
@@ -254,6 +254,27 @@ static struct resource msm_iommu_gfx2d0_resources[] = {
},
};
+static struct resource msm_iommu_gfx2d1_resources[] = {
+ {
+ .start = MSM_IOMMU_GFX2D1_PHYS,
+ .end = MSM_IOMMU_GFX2D1_PHYS + MSM_IOMMU_GFX2D1_SIZE - 1,
+ .name = "physbase",
+ .flags = IORESOURCE_MEM,
+ },
+ {
+ .name = "nonsecure_irq",
+ .start = SMMU_GFX2D1_CB_SC_NON_SECURE_IRQ,
+ .end = SMMU_GFX2D1_CB_SC_NON_SECURE_IRQ,
+ .flags = IORESOURCE_IRQ,
+ },
+ {
+ .name = "secure_irq",
+ .start = SMMU_GFX2D1_CB_SC_SECURE_IRQ,
+ .end = SMMU_GFX2D1_CB_SC_SECURE_IRQ,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
static struct platform_device msm_root_iommu_dev = {
.name = "msm_iommu",
.id = -1,
@@ -307,6 +328,11 @@ static struct msm_iommu_dev gfx2d0_iommu = {
.clk_rate = 27000000
};
+static struct msm_iommu_dev gfx2d1_iommu = {
+ .name = "gfx2d1",
+ .clk_rate = 27000000
+};
+
static struct platform_device msm_device_iommu_jpegd = {
.name = "msm_iommu",
.id = 0,
@@ -417,6 +443,16 @@ static struct platform_device msm_device_iommu_gfx2d0 = {
.resource = msm_iommu_gfx2d0_resources,
};
+struct platform_device msm_device_iommu_gfx2d1 = {
+ .name = "msm_iommu",
+ .id = 11,
+ .dev = {
+ .parent = &msm_root_iommu_dev.dev,
+ },
+ .num_resources = ARRAY_SIZE(msm_iommu_gfx2d1_resources),
+ .resource = msm_iommu_gfx2d1_resources,
+};
+
static struct msm_iommu_ctx_dev jpegd_src_ctx = {
.name = "jpegd_src",
.num = 0,
@@ -538,6 +574,12 @@ static struct msm_iommu_ctx_dev gfx2d0_2d0_ctx = {
.mids = {0, 1, 2, 3, 4, 5, 6, 7, -1}
};
+static struct msm_iommu_ctx_dev gfx2d1_2d1_ctx = {
+ .name = "gfx2d1_2d1",
+ .num = 0,
+ .mids = {0, 1, 2, 3, 4, 5, 6, 7, -1}
+};
+
static struct platform_device msm_device_jpegd_src_ctx = {
.name = "msm_iommu_ctx",
.id = 0,
@@ -698,6 +740,14 @@ static struct platform_device msm_device_gfx2d0_2d0_ctx = {
},
};
+static struct platform_device msm_device_gfx2d1_2d1_ctx = {
+ .name = "msm_iommu_ctx",
+ .id = 20,
+ .dev = {
+ .parent = &msm_device_iommu_gfx2d1.dev,
+ },
+};
+
static struct platform_device *msm_iommu_devs[] = {
&msm_device_iommu_jpegd,
&msm_device_iommu_vpe,
@@ -710,6 +760,7 @@ static struct platform_device *msm_iommu_devs[] = {
&msm_device_iommu_vcodec_b,
&msm_device_iommu_gfx3d,
&msm_device_iommu_gfx2d0,
+ &msm_device_iommu_gfx2d1,
};
static struct msm_iommu_dev *msm_iommu_data[] = {
@@ -724,6 +775,7 @@ static struct msm_iommu_dev *msm_iommu_data[] = {
&vcodec_b_iommu,
&gfx3d_iommu,
&gfx2d0_iommu,
+ &gfx2d1_iommu,
};
static struct platform_device *msm_iommu_ctx_devs[] = {
@@ -747,6 +799,7 @@ static struct platform_device *msm_iommu_ctx_devs[] = {
&msm_device_gfx3d_user_ctx,
&msm_device_gfx3d_priv_ctx,
&msm_device_gfx2d0_2d0_ctx,
+ &msm_device_gfx2d1_2d1_ctx,
};
static struct msm_iommu_ctx_dev *msm_iommu_ctx_data[] = {
@@ -770,6 +823,7 @@ static struct msm_iommu_ctx_dev *msm_iommu_ctx_data[] = {
&gfx3d_user_ctx,
&gfx3d_priv_ctx,
&gfx2d0_2d0_ctx,
+ &gfx2d1_2d1_ctx,
};
static int msm8x60_iommu_init(void)
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
Update the platform data for the 2D core's IOMMU based on
the revised usage model. Merge the two contexts and their
M2VCBMT mappings, adding the previously-missing mappings to
the newly-formed context.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/devices-msm8x60-iommu.c | 28 ++++++----------------------
1 files changed, 6 insertions(+), 22 deletions(-)
diff --git a/arch/arm/mach-msm/devices-msm8x60-iommu.c b/arch/arm/mach-msm/devices-msm8x60-iommu.c
index 22d0c7c..8cccb26 100644
--- a/arch/arm/mach-msm/devices-msm8x60-iommu.c
+++ b/arch/arm/mach-msm/devices-msm8x60-iommu.c
@@ -532,16 +532,10 @@ static struct msm_iommu_ctx_dev gfx3d_priv_ctx = {
31, -1}
};
-static struct msm_iommu_ctx_dev gfx2d0_pixv1_ctx = {
- .name = "gfx2d0_pixv1_smmu",
+static struct msm_iommu_ctx_dev gfx2d0_2d0_ctx = {
+ .name = "gfx2d0_2d0",
.num = 0,
- .mids = {0, 3, 4, -1}
-};
-
-static struct msm_iommu_ctx_dev gfx2d0_texv3_ctx = {
- .name = "gfx2d0_texv3_smmu",
- .num = 1,
- .mids = {1, 6, 7, -1}
+ .mids = {0, 1, 2, 3, 4, 5, 6, 7, -1}
};
static struct platform_device msm_device_jpegd_src_ctx = {
@@ -696,7 +690,7 @@ static struct platform_device msm_device_gfx3d_priv_ctx = {
},
};
-static struct platform_device msm_device_gfx2d0_pixv1_ctx = {
+static struct platform_device msm_device_gfx2d0_2d0_ctx = {
.name = "msm_iommu_ctx",
.id = 19,
.dev = {
@@ -704,14 +698,6 @@ static struct platform_device msm_device_gfx2d0_pixv1_ctx = {
},
};
-static struct platform_device msm_device_gfx2d0_texv3_ctx = {
- .name = "msm_iommu_ctx",
- .id = 20,
- .dev = {
- .parent = &msm_device_iommu_gfx2d0.dev,
- },
-};
-
static struct platform_device *msm_iommu_devs[] = {
&msm_device_iommu_jpegd,
&msm_device_iommu_vpe,
@@ -760,8 +746,7 @@ static struct platform_device *msm_iommu_ctx_devs[] = {
&msm_device_vcodec_b_mm2_ctx,
&msm_device_gfx3d_user_ctx,
&msm_device_gfx3d_priv_ctx,
- &msm_device_gfx2d0_pixv1_ctx,
- &msm_device_gfx2d0_texv3_ctx,
+ &msm_device_gfx2d0_2d0_ctx,
};
static struct msm_iommu_ctx_dev *msm_iommu_ctx_data[] = {
@@ -784,8 +769,7 @@ static struct msm_iommu_ctx_dev *msm_iommu_ctx_data[] = {
&vcodec_b_mm2_ctx,
&gfx3d_user_ctx,
&gfx3d_priv_ctx,
- &gfx2d0_pixv1_ctx,
- &gfx2d0_texv3_ctx,
+ &gfx2d0_2d0_ctx,
};
static int msm8x60_iommu_init(void)
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
An IOMMU device can only be attached to one IOMMU domain at
any given time. Check whether the device is already
attached to a domain before allowing it to be attached to
another domain. If so, return -EBUSY.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/iommu.c | 5 +++++
1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/arch/arm/mach-msm/iommu.c b/arch/arm/mach-msm/iommu.c
index 74f2157..67e8f53 100644
--- a/arch/arm/mach-msm/iommu.c
+++ b/arch/arm/mach-msm/iommu.c
@@ -241,6 +241,11 @@ static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
goto fail;
}
+ if (!list_empty(&ctx_drvdata->attached_elm)) {
+ ret = -EBUSY;
+ goto fail;
+ }
+
list_for_each_entry(tmp_drvdata, &priv->list_attached, attached_elm)
if (tmp_drvdata == ctx_drvdata) {
ret = -EBUSY;
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
Make the IOMMU driver select the IOMMU API in the kernel
configuration.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/Kconfig | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/arch/arm/mach-msm/Kconfig b/arch/arm/mach-msm/Kconfig
index 7781920..2f7ca4c 100644
--- a/arch/arm/mach-msm/Kconfig
+++ b/arch/arm/mach-msm/Kconfig
@@ -44,6 +44,7 @@ config ARCH_MSM8X60
select CPU_V7
select MSM_V2_TLMM
select MSM_GPIOMUX
+ select IOMMU_API
endchoice
@@ -177,4 +178,7 @@ config MSM_GPIOMUX
config MSM_V2_TLMM
bool
+
+config IOMMU_API
+ bool
endif
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
Add a Kconfig option to allow the IOMMU page tables to be
coherent in the L2 cache. This generally reduces TLB miss
latencies, but may lead to cache pollution if the
multimedia core's access pattern does not benefit from fast
handling of TLB misses.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/Kconfig | 15 +++++++++++++++
1 files changed, 15 insertions(+), 0 deletions(-)
diff --git a/arch/arm/mach-msm/Kconfig b/arch/arm/mach-msm/Kconfig
index dbbcfeb..7781920 100644
--- a/arch/arm/mach-msm/Kconfig
+++ b/arch/arm/mach-msm/Kconfig
@@ -122,6 +122,21 @@ config MACH_MSM8X60_FFA
endmenu
+config IOMMU_PGTABLES_L2
+ depends on ARCH_MSM8X60
+ depends on MMU
+ depends on CPU_DCACHE_DISABLE=n
+ depends on SMP
+ bool "Cacheable IOMMU page tables"
+ default y
+ help
+ Allows the IOMMU page tables to be brought into the L2 cache. This
+ improves the TLB miss latency at the expense of potential pollution
+ of the L2 cache. This option has been shown to improve multimedia
+ performance in some cases.
+
+ If unsure, say Y here.
+
config MSM_DEBUG_UART
int
default 1 if MSM_DEBUG_UART1
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
Remove some unneeded assignments and messages, restructure
a failure path in iova_to_phys, and make __flush_iotlb
return int in preparation for adding IOMMU clock control.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/iommu.c | 29 +++++++++++++++++------------
1 files changed, 17 insertions(+), 12 deletions(-)
diff --git a/arch/arm/mach-msm/iommu.c b/arch/arm/mach-msm/iommu.c
index 935025e..932728c 100644
--- a/arch/arm/mach-msm/iommu.c
+++ b/arch/arm/mach-msm/iommu.c
@@ -50,12 +50,12 @@ struct msm_priv {
struct list_head list_attached;
};
-static void __flush_iotlb(struct iommu_domain *domain)
+static int __flush_iotlb(struct iommu_domain *domain)
{
struct msm_priv *priv = domain->priv;
struct msm_iommu_drvdata *iommu_drvdata;
struct msm_iommu_ctx_drvdata *ctx_drvdata;
-
+ int ret = 0;
#ifndef CONFIG_IOMMU_PGTABLES_L2
unsigned long *fl_table = priv->pgtable;
int i;
@@ -79,6 +79,8 @@ static void __flush_iotlb(struct iommu_domain *domain)
iommu_drvdata = dev_get_drvdata(ctx_drvdata->pdev->dev.parent);
SET_CTX_TLBIALL(iommu_drvdata->base, ctx_drvdata->num, 0);
}
+
+ return ret;
}
static void __reset_context(void __iomem *base, int ctx)
@@ -267,7 +269,7 @@ static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
__pa(priv->pgtable));
list_add(&(ctx_drvdata->attached_elm), &priv->list_attached);
- __flush_iotlb(domain);
+ ret = __flush_iotlb(domain);
fail:
spin_unlock_irqrestore(&msm_iommu_lock, flags);
@@ -282,6 +284,7 @@ static void msm_iommu_detach_dev(struct iommu_domain *domain,
struct msm_iommu_drvdata *iommu_drvdata;
struct msm_iommu_ctx_drvdata *ctx_drvdata;
unsigned long flags;
+ int ret;
spin_lock_irqsave(&msm_iommu_lock, flags);
priv = domain->priv;
@@ -296,7 +299,10 @@ static void msm_iommu_detach_dev(struct iommu_domain *domain,
if (!iommu_drvdata || !ctx_drvdata || !ctx_dev)
goto fail;
- __flush_iotlb(domain);
+ ret = __flush_iotlb(domain);
+ if (ret)
+ goto fail;
+
__reset_context(iommu_drvdata->base, ctx_dev->num);
list_del_init(&ctx_drvdata->attached_elm);
@@ -410,7 +416,7 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
SL_AP1 | SL_SHARED | SL_TYPE_LARGE | pgprot;
}
- __flush_iotlb(domain);
+ ret = __flush_iotlb(domain);
fail:
spin_unlock_irqrestore(&msm_iommu_lock, flags);
return ret;
@@ -495,7 +501,7 @@ static int msm_iommu_unmap(struct iommu_domain *domain, unsigned long va,
}
}
- __flush_iotlb(domain);
+ ret = __flush_iotlb(domain);
fail:
spin_unlock_irqrestore(&msm_iommu_lock, flags);
return ret;
@@ -530,9 +536,6 @@ static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain,
SET_CTX_TLBIALL(base, ctx, 0);
SET_V2PPR_VA(base, ctx, va >> V2Pxx_VA_SHIFT);
- if (GET_FAULT(base, ctx))
- goto fail;
-
par = GET_PAR(base, ctx);
/* We are dealing with a supersection */
@@ -541,6 +544,9 @@ static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain,
else /* Upper 20 bits from PAR, lower 12 from VA */
ret = (par & 0xFFFFF000) | (va & 0x00000FFF);
+ if (GET_FAULT(base, ctx))
+ ret = 0;
+
fail:
spin_unlock_irqrestore(&msm_iommu_lock, flags);
return ret;
@@ -583,8 +589,8 @@ irqreturn_t msm_iommu_fault_handler(int irq, void *dev_id)
{
struct msm_iommu_drvdata *drvdata = dev_id;
void __iomem *base;
- unsigned int fsr = 0;
- int ncb = 0, i = 0;
+ unsigned int fsr;
+ int ncb, i;
spin_lock(&msm_iommu_lock);
@@ -595,7 +601,6 @@ irqreturn_t msm_iommu_fault_handler(int irq, void *dev_id)
base = drvdata->base;
- pr_err("===== WOAH! =====\n");
pr_err("Unexpected IOMMU page fault!\n");
pr_err("base = %08x\n", (unsigned int) base);
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
Add support for allowing IOMMU memory transactions to be
cache coherent, eliminating the need for software cache
management in certain situations. This can lead to
improvements in performance and power usage, assuming the
multimedia core's access pattern exhibits spatial locality
and that its working set fits into the cache.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/iommu.c | 93 +++++++++++++++++++++++++++++++++++++++-----
1 files changed, 82 insertions(+), 11 deletions(-)
diff --git a/arch/arm/mach-msm/iommu.c b/arch/arm/mach-msm/iommu.c
index 67e8f53..935025e 100644
--- a/arch/arm/mach-msm/iommu.c
+++ b/arch/arm/mach-msm/iommu.c
@@ -33,6 +33,16 @@
#include <mach/iommu_hw-8xxx.h>
#include <mach/iommu.h>
+#define MRC(reg, processor, op1, crn, crm, op2) \
+__asm__ __volatile__ ( \
+" mrc " #processor "," #op1 ", %0," #crn "," #crm "," #op2 "\n" \
+: "=r" (reg))
+
+#define RCP15_PRRR(reg) MRC(reg, p15, 0, c10, c2, 0)
+#define RCP15_NMRR(reg) MRC(reg, p15, 0, c10, c2, 1)
+
+static int msm_iommu_tex_class[4];
+
DEFINE_SPINLOCK(msm_iommu_lock);
struct msm_priv {
@@ -98,6 +108,7 @@ static void __reset_context(void __iomem *base, int ctx)
static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable)
{
+ unsigned int prrr, nmrr;
__reset_context(base, ctx);
/* Set up HTW mode */
@@ -130,11 +141,11 @@ static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable)
/* Turn on TEX Remap */
SET_TRE(base, ctx, 1);
- /* Do not configure PRRR / NMRR on the IOMMU for now. We will assume
- * TEX class 0 for everything until attributes are properly worked out
- */
- SET_PRRR(base, ctx, 0);
- SET_NMRR(base, ctx, 0);
+ /* Set TEX remap attributes */
+ RCP15_PRRR(prrr);
+ RCP15_NMRR(nmrr);
+ SET_PRRR(base, ctx, prrr);
+ SET_NMRR(base, ctx, nmrr);
/* Turn on BFB prefetch */
SET_BFBDFE(base, ctx, 1);
@@ -304,12 +315,21 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
unsigned long *sl_table;
unsigned long *sl_pte;
unsigned long sl_offset;
+ unsigned int pgprot;
size_t len = 0x1000UL << order;
- int ret = 0;
+ int ret = 0, tex, sh;
spin_lock_irqsave(&msm_iommu_lock, flags);
- priv = domain->priv;
+ sh = (prot & MSM_IOMMU_ATTR_SH) ? 1 : 0;
+ tex = msm_iommu_tex_class[prot & 0x03];
+
+ if (tex < 0 || tex > NUM_TEX_CLASS - 1) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ priv = domain->priv;
if (!priv) {
ret = -EINVAL;
goto fail;
@@ -330,6 +350,18 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
goto fail;
}
+ if (len == SZ_16M || len == SZ_1M) {
+ pgprot = sh ? FL_SHARED : 0;
+ pgprot |= tex & 0x01 ? FL_BUFFERABLE : 0;
+ pgprot |= tex & 0x02 ? FL_CACHEABLE : 0;
+ pgprot |= tex & 0x04 ? FL_TEX0 : 0;
+ } else {
+ pgprot = sh ? SL_SHARED : 0;
+ pgprot |= tex & 0x01 ? SL_BUFFERABLE : 0;
+ pgprot |= tex & 0x02 ? SL_CACHEABLE : 0;
+ pgprot |= tex & 0x04 ? SL_TEX0 : 0;
+ }
+
fl_offset = FL_OFFSET(va); /* Upper 12 bits */
fl_pte = fl_table + fl_offset; /* int pointers, 4 bytes */
@@ -338,12 +370,12 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
for (i = 0; i < 16; i++)
*(fl_pte+i) = (pa & 0xFF000000) | FL_SUPERSECTION |
FL_AP_READ | FL_AP_WRITE | FL_TYPE_SECT |
- FL_SHARED;
+ FL_SHARED | pgprot;
}
if (len == SZ_1M)
*fl_pte = (pa & 0xFFF00000) | FL_AP_READ | FL_AP_WRITE |
- FL_TYPE_SECT | FL_SHARED;
+ FL_TYPE_SECT | FL_SHARED | pgprot;
/* Need a 2nd level table */
if ((len == SZ_4K || len == SZ_64K) && (*fl_pte) == 0) {
@@ -368,14 +400,14 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
if (len == SZ_4K)
*sl_pte = (pa & SL_BASE_MASK_SMALL) | SL_AP0 | SL_AP1 |
- SL_SHARED | SL_TYPE_SMALL;
+ SL_SHARED | SL_TYPE_SMALL | pgprot;
if (len == SZ_64K) {
int i;
for (i = 0; i < 16; i++)
*(sl_pte+i) = (pa & SL_BASE_MASK_LARGE) | SL_AP0 |
- SL_AP1 | SL_SHARED | SL_TYPE_LARGE;
+ SL_AP1 | SL_SHARED | SL_TYPE_LARGE | pgprot;
}
__flush_iotlb(domain);
@@ -593,8 +625,47 @@ static struct iommu_ops msm_iommu_ops = {
.domain_has_cap = msm_iommu_domain_has_cap
};
+static int __init get_tex_class(int icp, int ocp, int mt, int nos)
+{
+ int i = 0;
+ unsigned int prrr = 0;
+ unsigned int nmrr = 0;
+ int c_icp, c_ocp, c_mt, c_nos;
+
+ RCP15_PRRR(prrr);
+ RCP15_NMRR(nmrr);
+
+ for (i = 0; i < NUM_TEX_CLASS; i++) {
+ c_nos = PRRR_NOS(prrr, i);
+ c_mt = PRRR_MT(prrr, i);
+ c_icp = NMRR_ICP(nmrr, i);
+ c_ocp = NMRR_OCP(nmrr, i);
+
+ if (icp == c_icp && ocp == c_ocp && c_mt == mt && c_nos == nos)
+ return i;
+ }
+
+ return -ENODEV;
+}
+
+static void __init setup_iommu_tex_classes(void)
+{
+ msm_iommu_tex_class[MSM_IOMMU_ATTR_NONCACHED] =
+ get_tex_class(CP_NONCACHED, CP_NONCACHED, MT_NORMAL, 1);
+
+ msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_WA] =
+ get_tex_class(CP_WB_WA, CP_WB_WA, MT_NORMAL, 1);
+
+ msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_NWA] =
+ get_tex_class(CP_WB_NWA, CP_WB_NWA, MT_NORMAL, 1);
+
+ msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WT] =
+ get_tex_class(CP_WT, CP_WT, MT_NORMAL, 1);
+}
+
static int __init msm_iommu_init(void)
{
+ setup_iommu_tex_classes();
register_iommu(&msm_iommu_ops);
return 0;
}
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
Add register addresses and IRQ numbers for the IOMMU used
for the second 2D graphics core.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/include/mach/irqs-8x60.h | 7 ++++++-
arch/arm/mach-msm/include/mach/msm_iomap-8x60.h | 3 +++
2 files changed, 9 insertions(+), 1 deletions(-)
diff --git a/arch/arm/mach-msm/include/mach/irqs-8x60.h b/arch/arm/mach-msm/include/mach/irqs-8x60.h
index 36074cf..f65841c 100644
--- a/arch/arm/mach-msm/include/mach/irqs-8x60.h
+++ b/arch/arm/mach-msm/include/mach/irqs-8x60.h
@@ -237,7 +237,12 @@
#define GSBI11_QUP_IRQ (GIC_SPI_START + 194)
#define INT_UART12DM_IRQ (GIC_SPI_START + 195)
#define GSBI12_QUP_IRQ (GIC_SPI_START + 196)
-/*SPI 197 to 216 arent used in 8x60*/
+
+/* SPI 197 to 209 aren't used in 8x60 */
+#define SMMU_GFX2D1_CB_SC_SECURE_IRQ (GIC_SPI_START + 210)
+#define SMMU_GFX2D1_CB_SC_NON_SECURE_IRQ (GIC_SPI_START + 211)
+
+/* SPI 212 to 216 aren't used in 8x60 */
#define SMPSS_SPARE_1 (GIC_SPI_START + 217)
#define SMPSS_SPARE_2 (GIC_SPI_START + 218)
#define SMPSS_SPARE_3 (GIC_SPI_START + 219)
diff --git a/arch/arm/mach-msm/include/mach/msm_iomap-8x60.h b/arch/arm/mach-msm/include/mach/msm_iomap-8x60.h
index 45bab50..7c43a9b 100644
--- a/arch/arm/mach-msm/include/mach/msm_iomap-8x60.h
+++ b/arch/arm/mach-msm/include/mach/msm_iomap-8x60.h
@@ -98,4 +98,7 @@
#define MSM_IOMMU_GFX2D0_PHYS 0x07D00000
#define MSM_IOMMU_GFX2D0_SIZE SZ_1M
+#define MSM_IOMMU_GFX2D1_PHYS 0x07E00000
+#define MSM_IOMMU_GFX2D1_SIZE SZ_1M
+
#endif
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
Rename all the IOMMU platform devices so that the names are
more consistent with the rest of the codebase.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/devices-msm8x60-iommu.c | 136 ++++++++++++++--------------
1 files changed, 68 insertions(+), 68 deletions(-)
diff --git a/arch/arm/mach-msm/devices-msm8x60-iommu.c b/arch/arm/mach-msm/devices-msm8x60-iommu.c
index 89b9d44..9e657e0 100644
--- a/arch/arm/mach-msm/devices-msm8x60-iommu.c
+++ b/arch/arm/mach-msm/devices-msm8x60-iommu.c
@@ -259,55 +259,55 @@ static struct platform_device msm_root_iommu_dev = {
.id = -1,
};
-static struct msm_iommu_dev jpegd_smmu = {
+static struct msm_iommu_dev jpegd_iommu = {
.name = "jpegd",
.clk_rate = -1
};
-static struct msm_iommu_dev vpe_smmu = {
+static struct msm_iommu_dev vpe_iommu = {
.name = "vpe"
};
-static struct msm_iommu_dev mdp0_smmu = {
+static struct msm_iommu_dev mdp0_iommu = {
.name = "mdp0"
};
-static struct msm_iommu_dev mdp1_smmu = {
+static struct msm_iommu_dev mdp1_iommu = {
.name = "mdp1"
};
-static struct msm_iommu_dev rot_smmu = {
+static struct msm_iommu_dev rot_iommu = {
.name = "rot"
};
-static struct msm_iommu_dev ijpeg_smmu = {
+static struct msm_iommu_dev ijpeg_iommu = {
.name = "ijpeg"
};
-static struct msm_iommu_dev vfe_smmu = {
+static struct msm_iommu_dev vfe_iommu = {
.name = "vfe",
.clk_rate = -1
};
-static struct msm_iommu_dev vcodec_a_smmu = {
+static struct msm_iommu_dev vcodec_a_iommu = {
.name = "vcodec_a"
};
-static struct msm_iommu_dev vcodec_b_smmu = {
+static struct msm_iommu_dev vcodec_b_iommu = {
.name = "vcodec_b"
};
-static struct msm_iommu_dev gfx3d_smmu = {
+static struct msm_iommu_dev gfx3d_iommu = {
.name = "gfx3d",
.clk_rate = 27000000
};
-static struct msm_iommu_dev gfx2d0_smmu = {
+static struct msm_iommu_dev gfx2d0_iommu = {
.name = "gfx2d0",
.clk_rate = 27000000
};
-static struct platform_device msm_device_smmu_jpegd = {
+static struct platform_device msm_device_iommu_jpegd = {
.name = "msm_iommu",
.id = 0,
.dev = {
@@ -317,7 +317,7 @@ static struct platform_device msm_device_smmu_jpegd = {
.resource = msm_iommu_jpegd_resources,
};
-static struct platform_device msm_device_smmu_vpe = {
+static struct platform_device msm_device_iommu_vpe = {
.name = "msm_iommu",
.id = 1,
.dev = {
@@ -327,7 +327,7 @@ static struct platform_device msm_device_smmu_vpe = {
.resource = msm_iommu_vpe_resources,
};
-static struct platform_device msm_device_smmu_mdp0 = {
+static struct platform_device msm_device_iommu_mdp0 = {
.name = "msm_iommu",
.id = 2,
.dev = {
@@ -337,7 +337,7 @@ static struct platform_device msm_device_smmu_mdp0 = {
.resource = msm_iommu_mdp0_resources,
};
-static struct platform_device msm_device_smmu_mdp1 = {
+static struct platform_device msm_device_iommu_mdp1 = {
.name = "msm_iommu",
.id = 3,
.dev = {
@@ -347,7 +347,7 @@ static struct platform_device msm_device_smmu_mdp1 = {
.resource = msm_iommu_mdp1_resources,
};
-static struct platform_device msm_device_smmu_rot = {
+static struct platform_device msm_device_iommu_rot = {
.name = "msm_iommu",
.id = 4,
.dev = {
@@ -357,7 +357,7 @@ static struct platform_device msm_device_smmu_rot = {
.resource = msm_iommu_rot_resources,
};
-static struct platform_device msm_device_smmu_ijpeg = {
+static struct platform_device msm_device_iommu_ijpeg = {
.name = "msm_iommu",
.id = 5,
.dev = {
@@ -367,7 +367,7 @@ static struct platform_device msm_device_smmu_ijpeg = {
.resource = msm_iommu_ijpeg_resources,
};
-static struct platform_device msm_device_smmu_vfe = {
+static struct platform_device msm_device_iommu_vfe = {
.name = "msm_iommu",
.id = 6,
.dev = {
@@ -377,7 +377,7 @@ static struct platform_device msm_device_smmu_vfe = {
.resource = msm_iommu_vfe_resources,
};
-static struct platform_device msm_device_smmu_vcodec_a = {
+static struct platform_device msm_device_iommu_vcodec_a = {
.name = "msm_iommu",
.id = 7,
.dev = {
@@ -387,7 +387,7 @@ static struct platform_device msm_device_smmu_vcodec_a = {
.resource = msm_iommu_vcodec_a_resources,
};
-static struct platform_device msm_device_smmu_vcodec_b = {
+static struct platform_device msm_device_iommu_vcodec_b = {
.name = "msm_iommu",
.id = 8,
.dev = {
@@ -397,7 +397,7 @@ static struct platform_device msm_device_smmu_vcodec_b = {
.resource = msm_iommu_vcodec_b_resources,
};
-static struct platform_device msm_device_smmu_gfx3d = {
+static struct platform_device msm_device_iommu_gfx3d = {
.name = "msm_iommu",
.id = 9,
.dev = {
@@ -407,7 +407,7 @@ static struct platform_device msm_device_smmu_gfx3d = {
.resource = msm_iommu_gfx3d_resources,
};
-static struct platform_device msm_device_smmu_gfx2d0 = {
+static struct platform_device msm_device_iommu_gfx2d0 = {
.name = "msm_iommu",
.id = 10,
.dev = {
@@ -553,7 +553,7 @@ static struct platform_device msm_device_jpegd_src_ctx = {
.name = "msm_iommu_ctx",
.id = 0,
.dev = {
- .parent = &msm_device_smmu_jpegd.dev,
+ .parent = &msm_device_iommu_jpegd.dev,
},
};
@@ -561,7 +561,7 @@ static struct platform_device msm_device_jpegd_dst_ctx = {
.name = "msm_iommu_ctx",
.id = 1,
.dev = {
- .parent = &msm_device_smmu_jpegd.dev,
+ .parent = &msm_device_iommu_jpegd.dev,
},
};
@@ -569,7 +569,7 @@ static struct platform_device msm_device_vpe_src_ctx = {
.name = "msm_iommu_ctx",
.id = 2,
.dev = {
- .parent = &msm_device_smmu_vpe.dev,
+ .parent = &msm_device_iommu_vpe.dev,
},
};
@@ -577,7 +577,7 @@ static struct platform_device msm_device_vpe_dst_ctx = {
.name = "msm_iommu_ctx",
.id = 3,
.dev = {
- .parent = &msm_device_smmu_vpe.dev,
+ .parent = &msm_device_iommu_vpe.dev,
},
};
@@ -585,7 +585,7 @@ static struct platform_device msm_device_mdp_vg1_ctx = {
.name = "msm_iommu_ctx",
.id = 4,
.dev = {
- .parent = &msm_device_smmu_mdp0.dev,
+ .parent = &msm_device_iommu_mdp0.dev,
},
};
@@ -593,7 +593,7 @@ static struct platform_device msm_device_mdp_rgb1_ctx = {
.name = "msm_iommu_ctx",
.id = 5,
.dev = {
- .parent = &msm_device_smmu_mdp0.dev,
+ .parent = &msm_device_iommu_mdp0.dev,
},
};
@@ -601,7 +601,7 @@ static struct platform_device msm_device_mdp_vg2_ctx = {
.name = "msm_iommu_ctx",
.id = 6,
.dev = {
- .parent = &msm_device_smmu_mdp1.dev,
+ .parent = &msm_device_iommu_mdp1.dev,
},
};
@@ -609,7 +609,7 @@ static struct platform_device msm_device_mdp_rgb2_ctx = {
.name = "msm_iommu_ctx",
.id = 7,
.dev = {
- .parent = &msm_device_smmu_mdp1.dev,
+ .parent = &msm_device_iommu_mdp1.dev,
},
};
@@ -617,7 +617,7 @@ static struct platform_device msm_device_rot_src_ctx = {
.name = "msm_iommu_ctx",
.id = 8,
.dev = {
- .parent = &msm_device_smmu_rot.dev,
+ .parent = &msm_device_iommu_rot.dev,
},
};
@@ -625,7 +625,7 @@ static struct platform_device msm_device_rot_dst_ctx = {
.name = "msm_iommu_ctx",
.id = 9,
.dev = {
- .parent = &msm_device_smmu_rot.dev,
+ .parent = &msm_device_iommu_rot.dev,
},
};
@@ -633,7 +633,7 @@ static struct platform_device msm_device_ijpeg_src_ctx = {
.name = "msm_iommu_ctx",
.id = 10,
.dev = {
- .parent = &msm_device_smmu_ijpeg.dev,
+ .parent = &msm_device_iommu_ijpeg.dev,
},
};
@@ -641,7 +641,7 @@ static struct platform_device msm_device_ijpeg_dst_ctx = {
.name = "msm_iommu_ctx",
.id = 11,
.dev = {
- .parent = &msm_device_smmu_ijpeg.dev,
+ .parent = &msm_device_iommu_ijpeg.dev,
},
};
@@ -649,7 +649,7 @@ static struct platform_device msm_device_vfe_imgwr_ctx = {
.name = "msm_iommu_ctx",
.id = 12,
.dev = {
- .parent = &msm_device_smmu_vfe.dev,
+ .parent = &msm_device_iommu_vfe.dev,
},
};
@@ -657,7 +657,7 @@ static struct platform_device msm_device_vfe_misc_ctx = {
.name = "msm_iommu_ctx",
.id = 13,
.dev = {
- .parent = &msm_device_smmu_vfe.dev,
+ .parent = &msm_device_iommu_vfe.dev,
},
};
@@ -665,7 +665,7 @@ static struct platform_device msm_device_vcodec_a_stream_ctx = {
.name = "msm_iommu_ctx",
.id = 14,
.dev = {
- .parent = &msm_device_smmu_vcodec_a.dev,
+ .parent = &msm_device_iommu_vcodec_a.dev,
},
};
@@ -673,7 +673,7 @@ static struct platform_device msm_device_vcodec_a_mm1_ctx = {
.name = "msm_iommu_ctx",
.id = 15,
.dev = {
- .parent = &msm_device_smmu_vcodec_a.dev,
+ .parent = &msm_device_iommu_vcodec_a.dev,
},
};
@@ -681,7 +681,7 @@ static struct platform_device msm_device_vcodec_b_mm2_ctx = {
.name = "msm_iommu_ctx",
.id = 16,
.dev = {
- .parent = &msm_device_smmu_vcodec_b.dev,
+ .parent = &msm_device_iommu_vcodec_b.dev,
},
};
@@ -689,7 +689,7 @@ static struct platform_device msm_device_gfx3d_rbpa_ctx = {
.name = "msm_iommu_ctx",
.id = 17,
.dev = {
- .parent = &msm_device_smmu_gfx3d.dev,
+ .parent = &msm_device_iommu_gfx3d.dev,
},
};
@@ -697,7 +697,7 @@ static struct platform_device msm_device_gfx3d_cpvgttc_ctx = {
.name = "msm_iommu_ctx",
.id = 18,
.dev = {
- .parent = &msm_device_smmu_gfx3d.dev,
+ .parent = &msm_device_iommu_gfx3d.dev,
},
};
@@ -705,7 +705,7 @@ static struct platform_device msm_device_gfx3d_smmu_ctx = {
.name = "msm_iommu_ctx",
.id = 19,
.dev = {
- .parent = &msm_device_smmu_gfx3d.dev,
+ .parent = &msm_device_iommu_gfx3d.dev,
},
};
@@ -713,7 +713,7 @@ static struct platform_device msm_device_gfx2d0_pixv1_ctx = {
.name = "msm_iommu_ctx",
.id = 20,
.dev = {
- .parent = &msm_device_smmu_gfx2d0.dev,
+ .parent = &msm_device_iommu_gfx2d0.dev,
},
};
@@ -721,36 +721,36 @@ static struct platform_device msm_device_gfx2d0_texv3_ctx = {
.name = "msm_iommu_ctx",
.id = 21,
.dev = {
- .parent = &msm_device_smmu_gfx2d0.dev,
+ .parent = &msm_device_iommu_gfx2d0.dev,
},
};
static struct platform_device *msm_iommu_devs[] = {
- &msm_device_smmu_jpegd,
- &msm_device_smmu_vpe,
- &msm_device_smmu_mdp0,
- &msm_device_smmu_mdp1,
- &msm_device_smmu_rot,
- &msm_device_smmu_ijpeg,
- &msm_device_smmu_vfe,
- &msm_device_smmu_vcodec_a,
- &msm_device_smmu_vcodec_b,
- &msm_device_smmu_gfx3d,
- &msm_device_smmu_gfx2d0,
+ &msm_device_iommu_jpegd,
+ &msm_device_iommu_vpe,
+ &msm_device_iommu_mdp0,
+ &msm_device_iommu_mdp1,
+ &msm_device_iommu_rot,
+ &msm_device_iommu_ijpeg,
+ &msm_device_iommu_vfe,
+ &msm_device_iommu_vcodec_a,
+ &msm_device_iommu_vcodec_b,
+ &msm_device_iommu_gfx3d,
+ &msm_device_iommu_gfx2d0,
};
static struct msm_iommu_dev *msm_iommu_data[] = {
- &jpegd_smmu,
- &vpe_smmu,
- &mdp0_smmu,
- &mdp1_smmu,
- &rot_smmu,
- &ijpeg_smmu,
- &vfe_smmu,
- &vcodec_a_smmu,
- &vcodec_b_smmu,
- &gfx3d_smmu,
- &gfx2d0_smmu,
+ &jpegd_iommu,
+ &vpe_iommu,
+ &mdp0_iommu,
+ &mdp1_iommu,
+ &rot_iommu,
+ &ijpeg_iommu,
+ &vfe_iommu,
+ &vcodec_a_iommu,
+ &vcodec_b_iommu,
+ &gfx3d_iommu,
+ &gfx2d0_iommu,
};
static struct platform_device *msm_iommu_ctx_devs[] = {
@@ -826,7 +826,7 @@ static int msm8x60_iommu_init(void)
ret = platform_device_register(msm_iommu_devs[i]);
if (ret != 0) {
- pr_err("platform_device_register smmu failed, "
+ pr_err("platform_device_register iommu failed, "
"i = %d\n", i);
goto failure_unwind;
}
@@ -837,7 +837,7 @@ static int msm8x60_iommu_init(void)
msm_iommu_ctx_data[i],
sizeof(*msm_iommu_ctx_devs[i]));
if (ret != 0) {
- pr_err("platform_device_add_data smmu failed, "
+ pr_err("platform_device_add_data iommu failed, "
"i = %d\n", i);
goto failure_unwind2;
}
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
Add the register field definitions and memory attribute
definitions that will be needed to support IOMMU
transactions with cache-coherent memory access.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/include/mach/iommu.h | 11 +++++++++++
arch/arm/mach-msm/include/mach/iommu_hw-8xxx.h | 22 +++++++++++++++++++++-
2 files changed, 32 insertions(+), 1 deletions(-)
diff --git a/arch/arm/mach-msm/include/mach/iommu.h b/arch/arm/mach-msm/include/mach/iommu.h
index 17fc79f..bfc80c8 100644
--- a/arch/arm/mach-msm/include/mach/iommu.h
+++ b/arch/arm/mach-msm/include/mach/iommu.h
@@ -20,6 +20,17 @@
#include <linux/interrupt.h>
+/* Sharability attributes of MSM IOMMU mappings */
+#define MSM_IOMMU_ATTR_NON_SH 0x0
+#define MSM_IOMMU_ATTR_SH 0x4
+
+/* Cacheability attributes of MSM IOMMU mappings */
+#define MSM_IOMMU_ATTR_NONCACHED 0x0
+#define MSM_IOMMU_ATTR_CACHED_WB_WA 0x1
+#define MSM_IOMMU_ATTR_CACHED_WB_NWA 0x2
+#define MSM_IOMMU_ATTR_CACHED_WT 0x3
+
+
/* Maximum number of Machine IDs that we are allowing to be mapped to the same
* context bank. The number of MIDs mapped to the same CB does not affect
* performance, but there is a practical limit on how many distinct MIDs may
diff --git a/arch/arm/mach-msm/include/mach/iommu_hw-8xxx.h b/arch/arm/mach-msm/include/mach/iommu_hw-8xxx.h
index f9386d3..c2c3da9 100644
--- a/arch/arm/mach-msm/include/mach/iommu_hw-8xxx.h
+++ b/arch/arm/mach-msm/include/mach/iommu_hw-8xxx.h
@@ -54,6 +54,7 @@ do { \
#define NUM_FL_PTE 4096
#define NUM_SL_PTE 256
+#define NUM_TEX_CLASS 8
/* First-level page table bits */
#define FL_BASE_MASK 0xFFFFFC00
@@ -63,6 +64,9 @@ do { \
#define FL_AP_WRITE (1 << 10)
#define FL_AP_READ (1 << 11)
#define FL_SHARED (1 << 16)
+#define FL_BUFFERABLE (1 << 2)
+#define FL_CACHEABLE (1 << 3)
+#define FL_TEX0 (1 << 12)
#define FL_OFFSET(va) (((va) & 0xFFF00000) >> 20)
/* Second-level page table bits */
@@ -73,8 +77,20 @@ do { \
#define SL_AP0 (1 << 4)
#define SL_AP1 (2 << 4)
#define SL_SHARED (1 << 10)
+#define SL_BUFFERABLE (1 << 2)
+#define SL_CACHEABLE (1 << 3)
+#define SL_TEX0 (1 << 6)
#define SL_OFFSET(va) (((va) & 0xFF000) >> 12)
+/* Memory type and cache policy attributes */
+#define MT_SO 0
+#define MT_DEV 1
+#define MT_NORMAL 2
+#define CP_NONCACHED 0
+#define CP_WB_WA 1
+#define CP_WT 2
+#define CP_WB_NWA 3
+
/* Global register setters / getters */
#define SET_M2VCBR_N(b, N, v) SET_GLOBAL_REG_N(M2VCBR_N, N, (b), (v))
#define SET_CBACR_N(b, N, v) SET_GLOBAL_REG_N(CBACR_N, N, (b), (v))
@@ -706,7 +722,9 @@ do { \
#define GET_OCPC5(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC5)
#define GET_OCPC6(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC6)
#define GET_OCPC7(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC7)
-
+#define NMRR_ICP(nmrr, n) (((nmrr) & (3 << ((n) * 2))) >> ((n) * 2))
+#define NMRR_OCP(nmrr, n) (((nmrr) & (3 << ((n) * 2 + 16))) >> \
+ ((n) * 2 + 16))
/* PAR */
#define GET_FAULT(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT)
@@ -750,6 +768,8 @@ do { \
#define GET_NOS5(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS5)
#define GET_NOS6(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS6)
#define GET_NOS7(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS7)
+#define PRRR_NOS(prrr, n) ((prrr) & (1 << ((n) + 24)) ? 1 : 0)
+#define PRRR_MT(prrr, n) ((((prrr) & (3 << ((n) * 2))) >> ((n) * 2)))
/* RESUME */
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
Don't flush the page tables on an IOMMU domain if there are
no IOMMU devices attached to the domain. The act of
attaching to the domain will cause an implicit flush of
those areas if the page tables are configured to not be L2
cacheable.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/iommu.c | 15 +++++++++------
1 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/arch/arm/mach-msm/iommu.c b/arch/arm/mach-msm/iommu.c
index 134add7..74f2157 100644
--- a/arch/arm/mach-msm/iommu.c
+++ b/arch/arm/mach-msm/iommu.c
@@ -50,13 +50,16 @@ static void __flush_iotlb(struct iommu_domain *domain)
unsigned long *fl_table = priv->pgtable;
int i;
- dmac_flush_range(fl_table, fl_table + SZ_16K);
+ if (!list_empty(&priv->list_attached)) {
+ dmac_flush_range(fl_table, fl_table + SZ_16K);
- for (i = 0; i < NUM_FL_PTE; i++)
- if ((fl_table[i] & 0x03) == FL_TYPE_TABLE) {
- void *sl_table = __va(fl_table[i] & FL_BASE_MASK);
- dmac_flush_range(sl_table, sl_table + SZ_4K);
- }
+ for (i = 0; i < NUM_FL_PTE; i++)
+ if ((fl_table[i] & 0x03) == FL_TYPE_TABLE) {
+ void *sl_table = __va(fl_table[i] &
+ FL_BASE_MASK);
+ dmac_flush_range(sl_table, sl_table + SZ_4K);
+ }
+ }
#endif
list_for_each_entry(ctx_drvdata, &priv->list_attached, attached_elm) {
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
Update the platform data for the 3D core's IOMMU based on
the revised usage model. Remove unused contexts and rename
the remaining contexts based on their new function. Add the
new M2VCBMT mappings for the updated contexts.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/devices-msm8x60-iommu.c | 45 +++++++++-------------------
1 files changed, 15 insertions(+), 30 deletions(-)
diff --git a/arch/arm/mach-msm/devices-msm8x60-iommu.c b/arch/arm/mach-msm/devices-msm8x60-iommu.c
index 9e657e0..22d0c7c 100644
--- a/arch/arm/mach-msm/devices-msm8x60-iommu.c
+++ b/arch/arm/mach-msm/devices-msm8x60-iommu.c
@@ -519,22 +519,17 @@ static struct msm_iommu_ctx_dev vcodec_b_mm2_ctx = {
.mids = {0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1}
};
-static struct msm_iommu_ctx_dev gfx3d_rbpa_ctx = {
- .name = "gfx3d_rbpa",
+static struct msm_iommu_ctx_dev gfx3d_user_ctx = {
+ .name = "gfx3d_user",
.num = 0,
- .mids = {-1}
+ .mids = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1}
};
-static struct msm_iommu_ctx_dev gfx3d_cpvgttc_ctx = {
- .name = "gfx3d_cpvgttc",
+static struct msm_iommu_ctx_dev gfx3d_priv_ctx = {
+ .name = "gfx3d_priv",
.num = 1,
- .mids = {0, 1, 2, 3, 4, 5, 6, 7, -1}
-};
-
-static struct msm_iommu_ctx_dev gfx3d_smmu_ctx = {
- .name = "gfx3d_smmu",
- .num = 2,
- .mids = {8, 9, 10, 11, 12, -1}
+ .mids = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, -1}
};
static struct msm_iommu_ctx_dev gfx2d0_pixv1_ctx = {
@@ -685,7 +680,7 @@ static struct platform_device msm_device_vcodec_b_mm2_ctx = {
},
};
-static struct platform_device msm_device_gfx3d_rbpa_ctx = {
+static struct platform_device msm_device_gfx3d_user_ctx = {
.name = "msm_iommu_ctx",
.id = 17,
.dev = {
@@ -693,7 +688,7 @@ static struct platform_device msm_device_gfx3d_rbpa_ctx = {
},
};
-static struct platform_device msm_device_gfx3d_cpvgttc_ctx = {
+static struct platform_device msm_device_gfx3d_priv_ctx = {
.name = "msm_iommu_ctx",
.id = 18,
.dev = {
@@ -701,17 +696,9 @@ static struct platform_device msm_device_gfx3d_cpvgttc_ctx = {
},
};
-static struct platform_device msm_device_gfx3d_smmu_ctx = {
- .name = "msm_iommu_ctx",
- .id = 19,
- .dev = {
- .parent = &msm_device_iommu_gfx3d.dev,
- },
-};
-
static struct platform_device msm_device_gfx2d0_pixv1_ctx = {
.name = "msm_iommu_ctx",
- .id = 20,
+ .id = 19,
.dev = {
.parent = &msm_device_iommu_gfx2d0.dev,
},
@@ -719,7 +706,7 @@ static struct platform_device msm_device_gfx2d0_pixv1_ctx = {
static struct platform_device msm_device_gfx2d0_texv3_ctx = {
.name = "msm_iommu_ctx",
- .id = 21,
+ .id = 20,
.dev = {
.parent = &msm_device_iommu_gfx2d0.dev,
},
@@ -771,9 +758,8 @@ static struct platform_device *msm_iommu_ctx_devs[] = {
&msm_device_vcodec_a_stream_ctx,
&msm_device_vcodec_a_mm1_ctx,
&msm_device_vcodec_b_mm2_ctx,
- &msm_device_gfx3d_rbpa_ctx,
- &msm_device_gfx3d_cpvgttc_ctx,
- &msm_device_gfx3d_smmu_ctx,
+ &msm_device_gfx3d_user_ctx,
+ &msm_device_gfx3d_priv_ctx,
&msm_device_gfx2d0_pixv1_ctx,
&msm_device_gfx2d0_texv3_ctx,
};
@@ -796,9 +782,8 @@ static struct msm_iommu_ctx_dev *msm_iommu_ctx_data[] = {
&vcodec_a_stream_ctx,
&vcodec_a_mm1_ctx,
&vcodec_b_mm2_ctx,
- &gfx3d_rbpa_ctx,
- &gfx3d_cpvgttc_ctx,
- &gfx3d_smmu_ctx,
+ &gfx3d_user_ctx,
+ &gfx3d_priv_ctx,
&gfx2d0_pixv1_ctx,
&gfx2d0_texv3_ctx,
};
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
On msm8x60, the MID field on the AXI connection to the
IOMMU can be up to five bits wide. Thus, allow the IOMMU
context platform data to map up to 32 MIDs.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/include/mach/iommu.h | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/arch/arm/mach-msm/include/mach/iommu.h b/arch/arm/mach-msm/include/mach/iommu.h
index 218ef57..17fc79f 100644
--- a/arch/arm/mach-msm/include/mach/iommu.h
+++ b/arch/arm/mach-msm/include/mach/iommu.h
@@ -26,7 +26,7 @@
* be present. These mappings are typically determined at design time and are
* not expected to change at run time.
*/
-#define MAX_NUM_MIDS 16
+#define MAX_NUM_MIDS 32
/**
* struct msm_iommu_dev - a single IOMMU hardware instance
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
Mark the init and exit functions as __init and __exit where
appropriate.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/devices-msm8x60-iommu.c | 4 ++--
arch/arm/mach-msm/iommu.c | 2 +-
arch/arm/mach-msm/iommu_dev.c | 4 ++--
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/arch/arm/mach-msm/devices-msm8x60-iommu.c b/arch/arm/mach-msm/devices-msm8x60-iommu.c
index a6ecd39..f9e7bd3 100644
--- a/arch/arm/mach-msm/devices-msm8x60-iommu.c
+++ b/arch/arm/mach-msm/devices-msm8x60-iommu.c
@@ -826,7 +826,7 @@ static struct msm_iommu_ctx_dev *msm_iommu_ctx_data[] = {
&gfx2d1_2d1_ctx,
};
-static int msm8x60_iommu_init(void)
+static int __init msm8x60_iommu_init(void)
{
int ret, i;
@@ -886,7 +886,7 @@ failure:
return ret;
}
-static void msm8x60_iommu_exit(void)
+static void __exit msm8x60_iommu_exit(void)
{
int i;
diff --git a/arch/arm/mach-msm/iommu.c b/arch/arm/mach-msm/iommu.c
index f71747d..134add7 100644
--- a/arch/arm/mach-msm/iommu.c
+++ b/arch/arm/mach-msm/iommu.c
@@ -585,7 +585,7 @@ static struct iommu_ops msm_iommu_ops = {
.domain_has_cap = msm_iommu_domain_has_cap
};
-static int msm_iommu_init(void)
+static int __init msm_iommu_init(void)
{
register_iommu(&msm_iommu_ops);
return 0;
diff --git a/arch/arm/mach-msm/iommu_dev.c b/arch/arm/mach-msm/iommu_dev.c
index 9019cee..b83c73b 100644
--- a/arch/arm/mach-msm/iommu_dev.c
+++ b/arch/arm/mach-msm/iommu_dev.c
@@ -346,7 +346,7 @@ static struct platform_driver msm_iommu_ctx_driver = {
.remove = msm_iommu_ctx_remove,
};
-static int msm_iommu_driver_init(void)
+static int __init msm_iommu_driver_init(void)
{
int ret;
ret = platform_driver_register(&msm_iommu_driver);
@@ -365,7 +365,7 @@ error:
return ret;
}
-static void msm_iommu_driver_exit(void)
+static void __exit msm_iommu_driver_exit(void)
{
platform_driver_unregister(&msm_iommu_ctx_driver);
platform_driver_unregister(&msm_iommu_driver);
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
On Fri, 2010-11-12 at 19:29 -0800, Stepan Moskovchenko wrote:
>
> +config IOMMU_PGTABLES_L2
> + depends on ARCH_MSM8X60
> + depends on MMU
> + depends on CPU_DCACHE_DISABLE=n
> + depends on SMP
> + bool "Cacheable IOMMU page tables"
> + default y
> + help
> + Allows the IOMMU page tables to be brought into the L2 cache. This
> + improves the TLB miss latency at the expense of potential pollution
> + of the L2 cache. This option has been shown to improve multimedia
> + performance in some cases.
> +
> + If unsure, say Y here.
Why would someone want this off?
The other thing is that you usually want this included with the code
that uses the option.
Daniel
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora
Forum.
> On Fri, 2010-11-12 at 19:29 -0800, Stepan Moskovchenko wrote:
>>
>> +config IOMMU_PGTABLES_L2
>> + depends on ARCH_MSM8X60
>> + depends on MMU
>> + depends on CPU_DCACHE_DISABLE=n
>> + depends on SMP
>> + bool "Cacheable IOMMU page tables"
>> + default y
>> + help
>> + Allows the IOMMU page tables to be brought into the L2 cache. This
>> + improves the TLB miss latency at the expense of potential pollution
>> + of the L2 cache. This option has been shown to improve multimedia
>> + performance in some cases.
>> +
>> + If unsure, say Y here.
>
> Why would someone want this off?
>
> The other thing is that you usually want this included with the code
> that uses the option.
The code that uses it had gone in during a previous patch series, but I
didn't want to meddle in the Kconfig just yet, especially since the option
only improves performance and does not add new functionality at a high
level. This patch should be the last of what is needed for this feature.
You would want to turn this off if you wanted more deterministic behavior
from the multimedia subsystem, such as when trying to run benchmarks for
the worst-case behavior in terms of memory latency and TLB misses. You
might also want to turn it off if you are debugging memory problems that
you suspect might be related to the cache maintenance code, in which case
turning this off would give an idea as to whether that is the problem.
Similarly, this needs to be off (and will be, due to the dependencies) if
certain required things (like the Dcache, MMU, etc) have been disabled.
Finally, if you are doing things with the MMSS that you know will not
result in many TLB misses, (or if you know that you can tolerate high miss
latency) you may as well turn this off to avoid the (small) bit of cache
pollution.
Steve
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
On Sun, 2010-11-14 at 18:56 -0800, Stepan Moskovchenko wrote:
> > On Fri, 2010-11-12 at 19:29 -0800, Stepan Moskovchenko wrote:
> >>
> >> +config IOMMU_PGTABLES_L2
> >> + depends on ARCH_MSM8X60
> >> + depends on MMU
> >> + depends on CPU_DCACHE_DISABLE=n
> >> + depends on SMP
> >> + bool "Cacheable IOMMU page tables"
> >> + default y
> >> + help
> >> + Allows the IOMMU page tables to be brought into the L2 cache. This
> >> + improves the TLB miss latency at the expense of potential pollution
> >> + of the L2 cache. This option has been shown to improve multimedia
> >> + performance in some cases.
> >> +
> >> + If unsure, say Y here.
> >
> > Why would someone want this off?
> >
> > The other thing is that you usually want this included with the code
> > that uses the option.
>
> The code that uses it had gone in during a previous patch series, but I
> didn't want to meddle in the Kconfig just yet, especially since the option
> only improves performance and does not add new functionality at a high
> level. This patch should be the last of what is needed for this feature.
>
> You would want to turn this off if you wanted more deterministic behavior
> from the multimedia subsystem, such as when trying to run benchmarks for
> the worst-case behavior in terms of memory latency and TLB misses. You
> might also want to turn it off if you are debugging memory problems that
> you suspect might be related to the cache maintenance code, in which case
> turning this off would give an idea as to whether that is the problem.
> Similarly, this needs to be off (and will be, due to the dependencies) if
> certain required things (like the Dcache, MMU, etc) have been disabled.
> Finally, if you are doing things with the MMSS that you know will not
> result in many TLB misses, (or if you know that you can tolerate high miss
> latency) you may as well turn this off to avoid the (small) bit of cache
> pollution.
It sounds like you don't really want it off unless you're a developer (or
you turn off one of the dependencies). I think this might be better as a
hidden option just because of the developer-centric nature of it.
Daniel
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
On Fri, 2010-11-12 at 19:30 -0800, Stepan Moskovchenko wrote:
> Remove some unneeded assignments and messages, restructure
> a failure path in iova_to_phys, and make __flush_iotlb
> return int in preparation for adding IOMMU clock control.
>
Why restructure the failure path ?
Daniel
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
On 11/15/2010 4:25 PM, Daniel Walker wrote:
> On Fri, 2010-11-12 at 19:30 -0800, Stepan Moskovchenko wrote:
>> Remove some unneeded assignments and messages, restructure
>> a failure path in iova_to_phys, and make __flush_iotlb
>> return int in preparation for adding IOMMU clock control.
> Why restructure the failure path ?
>
> Daniel
It is a trivial change of replacing a goto with an assignment and moving
it a few lines down. It reduces "jumpiness" within that function and is
a cleaner version. On the more practical side, it was done in
preparation for some other changes I have coming up, which touch that
function and work a lot better with the cleaned-up failure path. The
next patch was delayed (due to a dependency) but as long as I was doing
code cleanup, I saw no reason not to also clean up the failure path as
part of this series.
Steve
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
Add a Kconfig item to allow the IOMMU page tables to be
coherent in the L2 cache. This generally reduces IOTLB miss
latencies and has been shown to improve multimedia
performance.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/Kconfig | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/arch/arm/mach-msm/Kconfig b/arch/arm/mach-msm/Kconfig
index dbbcfeb..1c6f76b 100644
--- a/arch/arm/mach-msm/Kconfig
+++ b/arch/arm/mach-msm/Kconfig
@@ -122,6 +122,10 @@ config MACH_MSM8X60_FFA
endmenu
+config IOMMU_PGTABLES_L2
+ def_bool y
+ depends on ARCH_MSM8X60 && MMU && SMP && CPU_DCACHE_DISABLE=n
+
config MSM_DEBUG_UART
int
default 1 if MSM_DEBUG_UART1
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
On 11/15/2010 10:20 AM, Daniel Walker wrote:
> On Sun, 2010-11-14 at 18:56 -0800, Stepan Moskovchenko wrote:
>>> On Fri, 2010-11-12 at 19:29 -0800, Stepan Moskovchenko wrote:
>>>> +config IOMMU_PGTABLES_L2
>>>> + depends on ARCH_MSM8X60
>>>> + depends on MMU
>>>> + depends on CPU_DCACHE_DISABLE=n
>>>> + depends on SMP
>>>> + bool "Cacheable IOMMU page tables"
>>>> + default y
>>>> + help
>>>> + Allows the IOMMU page tables to be brought into the L2 cache. This
>>>> + improves the TLB miss latency at the expense of potential pollution
>>>> + of the L2 cache. This option has been shown to improve multimedia
>>>> + performance in some cases.
>>>> +
>>>> + If unsure, say Y here.
>>> Why would someone want this off?
>>>
>>> The other thing is that you usually want this included with the code
>>> that uses the option.
>> The code that uses it had gone in during a previous patch series, but I
>> didn't want to meddle in the Kconfig just yet, especially since the option
>> only improves performance and does not add new functionality at a high
>> level. This patch should be the last of what is needed for this feature.
>>
>> You would want to turn this off if you wanted more deterministic behavior
>> from the multimedia subsystem, such as when trying to run benchmarks for
>> the worst-case behavior in terms of memory latency and TLB misses. You
>> might also want to turn it off if you are debugging memory problems that
>> you suspect might be related to the cache maintenance code, in which case
>> turning this off would give an idea as to whether that is the problem.
>> Similarly, this needs to be off (and will be, due to the dependencies) if
>> certain required things (like the Dcache, MMU, etc) have been disabled.
>> Finally, if you are doing things with the MMSS that you know will not
>> result in many TLB misses, (or if you know that you can tolerate high miss
>> latency) you may as well turn this off to avoid the (small) bit of cache
>> pollution.
> It sounds like you don't really want it off unless you're a developer (or
> you turn off one of the dependencies). I think this might be better as a
> hidden option just because of the developer-centric nature of it.
>
> Daniel
Alright. Fixed in v2.
Steve
---
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
Add the register field definitions and memory attribute
definitions that will be needed to support IOMMU
transactions with cache-coherent memory access.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/include/mach/iommu.h | 13 +++++++++++++
arch/arm/mach-msm/include/mach/iommu_hw-8xxx.h | 22 +++++++++++++++++++++-
2 files changed, 34 insertions(+), 1 deletions(-)
diff --git a/arch/arm/mach-msm/include/mach/iommu.h b/arch/arm/mach-msm/include/mach/iommu.h
index 17fc79f..296c0f1 100644
--- a/arch/arm/mach-msm/include/mach/iommu.h
+++ b/arch/arm/mach-msm/include/mach/iommu.h
@@ -20,6 +20,19 @@
#include <linux/interrupt.h>
+/* Sharability attributes of MSM IOMMU mappings */
+#define MSM_IOMMU_ATTR_NON_SH 0x0
+#define MSM_IOMMU_ATTR_SH 0x4
+
+/* Cacheability attributes of MSM IOMMU mappings */
+#define MSM_IOMMU_ATTR_NONCACHED 0x0
+#define MSM_IOMMU_ATTR_CACHED_WB_WA 0x1
+#define MSM_IOMMU_ATTR_CACHED_WB_NWA 0x2
+#define MSM_IOMMU_ATTR_CACHED_WT 0x3
+
+/* Mask for the cache policy attribute */
+#define MSM_IOMMU_CP_MASK 0x03
+
/* Maximum number of Machine IDs that we are allowing to be mapped to the same
* context bank. The number of MIDs mapped to the same CB does not affect
* performance, but there is a practical limit on how many distinct MIDs may
diff --git a/arch/arm/mach-msm/include/mach/iommu_hw-8xxx.h b/arch/arm/mach-msm/include/mach/iommu_hw-8xxx.h
index f9386d3..c2c3da9 100644
--- a/arch/arm/mach-msm/include/mach/iommu_hw-8xxx.h
+++ b/arch/arm/mach-msm/include/mach/iommu_hw-8xxx.h
@@ -54,6 +54,7 @@ do { \
#define NUM_FL_PTE 4096
#define NUM_SL_PTE 256
+#define NUM_TEX_CLASS 8
/* First-level page table bits */
#define FL_BASE_MASK 0xFFFFFC00
@@ -63,6 +64,9 @@ do { \
#define FL_AP_WRITE (1 << 10)
#define FL_AP_READ (1 << 11)
#define FL_SHARED (1 << 16)
+#define FL_BUFFERABLE (1 << 2)
+#define FL_CACHEABLE (1 << 3)
+#define FL_TEX0 (1 << 12)
#define FL_OFFSET(va) (((va) & 0xFFF00000) >> 20)
/* Second-level page table bits */
@@ -73,8 +77,20 @@ do { \
#define SL_AP0 (1 << 4)
#define SL_AP1 (2 << 4)
#define SL_SHARED (1 << 10)
+#define SL_BUFFERABLE (1 << 2)
+#define SL_CACHEABLE (1 << 3)
+#define SL_TEX0 (1 << 6)
#define SL_OFFSET(va) (((va) & 0xFF000) >> 12)
+/* Memory type and cache policy attributes */
+#define MT_SO 0
+#define MT_DEV 1
+#define MT_NORMAL 2
+#define CP_NONCACHED 0
+#define CP_WB_WA 1
+#define CP_WT 2
+#define CP_WB_NWA 3
+
/* Global register setters / getters */
#define SET_M2VCBR_N(b, N, v) SET_GLOBAL_REG_N(M2VCBR_N, N, (b), (v))
#define SET_CBACR_N(b, N, v) SET_GLOBAL_REG_N(CBACR_N, N, (b), (v))
@@ -706,7 +722,9 @@ do { \
#define GET_OCPC5(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC5)
#define GET_OCPC6(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC6)
#define GET_OCPC7(b, c) GET_CONTEXT_FIELD(b, c, NMRR, OCPC7)
-
+#define NMRR_ICP(nmrr, n) (((nmrr) & (3 << ((n) * 2))) >> ((n) * 2))
+#define NMRR_OCP(nmrr, n) (((nmrr) & (3 << ((n) * 2 + 16))) >> \
+ ((n) * 2 + 16))
/* PAR */
#define GET_FAULT(b, c) GET_CONTEXT_FIELD(b, c, PAR, FAULT)
@@ -750,6 +768,8 @@ do { \
#define GET_NOS5(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS5)
#define GET_NOS6(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS6)
#define GET_NOS7(b, c) GET_CONTEXT_FIELD(b, c, PRRR, NOS7)
+#define PRRR_NOS(prrr, n) ((prrr) & (1 << ((n) + 24)) ? 1 : 0)
+#define PRRR_MT(prrr, n) ((((prrr) & (3 << ((n) * 2))) >> ((n) * 2)))
/* RESUME */
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
Add support for allowing IOMMU memory transactions to be
cache coherent, eliminating the need for software cache
management in certain situations. This can lead to
improvements in performance and power usage, assuming the
multimedia core's access pattern exhibits spatial locality
and that its working set fits into the cache.
Signed-off-by: Stepan Moskovchenko <[email protected]>
---
arch/arm/mach-msm/iommu.c | 93 +++++++++++++++++++++++++++++++++++++++-----
1 files changed, 82 insertions(+), 11 deletions(-)
diff --git a/arch/arm/mach-msm/iommu.c b/arch/arm/mach-msm/iommu.c
index 67e8f53..a468ee3 100644
--- a/arch/arm/mach-msm/iommu.c
+++ b/arch/arm/mach-msm/iommu.c
@@ -33,6 +33,16 @@
#include <mach/iommu_hw-8xxx.h>
#include <mach/iommu.h>
+#define MRC(reg, processor, op1, crn, crm, op2) \
+__asm__ __volatile__ ( \
+" mrc " #processor "," #op1 ", %0," #crn "," #crm "," #op2 "\n" \
+: "=r" (reg))
+
+#define RCP15_PRRR(reg) MRC(reg, p15, 0, c10, c2, 0)
+#define RCP15_NMRR(reg) MRC(reg, p15, 0, c10, c2, 1)
+
+static int msm_iommu_tex_class[4];
+
DEFINE_SPINLOCK(msm_iommu_lock);
struct msm_priv {
@@ -98,6 +108,7 @@ static void __reset_context(void __iomem *base, int ctx)
static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable)
{
+ unsigned int prrr, nmrr;
__reset_context(base, ctx);
/* Set up HTW mode */
@@ -130,11 +141,11 @@ static void __program_context(void __iomem *base, int ctx, phys_addr_t pgtable)
/* Turn on TEX Remap */
SET_TRE(base, ctx, 1);
- /* Do not configure PRRR / NMRR on the IOMMU for now. We will assume
- * TEX class 0 for everything until attributes are properly worked out
- */
- SET_PRRR(base, ctx, 0);
- SET_NMRR(base, ctx, 0);
+ /* Set TEX remap attributes */
+ RCP15_PRRR(prrr);
+ RCP15_NMRR(nmrr);
+ SET_PRRR(base, ctx, prrr);
+ SET_NMRR(base, ctx, nmrr);
/* Turn on BFB prefetch */
SET_BFBDFE(base, ctx, 1);
@@ -304,12 +315,21 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
unsigned long *sl_table;
unsigned long *sl_pte;
unsigned long sl_offset;
+ unsigned int pgprot;
size_t len = 0x1000UL << order;
- int ret = 0;
+ int ret = 0, tex, sh;
spin_lock_irqsave(&msm_iommu_lock, flags);
- priv = domain->priv;
+ sh = (prot & MSM_IOMMU_ATTR_SH) ? 1 : 0;
+ tex = msm_iommu_tex_class[prot & MSM_IOMMU_CP_MASK];
+
+ if (tex < 0 || tex > NUM_TEX_CLASS - 1) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ priv = domain->priv;
if (!priv) {
ret = -EINVAL;
goto fail;
@@ -330,6 +350,18 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
goto fail;
}
+ if (len == SZ_16M || len == SZ_1M) {
+ pgprot = sh ? FL_SHARED : 0;
+ pgprot |= tex & 0x01 ? FL_BUFFERABLE : 0;
+ pgprot |= tex & 0x02 ? FL_CACHEABLE : 0;
+ pgprot |= tex & 0x04 ? FL_TEX0 : 0;
+ } else {
+ pgprot = sh ? SL_SHARED : 0;
+ pgprot |= tex & 0x01 ? SL_BUFFERABLE : 0;
+ pgprot |= tex & 0x02 ? SL_CACHEABLE : 0;
+ pgprot |= tex & 0x04 ? SL_TEX0 : 0;
+ }
+
fl_offset = FL_OFFSET(va); /* Upper 12 bits */
fl_pte = fl_table + fl_offset; /* int pointers, 4 bytes */
@@ -338,12 +370,12 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
for (i = 0; i < 16; i++)
*(fl_pte+i) = (pa & 0xFF000000) | FL_SUPERSECTION |
FL_AP_READ | FL_AP_WRITE | FL_TYPE_SECT |
- FL_SHARED;
+ FL_SHARED | pgprot;
}
if (len == SZ_1M)
*fl_pte = (pa & 0xFFF00000) | FL_AP_READ | FL_AP_WRITE |
- FL_TYPE_SECT | FL_SHARED;
+ FL_TYPE_SECT | FL_SHARED | pgprot;
/* Need a 2nd level table */
if ((len == SZ_4K || len == SZ_64K) && (*fl_pte) == 0) {
@@ -368,14 +400,14 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long va,
if (len == SZ_4K)
*sl_pte = (pa & SL_BASE_MASK_SMALL) | SL_AP0 | SL_AP1 |
- SL_SHARED | SL_TYPE_SMALL;
+ SL_SHARED | SL_TYPE_SMALL | pgprot;
if (len == SZ_64K) {
int i;
for (i = 0; i < 16; i++)
*(sl_pte+i) = (pa & SL_BASE_MASK_LARGE) | SL_AP0 |
- SL_AP1 | SL_SHARED | SL_TYPE_LARGE;
+ SL_AP1 | SL_SHARED | SL_TYPE_LARGE | pgprot;
}
__flush_iotlb(domain);
@@ -593,8 +625,47 @@ static struct iommu_ops msm_iommu_ops = {
.domain_has_cap = msm_iommu_domain_has_cap
};
+static int __init get_tex_class(int icp, int ocp, int mt, int nos)
+{
+ int i = 0;
+ unsigned int prrr = 0;
+ unsigned int nmrr = 0;
+ int c_icp, c_ocp, c_mt, c_nos;
+
+ RCP15_PRRR(prrr);
+ RCP15_NMRR(nmrr);
+
+ for (i = 0; i < NUM_TEX_CLASS; i++) {
+ c_nos = PRRR_NOS(prrr, i);
+ c_mt = PRRR_MT(prrr, i);
+ c_icp = NMRR_ICP(nmrr, i);
+ c_ocp = NMRR_OCP(nmrr, i);
+
+ if (icp == c_icp && ocp == c_ocp && c_mt == mt && c_nos == nos)
+ return i;
+ }
+
+ return -ENODEV;
+}
+
+static void __init setup_iommu_tex_classes(void)
+{
+ msm_iommu_tex_class[MSM_IOMMU_ATTR_NONCACHED] =
+ get_tex_class(CP_NONCACHED, CP_NONCACHED, MT_NORMAL, 1);
+
+ msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_WA] =
+ get_tex_class(CP_WB_WA, CP_WB_WA, MT_NORMAL, 1);
+
+ msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WB_NWA] =
+ get_tex_class(CP_WB_NWA, CP_WB_NWA, MT_NORMAL, 1);
+
+ msm_iommu_tex_class[MSM_IOMMU_ATTR_CACHED_WT] =
+ get_tex_class(CP_WT, CP_WT, MT_NORMAL, 1);
+}
+
static int __init msm_iommu_init(void)
{
+ setup_iommu_tex_classes();
register_iommu(&msm_iommu_ops);
return 0;
}
--
1.7.0.2
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.
On Mon, 2010-11-15 at 17:16 -0800, Stepan Moskovchenko wrote:
> On 11/15/2010 4:25 PM, Daniel Walker wrote:
> > On Fri, 2010-11-12 at 19:30 -0800, Stepan Moskovchenko wrote:
> >> Remove some unneeded assignments and messages, restructure
> >> a failure path in iova_to_phys, and make __flush_iotlb
> >> return int in preparation for adding IOMMU clock control.
> > Why restructure the failure path ?
> >
> > Daniel
>
> It is a trivial change of replacing a goto with an assignment and moving
> it a few lines down. It reduces "jumpiness" within that function and is
> a cleaner version. On the more practical side, it was done in
> preparation for some other changes I have coming up, which touch that
> function and work a lot better with the cleaned-up failure path. The
> next patch was delayed (due to a dependency) but as long as I was doing
> code cleanup, I saw no reason not to also clean up the failure path as
> part of this series.
Some of what you have said above really needs to be in your commit
text. I've noticed that your commit text in general is not verbose
enough. You need to explain what you're doing better.
Also generally you want to organize similar sets of changes. So if you're
doing a cleanup in preparation for another change then the cleanup
should go with the other change. In this case it's not clear that this
is actually a cleanup, so it would be much nicer to get that change
along with the one you've delayed. Also the two Kconfig changes you've
sent indicate that you're not organizing your changes properly, so I think
you need to take more time considering how to organize the patches.
Just so we're clear on this, any changes that you send me will go into
_permanent_ public history. This history will not disappear at some set
date, and we will not be rebasing out changes or squashing changes.
Daniel
--
Sent by a consultant of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum.