Hi Will,
I am resending this series now that we have concluded the discussion on the
comments [1,2] on v16 of this patch series, and the subsequent patch [3] has been posted.
Kindly merge this series.
Thanks
Vivek
The previous version of this patch series is at [4].
Also refer to [4] for the change logs of previous versions.
[1] https://lore.kernel.org/patchwork/patch/979430/
[2] https://lore.kernel.org/patchwork/patch/979433/
[3] https://lore.kernel.org/patchwork/patch/994194/
[4] https://lore.kernel.org/patchwork/cover/979429/
Sricharan R (3):
iommu/arm-smmu: Add pm_runtime/sleep ops
iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device
iommu/arm-smmu: Add the device_link between masters and smmu
Vivek Gautam (2):
dt-bindings: arm-smmu: Add bindings for qcom,smmu-v2
iommu/arm-smmu: Add support for qcom,smmu-v2 variant
.../devicetree/bindings/iommu/arm,smmu.txt | 39 +++++
drivers/iommu/arm-smmu.c | 192 +++++++++++++++++++--
2 files changed, 219 insertions(+), 12 deletions(-)
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
From: Sricharan R <[email protected]>
The smmu needs to be functional only when the respective
masters using it are active. The device_link feature
helps to track such functional dependencies, so that the
iommu gets powered when the master device enables itself
using pm_runtime. So by adapting the smmu driver for
runtime pm, above said dependency can be addressed.
This patch adds the pm runtime/sleep callbacks to the
driver and also the functions to parse the smmu clocks
from DT and enable/disable them in resume/suspend.
Also, while we enable runtime pm, add a pm sleep suspend
callback that pushes devices to a low power state by turning
the clocks off during system sleep.
Also add the corresponding clock enable path in the resume callback.
Signed-off-by: Sricharan R <[email protected]>
Signed-off-by: Archit Taneja <[email protected]>
[vivek: rework for clock and pm ops]
Signed-off-by: Vivek Gautam <[email protected]>
Reviewed-by: Tomasz Figa <[email protected]>
Tested-by: Srinivas Kandagatla <[email protected]>
Reviewed-by: Robin Murphy <[email protected]>
---
drivers/iommu/arm-smmu.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 74 insertions(+), 3 deletions(-)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 5a28ae892504..f7ab7ce87a94 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -48,6 +48,7 @@
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
@@ -206,6 +207,8 @@ struct arm_smmu_device {
u32 num_global_irqs;
u32 num_context_irqs;
unsigned int *irqs;
+ struct clk_bulk_data *clks;
+ int num_clks;
u32 cavium_id_base; /* Specific to Cavium */
@@ -1944,10 +1947,12 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
struct arm_smmu_match_data {
enum arm_smmu_arch_version version;
enum arm_smmu_implementation model;
+ const char * const *clks;
+ int num_clks;
};
#define ARM_SMMU_MATCH_DATA(name, ver, imp) \
-static struct arm_smmu_match_data name = { .version = ver, .model = imp }
+static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
@@ -1966,6 +1971,23 @@ static const struct of_device_id arm_smmu_of_match[] = {
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
+static void arm_smmu_fill_clk_data(struct arm_smmu_device *smmu,
+ const char * const *clks)
+{
+ int i;
+
+ if (smmu->num_clks < 1)
+ return;
+
+ smmu->clks = devm_kcalloc(smmu->dev, smmu->num_clks,
+ sizeof(*smmu->clks), GFP_KERNEL);
+ if (!smmu->clks)
+ return;
+
+ for (i = 0; i < smmu->num_clks; i++)
+ smmu->clks[i].id = clks[i];
+}
+
#ifdef CONFIG_ACPI
static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
{
@@ -2048,6 +2070,9 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev,
data = of_device_get_match_data(dev);
smmu->version = data->version;
smmu->model = data->model;
+ smmu->num_clks = data->num_clks;
+
+ arm_smmu_fill_clk_data(smmu, data->clks);
parse_driver_options(smmu);
@@ -2150,6 +2175,14 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
smmu->irqs[i] = irq;
}
+ err = devm_clk_bulk_get(smmu->dev, smmu->num_clks, smmu->clks);
+ if (err)
+ return err;
+
+ err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
+ if (err)
+ return err;
+
err = arm_smmu_device_cfg_probe(smmu);
if (err)
return err;
@@ -2236,6 +2269,9 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
/* Turn the thing off */
writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+
+ clk_bulk_disable_unprepare(smmu->num_clks, smmu->clks);
+
return 0;
}
@@ -2244,15 +2280,50 @@ static void arm_smmu_device_shutdown(struct platform_device *pdev)
arm_smmu_device_remove(pdev);
}
-static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
+static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
{
struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+ int ret;
+
+ ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
+ if (ret)
+ return ret;
arm_smmu_device_reset(smmu);
+
return 0;
}
-static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
+static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
+{
+ struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+
+ clk_bulk_disable(smmu->num_clks, smmu->clks);
+
+ return 0;
+}
+
+static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
+{
+ if (pm_runtime_suspended(dev))
+ return 0;
+
+ return arm_smmu_runtime_resume(dev);
+}
+
+static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
+{
+ if (pm_runtime_suspended(dev))
+ return 0;
+
+ return arm_smmu_runtime_suspend(dev);
+}
+
+static const struct dev_pm_ops arm_smmu_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
+ SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
+ arm_smmu_runtime_resume, NULL)
+};
static struct platform_driver arm_smmu_driver = {
.driver = {
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
From: Sricharan R <[email protected]>
The smmu device probe/remove and add/remove master device callbacks
get called when the smmu is not linked to its master, that is, without
the context of the master device. So call the runtime pm APIs in those
places separately.
Global locks are also initialized before enabling runtime pm as the
runtime_resume() calls device_reset() which does tlb_sync_global()
that ultimately requires locks to be initialized.
Signed-off-by: Sricharan R <[email protected]>
[vivek: Cleanup pm runtime calls]
Signed-off-by: Vivek Gautam <[email protected]>
Reviewed-by: Tomasz Figa <[email protected]>
Tested-by: Srinivas Kandagatla <[email protected]>
Reviewed-by: Robin Murphy <[email protected]>
---
drivers/iommu/arm-smmu.c | 101 ++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 91 insertions(+), 10 deletions(-)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index f7ab7ce87a94..cae88c9f83ca 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -270,6 +270,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
{ 0, NULL},
};
+static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
+{
+ if (pm_runtime_enabled(smmu->dev))
+ return pm_runtime_get_sync(smmu->dev);
+
+ return 0;
+}
+
+static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
+{
+ if (pm_runtime_enabled(smmu->dev))
+ pm_runtime_put(smmu->dev);
+}
+
static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
return container_of(dom, struct arm_smmu_domain, domain);
@@ -929,11 +943,15 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- int irq;
+ int ret, irq;
if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
return;
+ ret = arm_smmu_rpm_get(smmu);
+ if (ret < 0)
+ return;
+
/*
* Disable the context bank and free the page tables before freeing
* it.
@@ -948,6 +966,8 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
free_io_pgtable_ops(smmu_domain->pgtbl_ops);
__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
+
+ arm_smmu_rpm_put(smmu);
}
static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
@@ -1229,10 +1249,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
return -ENODEV;
smmu = fwspec_smmu(fwspec);
+
+ ret = arm_smmu_rpm_get(smmu);
+ if (ret < 0)
+ return ret;
+
/* Ensure that the domain is finalised */
ret = arm_smmu_init_domain_context(domain, smmu);
if (ret < 0)
- return ret;
+ goto rpm_put;
/*
* Sanity check the domain. We don't support domains across
@@ -1242,49 +1267,74 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
dev_err(dev,
"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
- return -EINVAL;
+ ret = -EINVAL;
+ goto rpm_put;
}
/* Looks ok, so add the device to the domain */
- return arm_smmu_domain_add_master(smmu_domain, fwspec);
+ ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
+
+rpm_put:
+ arm_smmu_rpm_put(smmu);
+ return ret;
}
static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+ struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+ int ret;
if (!ops)
return -ENODEV;
- return ops->map(ops, iova, paddr, size, prot);
+ arm_smmu_rpm_get(smmu);
+ ret = ops->map(ops, iova, paddr, size, prot);
+ arm_smmu_rpm_put(smmu);
+
+ return ret;
}
static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
size_t size)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+ struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+ size_t ret;
if (!ops)
return 0;
- return ops->unmap(ops, iova, size);
+ arm_smmu_rpm_get(smmu);
+ ret = ops->unmap(ops, iova, size);
+ arm_smmu_rpm_put(smmu);
+
+ return ret;
}
static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
- if (smmu_domain->tlb_ops)
+ if (smmu_domain->tlb_ops) {
+ arm_smmu_rpm_get(smmu);
smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
+ arm_smmu_rpm_put(smmu);
+ }
}
static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
- if (smmu_domain->tlb_ops)
+ if (smmu_domain->tlb_ops) {
+ arm_smmu_rpm_get(smmu);
smmu_domain->tlb_ops->tlb_sync(smmu_domain);
+ arm_smmu_rpm_put(smmu);
+ }
}
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
@@ -1431,7 +1481,13 @@ static int arm_smmu_add_device(struct device *dev)
while (i--)
cfg->smendx[i] = INVALID_SMENDX;
+ ret = arm_smmu_rpm_get(smmu);
+ if (ret < 0)
+ goto out_cfg_free;
+
ret = arm_smmu_master_alloc_smes(dev);
+ arm_smmu_rpm_put(smmu);
+
if (ret)
goto out_cfg_free;
@@ -1451,7 +1507,7 @@ static void arm_smmu_remove_device(struct device *dev)
struct iommu_fwspec *fwspec = dev->iommu_fwspec;
struct arm_smmu_master_cfg *cfg;
struct arm_smmu_device *smmu;
-
+ int ret;
if (!fwspec || fwspec->ops != &arm_smmu_ops)
return;
@@ -1459,8 +1515,15 @@ static void arm_smmu_remove_device(struct device *dev)
cfg = fwspec->iommu_priv;
smmu = cfg->smmu;
+ ret = arm_smmu_rpm_get(smmu);
+ if (ret < 0)
+ return;
+
iommu_device_unlink(&smmu->iommu, dev);
arm_smmu_master_free_smes(fwspec);
+
+ arm_smmu_rpm_put(smmu);
+
iommu_group_remove_device(dev);
kfree(fwspec->iommu_priv);
iommu_fwspec_free(dev);
@@ -2232,6 +2295,17 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
arm_smmu_device_reset(smmu);
arm_smmu_test_smr_masks(smmu);
+ /*
+ * We want to avoid touching dev->power.lock in fastpaths unless
+ * it's really going to do something useful - pm_runtime_enabled()
+ * can serve as an ideal proxy for that decision. So, conditionally
+ * enable pm_runtime.
+ */
+ if (dev->pm_domain) {
+ pm_runtime_set_active(dev);
+ pm_runtime_enable(dev);
+ }
+
/*
* For ACPI and generic DT bindings, an SMMU will be probed before
* any device which might need it, so we want the bus ops in place
@@ -2267,10 +2341,17 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
dev_err(&pdev->dev, "removing device with active domains!\n");
+ arm_smmu_rpm_get(smmu);
/* Turn the thing off */
writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+ arm_smmu_rpm_put(smmu);
+
+ if (pm_runtime_enabled(smmu->dev))
+ pm_runtime_force_suspend(smmu->dev);
+ else
+ clk_bulk_disable(smmu->num_clks, smmu->clks);
- clk_bulk_disable_unprepare(smmu->num_clks, smmu->clks);
+ clk_bulk_unprepare(smmu->num_clks, smmu->clks);
return 0;
}
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
From: Sricharan R <[email protected]>
Finally add the device link between the master device and
smmu, so that the smmu gets runtime enabled/disabled only when the
master needs it. This is done from add_device callback which gets
called once when the master is added to the smmu.
Signed-off-by: Sricharan R <[email protected]>
Signed-off-by: Vivek Gautam <[email protected]>
Reviewed-by: Tomasz Figa <[email protected]>
Tested-by: Srinivas Kandagatla <[email protected]>
Reviewed-by: Robin Murphy <[email protected]>
---
drivers/iommu/arm-smmu.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index cae88c9f83ca..2098c3141f5f 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1493,6 +1493,9 @@ static int arm_smmu_add_device(struct device *dev)
iommu_device_link(&smmu->iommu, dev);
+ device_link_add(dev, smmu->dev,
+ DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
+
return 0;
out_cfg_free:
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
Add bindings doc for Qcom's smmu-v2 implementation.
Signed-off-by: Vivek Gautam <[email protected]>
Reviewed-by: Tomasz Figa <[email protected]>
Tested-by: Srinivas Kandagatla <[email protected]>
Reviewed-by: Rob Herring <[email protected]>
Reviewed-by: Robin Murphy <[email protected]>
---
.../devicetree/bindings/iommu/arm,smmu.txt | 39 ++++++++++++++++++++++
1 file changed, 39 insertions(+)
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index 8a6ffce12af5..a6504b37cc21 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -17,10 +17,16 @@ conditions.
"arm,mmu-401"
"arm,mmu-500"
"cavium,smmu-v2"
+ "qcom,smmu-v2"
depending on the particular implementation and/or the
version of the architecture implemented.
+ Qcom SoCs must contain, as below, SoC-specific compatibles
+ along with "qcom,smmu-v2":
+ "qcom,msm8996-smmu-v2", "qcom,smmu-v2",
+ "qcom,sdm845-smmu-v2", "qcom,smmu-v2".
+
- reg : Base address and size of the SMMU.
- #global-interrupts : The number of global interrupts exposed by the
@@ -71,6 +77,22 @@ conditions.
or using stream matching with #iommu-cells = <2>, and
may be ignored if present in such cases.
+- clock-names: List of the names of clocks input to the device. The
+ required list depends on particular implementation and
+ is as follows:
+ - for "qcom,smmu-v2":
+ - "bus": clock required for downstream bus access and
+ for the smmu ptw,
+ - "iface": clock required to access smmu's registers
+ through the TCU's programming interface.
+ - unspecified for other implementations.
+
+- clocks: Specifiers for all clocks listed in the clock-names property,
+ as per generic clock bindings.
+
+- power-domains: Specifiers for power domains required to be powered on for
+ the SMMU to operate, as per generic power domain bindings.
+
** Deprecated properties:
- mmu-masters (deprecated in favour of the generic "iommus" binding) :
@@ -137,3 +159,20 @@ conditions.
iommu-map = <0 &smmu3 0 0x400>;
...
};
+
+ /* Qcom's arm,smmu-v2 implementation */
+ smmu4: iommu@d00000 {
+ compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2";
+ reg = <0xd00000 0x10000>;
+
+ #global-interrupts = <1>;
+ interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 320 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 321 IRQ_TYPE_LEVEL_HIGH>;
+ #iommu-cells = <1>;
+ power-domains = <&mmcc MDSS_GDSC>;
+
+ clocks = <&mmcc SMMU_MDP_AXI_CLK>,
+ <&mmcc SMMU_MDP_AHB_CLK>;
+ clock-names = "bus", "iface";
+ };
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
qcom,smmu-v2 is an arm,smmu-v2 implementation with specific
clock and power requirements.
On msm8996, multiple cores, viz. mdss, video, etc. use this
smmu. On sdm845, this smmu is used with gpu.
Add bindings for the same.
Signed-off-by: Vivek Gautam <[email protected]>
Reviewed-by: Rob Herring <[email protected]>
Reviewed-by: Tomasz Figa <[email protected]>
Tested-by: Srinivas Kandagatla <[email protected]>
Reviewed-by: Robin Murphy <[email protected]>
---
drivers/iommu/arm-smmu.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 2098c3141f5f..d315ca637097 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -120,6 +120,7 @@ enum arm_smmu_implementation {
GENERIC_SMMU,
ARM_MMU500,
CAVIUM_SMMUV2,
+ QCOM_SMMUV2,
};
struct arm_smmu_s2cr {
@@ -2026,6 +2027,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
+static const char * const qcom_smmuv2_clks[] = {
+ "bus", "iface",
+};
+
+static const struct arm_smmu_match_data qcom_smmuv2 = {
+ .version = ARM_SMMU_V2,
+ .model = QCOM_SMMUV2,
+ .clks = qcom_smmuv2_clks,
+ .num_clks = ARRAY_SIZE(qcom_smmuv2_clks),
+};
+
static const struct of_device_id arm_smmu_of_match[] = {
{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
@@ -2033,6 +2045,7 @@ static const struct of_device_id arm_smmu_of_match[] = {
{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
+ { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
[+Thor]
On Fri, Nov 16, 2018 at 04:54:30PM +0530, Vivek Gautam wrote:
> qcom,smmu-v2 is an arm,smmu-v2 implementation with specific
> clock and power requirements.
> On msm8996, multiple cores, viz. mdss, video, etc. use this
> smmu. On sdm845, this smmu is used with gpu.
> Add bindings for the same.
>
> Signed-off-by: Vivek Gautam <[email protected]>
> Reviewed-by: Rob Herring <[email protected]>
> Reviewed-by: Tomasz Figa <[email protected]>
> Tested-by: Srinivas Kandagatla <[email protected]>
> Reviewed-by: Robin Murphy <[email protected]>
> ---
> drivers/iommu/arm-smmu.c | 13 +++++++++++++
> 1 file changed, 13 insertions(+)
>
> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> index 2098c3141f5f..d315ca637097 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -120,6 +120,7 @@ enum arm_smmu_implementation {
> GENERIC_SMMU,
> ARM_MMU500,
> CAVIUM_SMMUV2,
> + QCOM_SMMUV2,
> };
>
> struct arm_smmu_s2cr {
> @@ -2026,6 +2027,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
> ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
> ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
>
> +static const char * const qcom_smmuv2_clks[] = {
> + "bus", "iface",
> +};
> +
> +static const struct arm_smmu_match_data qcom_smmuv2 = {
> + .version = ARM_SMMU_V2,
> + .model = QCOM_SMMUV2,
> + .clks = qcom_smmuv2_clks,
> + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks),
> +};
These seem redundant if we go down the route proposed by Thor, where we
just pull all of the clocks out of the device-tree. In which case, why
do we need this match_data at all?
Will
On Fri, Nov 16, 2018 at 04:54:27PM +0530, Vivek Gautam wrote:
> From: Sricharan R <[email protected]>
>
> The smmu device probe/remove and add/remove master device callbacks
> gets called when the smmu is not linked to its master, that is without
> the context of the master device. So calling runtime apis in those places
> separately.
> Global locks are also initialized before enabling runtime pm as the
> runtime_resume() calls device_reset() which does tlb_sync_global()
> that ultimately requires locks to be initialized.
>
> Signed-off-by: Sricharan R <[email protected]>
> [vivek: Cleanup pm runtime calls]
> Signed-off-by: Vivek Gautam <[email protected]>
> Reviewed-by: Tomasz Figa <[email protected]>
> Tested-by: Srinivas Kandagatla <[email protected]>
> Reviewed-by: Robin Murphy <[email protected]>
> ---
> drivers/iommu/arm-smmu.c | 101 ++++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 91 insertions(+), 10 deletions(-)
Given that you're doing the get/put in the TLBI ops unconditionally:
> static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
> {
> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> + struct arm_smmu_device *smmu = smmu_domain->smmu;
>
> - if (smmu_domain->tlb_ops)
> + if (smmu_domain->tlb_ops) {
> + arm_smmu_rpm_get(smmu);
> smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
> + arm_smmu_rpm_put(smmu);
> + }
> }
>
> static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
> {
> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> + struct arm_smmu_device *smmu = smmu_domain->smmu;
>
> - if (smmu_domain->tlb_ops)
> + if (smmu_domain->tlb_ops) {
> + arm_smmu_rpm_get(smmu);
> smmu_domain->tlb_ops->tlb_sync(smmu_domain);
> + arm_smmu_rpm_put(smmu);
> + }
Why do you need them around the map/unmap calls as well?
Will
Hi Will,
On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <[email protected]> wrote:
>
> On Fri, Nov 16, 2018 at 04:54:27PM +0530, Vivek Gautam wrote:
> > From: Sricharan R <[email protected]>
> >
> > The smmu device probe/remove and add/remove master device callbacks
> > gets called when the smmu is not linked to its master, that is without
> > the context of the master device. So calling runtime apis in those places
> > separately.
> > Global locks are also initialized before enabling runtime pm as the
> > runtime_resume() calls device_reset() which does tlb_sync_global()
> > that ultimately requires locks to be initialized.
> >
> > Signed-off-by: Sricharan R <[email protected]>
> > [vivek: Cleanup pm runtime calls]
> > Signed-off-by: Vivek Gautam <[email protected]>
> > Reviewed-by: Tomasz Figa <[email protected]>
> > Tested-by: Srinivas Kandagatla <[email protected]>
> > Reviewed-by: Robin Murphy <[email protected]>
> > ---
> > drivers/iommu/arm-smmu.c | 101 ++++++++++++++++++++++++++++++++++++++++++-----
> > 1 file changed, 91 insertions(+), 10 deletions(-)
>
> Given that you're doing the get/put in the TLBI ops unconditionally:
>
> > static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
> > {
> > struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> > + struct arm_smmu_device *smmu = smmu_domain->smmu;
> >
> > - if (smmu_domain->tlb_ops)
> > + if (smmu_domain->tlb_ops) {
> > + arm_smmu_rpm_get(smmu);
> > smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
> > + arm_smmu_rpm_put(smmu);
> > + }
> > }
> >
> > static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
> > {
> > struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> > + struct arm_smmu_device *smmu = smmu_domain->smmu;
> >
> > - if (smmu_domain->tlb_ops)
> > + if (smmu_domain->tlb_ops) {
> > + arm_smmu_rpm_get(smmu);
> > smmu_domain->tlb_ops->tlb_sync(smmu_domain);
> > + arm_smmu_rpm_put(smmu);
> > + }
>
> Why do you need them around the map/unmap calls as well?
We still have .tlb_add_flush path?
Thanks
Vivek
>
> Will
> _______________________________________________
> iommu mailing list
> [email protected]
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
Hi Will,
On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <[email protected]> wrote:
>
> [+Thor]
>
> On Fri, Nov 16, 2018 at 04:54:30PM +0530, Vivek Gautam wrote:
> > qcom,smmu-v2 is an arm,smmu-v2 implementation with specific
> > clock and power requirements.
> > On msm8996, multiple cores, viz. mdss, video, etc. use this
> > smmu. On sdm845, this smmu is used with gpu.
> > Add bindings for the same.
> >
> > Signed-off-by: Vivek Gautam <[email protected]>
> > Reviewed-by: Rob Herring <[email protected]>
> > Reviewed-by: Tomasz Figa <[email protected]>
> > Tested-by: Srinivas Kandagatla <[email protected]>
> > Reviewed-by: Robin Murphy <[email protected]>
> > ---
> > drivers/iommu/arm-smmu.c | 13 +++++++++++++
> > 1 file changed, 13 insertions(+)
> >
> > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> > index 2098c3141f5f..d315ca637097 100644
> > --- a/drivers/iommu/arm-smmu.c
> > +++ b/drivers/iommu/arm-smmu.c
> > @@ -120,6 +120,7 @@ enum arm_smmu_implementation {
> > GENERIC_SMMU,
> > ARM_MMU500,
> > CAVIUM_SMMUV2,
> > + QCOM_SMMUV2,
> > };
> >
> > struct arm_smmu_s2cr {
> > @@ -2026,6 +2027,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
> > ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
> > ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
> >
> > +static const char * const qcom_smmuv2_clks[] = {
> > + "bus", "iface",
> > +};
> > +
> > +static const struct arm_smmu_match_data qcom_smmuv2 = {
> > + .version = ARM_SMMU_V2,
> > + .model = QCOM_SMMUV2,
> > + .clks = qcom_smmuv2_clks,
> > + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks),
> > +};
>
> These seems redundant if we go down the route proposed by Thor, where we
> just pull all of the clocks out of the device-tree. In which case, why
> do we need this match_data at all?
Which is better: the driver relying solely on the device tree to tell
it which clocks are required to be enabled, or the driver deciding for
itself, based on the platform's match data, that it should have clocks
X, Y, & Z, which must then be supplied from the device tree?
Thanks
Vivek
>
> Will
> _______________________________________________
> iommu mailing list
> [email protected]
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
Hi Vivek, Will,
On Fri, Nov 23, 2018 at 6:13 PM Vivek Gautam
<[email protected]> wrote:
>
> Hi Will,
>
> On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <[email protected]> wrote:
> >
> > [+Thor]
> >
> > On Fri, Nov 16, 2018 at 04:54:30PM +0530, Vivek Gautam wrote:
> > > qcom,smmu-v2 is an arm,smmu-v2 implementation with specific
> > > clock and power requirements.
> > > On msm8996, multiple cores, viz. mdss, video, etc. use this
> > > smmu. On sdm845, this smmu is used with gpu.
> > > Add bindings for the same.
> > >
> > > Signed-off-by: Vivek Gautam <[email protected]>
> > > Reviewed-by: Rob Herring <[email protected]>
> > > Reviewed-by: Tomasz Figa <[email protected]>
> > > Tested-by: Srinivas Kandagatla <[email protected]>
> > > Reviewed-by: Robin Murphy <[email protected]>
> > > ---
> > > drivers/iommu/arm-smmu.c | 13 +++++++++++++
> > > 1 file changed, 13 insertions(+)
> > >
> > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> > > index 2098c3141f5f..d315ca637097 100644
> > > --- a/drivers/iommu/arm-smmu.c
> > > +++ b/drivers/iommu/arm-smmu.c
> > > @@ -120,6 +120,7 @@ enum arm_smmu_implementation {
> > > GENERIC_SMMU,
> > > ARM_MMU500,
> > > CAVIUM_SMMUV2,
> > > + QCOM_SMMUV2,
> > > };
> > >
> > > struct arm_smmu_s2cr {
> > > @@ -2026,6 +2027,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
> > > ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
> > > ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
> > >
> > > +static const char * const qcom_smmuv2_clks[] = {
> > > + "bus", "iface",
> > > +};
> > > +
> > > +static const struct arm_smmu_match_data qcom_smmuv2 = {
> > > + .version = ARM_SMMU_V2,
> > > + .model = QCOM_SMMUV2,
> > > + .clks = qcom_smmuv2_clks,
> > > + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks),
> > > +};
> >
> > These seems redundant if we go down the route proposed by Thor, where we
> > just pull all of the clocks out of the device-tree. In which case, why
> > do we need this match_data at all?
>
> Which is better? Driver relying solely on the device tree to tell
> which all clocks
> are required to be enabled,
> or, the driver deciding itself based on the platform's match data,
> that it should
> have X, Y, & Z clocks that should be supplied from the device tree.
The former would simplify the driver, but would also make it
impossible to spot mistakes in DT, which would ultimately surface
as very hard-to-debug bugs (likely complete system lockups).
For qcom_smmuv2, I believe we're eventually going to end up with
platform-specific quirks anyway, so specifying the clocks too wouldn't
hurt. Given that, I'd recommend sticking to the latter, i.e. what this
patch does.
Best regards,
Tomasz
Hi Tomasz,
On Fri, Nov 23, 2018 at 2:52 PM Tomasz Figa <[email protected]> wrote:
>
> Hi Vivek, Will,
>
> On Fri, Nov 23, 2018 at 6:13 PM Vivek Gautam
> <[email protected]> wrote:
> >
> > Hi Will,
> >
> > On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <[email protected]> wrote:
> > >
> > > [+Thor]
> > >
> > > On Fri, Nov 16, 2018 at 04:54:30PM +0530, Vivek Gautam wrote:
> > > > qcom,smmu-v2 is an arm,smmu-v2 implementation with specific
> > > > clock and power requirements.
> > > > On msm8996, multiple cores, viz. mdss, video, etc. use this
> > > > smmu. On sdm845, this smmu is used with gpu.
> > > > Add bindings for the same.
> > > >
> > > > Signed-off-by: Vivek Gautam <[email protected]>
> > > > Reviewed-by: Rob Herring <[email protected]>
> > > > Reviewed-by: Tomasz Figa <[email protected]>
> > > > Tested-by: Srinivas Kandagatla <[email protected]>
> > > > Reviewed-by: Robin Murphy <[email protected]>
> > > > ---
> > > > drivers/iommu/arm-smmu.c | 13 +++++++++++++
> > > > 1 file changed, 13 insertions(+)
> > > >
> > > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> > > > index 2098c3141f5f..d315ca637097 100644
> > > > --- a/drivers/iommu/arm-smmu.c
> > > > +++ b/drivers/iommu/arm-smmu.c
> > > > @@ -120,6 +120,7 @@ enum arm_smmu_implementation {
> > > > GENERIC_SMMU,
> > > > ARM_MMU500,
> > > > CAVIUM_SMMUV2,
> > > > + QCOM_SMMUV2,
> > > > };
> > > >
> > > > struct arm_smmu_s2cr {
> > > > @@ -2026,6 +2027,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
> > > > ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
> > > > ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
> > > >
> > > > +static const char * const qcom_smmuv2_clks[] = {
> > > > + "bus", "iface",
> > > > +};
> > > > +
> > > > +static const struct arm_smmu_match_data qcom_smmuv2 = {
> > > > + .version = ARM_SMMU_V2,
> > > > + .model = QCOM_SMMUV2,
> > > > + .clks = qcom_smmuv2_clks,
> > > > + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks),
> > > > +};
> > >
> > > These seems redundant if we go down the route proposed by Thor, where we
> > > just pull all of the clocks out of the device-tree. In which case, why
> > > do we need this match_data at all?
> >
> > Which is better? Driver relying solely on the device tree to tell
> > which all clocks
> > are required to be enabled,
> > or, the driver deciding itself based on the platform's match data,
> > that it should
> > have X, Y, & Z clocks that should be supplied from the device tree.
>
> The former would simplify the driver, but would also make it
> impossible to spot mistakes in DT, which would ultimately surface out
> as very hard to debug bugs (likely complete system lockups).
Thanks.
Yea, this is how I understand things presently. Relying on device tree
puts the things out of driver's control.
Hi Will,
Am I unable to understand the intentions here for Thor's clock-fetch
design change?
>
> For qcom_smmuv2, I believe we're eventually going to end up with
> platform-specific quirks anyway, so specifying the clocks too wouldn't
> hurt. Given that, I'd recommend sticking to the latter, i.e. what this
> patch does.
>
> Best regards,
> Tomasz
Best regards
Vivek
> _______________________________________________
> iommu mailing list
> [email protected]
> https://lists.linuxfoundation.org/mailman/listinfo/iommu
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation
On Fri, Nov 23, 2018 at 03:06:29PM +0530, Vivek Gautam wrote:
> On Fri, Nov 23, 2018 at 2:52 PM Tomasz Figa <[email protected]> wrote:
> > On Fri, Nov 23, 2018 at 6:13 PM Vivek Gautam
> > <[email protected]> wrote:
> > > On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <[email protected]> wrote:
> > > > On Fri, Nov 16, 2018 at 04:54:30PM +0530, Vivek Gautam wrote:
> > > > > @@ -2026,6 +2027,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
> > > > > ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
> > > > > ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
> > > > >
> > > > > +static const char * const qcom_smmuv2_clks[] = {
> > > > > + "bus", "iface",
> > > > > +};
> > > > > +
> > > > > +static const struct arm_smmu_match_data qcom_smmuv2 = {
> > > > > + .version = ARM_SMMU_V2,
> > > > > + .model = QCOM_SMMUV2,
> > > > > + .clks = qcom_smmuv2_clks,
> > > > > + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks),
> > > > > +};
> > > >
> > > > These seems redundant if we go down the route proposed by Thor, where we
> > > > just pull all of the clocks out of the device-tree. In which case, why
> > > > do we need this match_data at all?
> > >
> > > Which is better? Driver relying solely on the device tree to tell
> > > which all clocks
> > > are required to be enabled,
> > > or, the driver deciding itself based on the platform's match data,
> > > that it should
> > > have X, Y, & Z clocks that should be supplied from the device tree.
> >
> > The former would simplify the driver, but would also make it
> > impossible to spot mistakes in DT, which would ultimately surface out
> > as very hard to debug bugs (likely complete system lockups).
>
> Thanks.
> Yea, this is how I understand things presently. Relying on device tree
> puts the things out of driver's control.
But it also has the undesirable effect of having to update the driver
code whenever we want to add support for a new SMMU implementation. If
we do this all in the DT, as Thor is trying to do, then older kernels
will work well with new hardware.
> Hi Will,
> Am I unable to understand the intentions here for Thor's clock-fetch
> design change?
I'm having trouble parsing your question, sorry. Please work with Thor
so that we have a single way to get the clock information. My preference
is to take it from the firmware, for the reason I stated above.
Will
On Thu, Nov 22, 2018 at 05:32:24PM +0530, Vivek Gautam wrote:
> Hi Will,
>
> On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <[email protected]> wrote:
> >
> > On Fri, Nov 16, 2018 at 04:54:27PM +0530, Vivek Gautam wrote:
> > > From: Sricharan R <[email protected]>
> > >
> > > The smmu device probe/remove and add/remove master device callbacks
> > > gets called when the smmu is not linked to its master, that is without
> > > the context of the master device. So calling runtime apis in those places
> > > separately.
> > > Global locks are also initialized before enabling runtime pm as the
> > > runtime_resume() calls device_reset() which does tlb_sync_global()
> > > that ultimately requires locks to be initialized.
> > >
> > > Signed-off-by: Sricharan R <[email protected]>
> > > [vivek: Cleanup pm runtime calls]
> > > Signed-off-by: Vivek Gautam <[email protected]>
> > > Reviewed-by: Tomasz Figa <[email protected]>
> > > Tested-by: Srinivas Kandagatla <[email protected]>
> > > Reviewed-by: Robin Murphy <[email protected]>
> > > ---
> > > drivers/iommu/arm-smmu.c | 101 ++++++++++++++++++++++++++++++++++++++++++-----
> > > 1 file changed, 91 insertions(+), 10 deletions(-)
> >
> > Given that you're doing the get/put in the TLBI ops unconditionally:
> >
> > > static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
> > > {
> > > struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> > > + struct arm_smmu_device *smmu = smmu_domain->smmu;
> > >
> > > - if (smmu_domain->tlb_ops)
> > > + if (smmu_domain->tlb_ops) {
> > > + arm_smmu_rpm_get(smmu);
> > > smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
> > > + arm_smmu_rpm_put(smmu);
> > > + }
> > > }
> > >
> > > static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
> > > {
> > > struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> > > + struct arm_smmu_device *smmu = smmu_domain->smmu;
> > >
> > > - if (smmu_domain->tlb_ops)
> > > + if (smmu_domain->tlb_ops) {
> > > + arm_smmu_rpm_get(smmu);
> > > smmu_domain->tlb_ops->tlb_sync(smmu_domain);
> > > + arm_smmu_rpm_put(smmu);
> > > + }
> >
> > Why do you need them around the map/unmap calls as well?
>
> We still have .tlb_add_flush path?
Ok, so we could add the ops around that as well. Right now, we've got
the runtime pm hooks crossing two parts of the API.
Will
On Sat, Nov 24, 2018 at 3:34 AM Will Deacon <[email protected]> wrote:
>
> On Fri, Nov 23, 2018 at 03:06:29PM +0530, Vivek Gautam wrote:
> > On Fri, Nov 23, 2018 at 2:52 PM Tomasz Figa <[email protected]> wrote:
> > > On Fri, Nov 23, 2018 at 6:13 PM Vivek Gautam
> > > <[email protected]> wrote:
> > > > On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <[email protected]> wrote:
> > > > > On Fri, Nov 16, 2018 at 04:54:30PM +0530, Vivek Gautam wrote:
> > > > > > @@ -2026,6 +2027,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
> > > > > > ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
> > > > > > ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
> > > > > >
> > > > > > +static const char * const qcom_smmuv2_clks[] = {
> > > > > > + "bus", "iface",
> > > > > > +};
> > > > > > +
> > > > > > +static const struct arm_smmu_match_data qcom_smmuv2 = {
> > > > > > + .version = ARM_SMMU_V2,
> > > > > > + .model = QCOM_SMMUV2,
> > > > > > + .clks = qcom_smmuv2_clks,
> > > > > > + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks),
> > > > > > +};
> > > > >
> > > > > These seems redundant if we go down the route proposed by Thor, where we
> > > > > just pull all of the clocks out of the device-tree. In which case, why
> > > > > do we need this match_data at all?
> > > >
> > > > Which is better? Driver relying solely on the device tree to tell
> > > > which all clocks
> > > > are required to be enabled,
> > > > or, the driver deciding itself based on the platform's match data,
> > > > that it should
> > > > have X, Y, & Z clocks that should be supplied from the device tree.
> > >
> > > The former would simplify the driver, but would also make it
> > > impossible to spot mistakes in DT, which would ultimately surface out
> > > as very hard to debug bugs (likely complete system lockups).
> >
> > Thanks.
> > Yea, this is how I understand things presently. Relying on device tree
> > puts the things out of driver's control.
>
> But it also has the undesirable effect of having to update the driver
> code whenever we want to add support for a new SMMU implementation. If
> we do this all in the DT, as Thor is trying to do, then older kernels
> will work well with new hardware.
Fair enough, if you're okay with that. Obviously one would still have
to change the DT bindings to list the exact set of clocks for the new
hardware variant, unless the convention changed recently.
Best regards,
Tomasz
On 11/24/2018 12:06 AM, Will Deacon wrote:
> On Thu, Nov 22, 2018 at 05:32:24PM +0530, Vivek Gautam wrote:
>> Hi Will,
>>
>> On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <[email protected]> wrote:
>>> On Fri, Nov 16, 2018 at 04:54:27PM +0530, Vivek Gautam wrote:
>>>> From: Sricharan R <[email protected]>
>>>>
>>>> The smmu device probe/remove and add/remove master device callbacks
>>>> gets called when the smmu is not linked to its master, that is without
>>>> the context of the master device. So calling runtime apis in those places
>>>> separately.
>>>> Global locks are also initialized before enabling runtime pm as the
>>>> runtime_resume() calls device_reset() which does tlb_sync_global()
>>>> that ultimately requires locks to be initialized.
>>>>
>>>> Signed-off-by: Sricharan R <[email protected]>
>>>> [vivek: Cleanup pm runtime calls]
>>>> Signed-off-by: Vivek Gautam <[email protected]>
>>>> Reviewed-by: Tomasz Figa <[email protected]>
>>>> Tested-by: Srinivas Kandagatla <[email protected]>
>>>> Reviewed-by: Robin Murphy <[email protected]>
>>>> ---
>>>> drivers/iommu/arm-smmu.c | 101 ++++++++++++++++++++++++++++++++++++++++++-----
>>>> 1 file changed, 91 insertions(+), 10 deletions(-)
>>> Given that you're doing the get/put in the TLBI ops unconditionally:
>>>
>>>> static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
>>>> {
>>>> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>>>> + struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>
>>>> - if (smmu_domain->tlb_ops)
>>>> + if (smmu_domain->tlb_ops) {
>>>> + arm_smmu_rpm_get(smmu);
>>>> smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
>>>> + arm_smmu_rpm_put(smmu);
>>>> + }
>>>> }
>>>>
>>>> static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
>>>> {
>>>> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>>>> + struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>
>>>> - if (smmu_domain->tlb_ops)
>>>> + if (smmu_domain->tlb_ops) {
>>>> + arm_smmu_rpm_get(smmu);
>>>> smmu_domain->tlb_ops->tlb_sync(smmu_domain);
>>>> + arm_smmu_rpm_put(smmu);
>>>> + }
>>> Why do you need them around the map/unmap calls as well?
>> We still have .tlb_add_flush path?
> Ok, so we could add the ops around that as well. Right now, we've got
> the runtime pm hooks crossing two parts of the API.
Sure, will do that then, and remove the runtime pm hooks from map/unmap.
Thanks
Vivek
>
> Will
On 11/24/2018 12:04 AM, Will Deacon wrote:
> On Fri, Nov 23, 2018 at 03:06:29PM +0530, Vivek Gautam wrote:
>> On Fri, Nov 23, 2018 at 2:52 PM Tomasz Figa <[email protected]> wrote:
>>> On Fri, Nov 23, 2018 at 6:13 PM Vivek Gautam
>>> <[email protected]> wrote:
>>>> On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <[email protected]> wrote:
>>>>> On Fri, Nov 16, 2018 at 04:54:30PM +0530, Vivek Gautam wrote:
>>>>>> @@ -2026,6 +2027,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
>>>>>> ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
>>>>>> ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
>>>>>>
>>>>>> +static const char * const qcom_smmuv2_clks[] = {
>>>>>> + "bus", "iface",
>>>>>> +};
>>>>>> +
>>>>>> +static const struct arm_smmu_match_data qcom_smmuv2 = {
>>>>>> + .version = ARM_SMMU_V2,
>>>>>> + .model = QCOM_SMMUV2,
>>>>>> + .clks = qcom_smmuv2_clks,
>>>>>> + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks),
>>>>>> +};
>>>>> These seems redundant if we go down the route proposed by Thor, where we
>>>>> just pull all of the clocks out of the device-tree. In which case, why
>>>>> do we need this match_data at all?
>>>> Which is better? Driver relying solely on the device tree to tell
>>>> which all clocks
>>>> are required to be enabled,
>>>> or, the driver deciding itself based on the platform's match data,
>>>> that it should
>>>> have X, Y, & Z clocks that should be supplied from the device tree.
>>> The former would simplify the driver, but would also make it
>>> impossible to spot mistakes in DT, which would ultimately surface out
>>> as very hard to debug bugs (likely complete system lockups).
>> Thanks.
>> Yea, this is how I understand things presently. Relying on device tree
>> puts the things out of driver's control.
> But it also has the undesirable effect of having to update the driver
> code whenever we want to add support for a new SMMU implementation. If
> we do this all in the DT, as Thor is trying to do, then older kernels
> will work well with new hardware.
>
>> Hi Will,
>> Am I unable to understand the intentions here for Thor's clock-fetch
>> design change?
> I'm having trouble parsing your question, sorry. Please work with Thor
> so that we have a single way to get the clock information. My preference
> is to take it from the firmware, for the reason I stated above.
Hi Will,
Sure, thanks. I will work with Thor to get this going.
Hi Thor,
Does it sound okay to you to squash your patch [1] into my patch [2] with
your 'Signed-off-by' tag?
I will update the commit log to include the information about getting
clock details from device tree.
[1] https://patchwork.kernel.org/patch/10628725/
[2] https://patchwork.kernel.org/patch/10686061/
Best regards
Vivek
>
> Will
On 11/26/2018 11:33 AM, Vivek Gautam wrote:
>
>
> On 11/24/2018 12:06 AM, Will Deacon wrote:
>> On Thu, Nov 22, 2018 at 05:32:24PM +0530, Vivek Gautam wrote:
>>> Hi Will,
>>>
>>> On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <[email protected]>
>>> wrote:
>>>> On Fri, Nov 16, 2018 at 04:54:27PM +0530, Vivek Gautam wrote:
>>>>> From: Sricharan R <[email protected]>
>>>>>
>>>>> The smmu device probe/remove and add/remove master device callbacks
>>>>> gets called when the smmu is not linked to its master, that is
>>>>> without
>>>>> the context of the master device. So calling runtime apis in those
>>>>> places
>>>>> separately.
>>>>> Global locks are also initialized before enabling runtime pm as the
>>>>> runtime_resume() calls device_reset() which does tlb_sync_global()
>>>>> that ultimately requires locks to be initialized.
>>>>>
>>>>> Signed-off-by: Sricharan R <[email protected]>
>>>>> [vivek: Cleanup pm runtime calls]
>>>>> Signed-off-by: Vivek Gautam <[email protected]>
>>>>> Reviewed-by: Tomasz Figa <[email protected]>
>>>>> Tested-by: Srinivas Kandagatla <[email protected]>
>>>>> Reviewed-by: Robin Murphy <[email protected]>
>>>>> ---
>>>>> drivers/iommu/arm-smmu.c | 101
>>>>> ++++++++++++++++++++++++++++++++++++++++++-----
>>>>> 1 file changed, 91 insertions(+), 10 deletions(-)
>>>> Given that you're doing the get/put in the TLBI ops unconditionally:
>>>>
>>>>> static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
>>>>> {
>>>>> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>>>>> + struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>>
>>>>> - if (smmu_domain->tlb_ops)
>>>>> + if (smmu_domain->tlb_ops) {
>>>>> + arm_smmu_rpm_get(smmu);
>>>>> smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
>>>>> + arm_smmu_rpm_put(smmu);
>>>>> + }
>>>>> }
>>>>>
>>>>> static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
>>>>> {
>>>>> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>>>>> + struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>>
>>>>> - if (smmu_domain->tlb_ops)
>>>>> + if (smmu_domain->tlb_ops) {
>>>>> + arm_smmu_rpm_get(smmu);
>>>>> smmu_domain->tlb_ops->tlb_sync(smmu_domain);
>>>>> + arm_smmu_rpm_put(smmu);
>>>>> + }
>>>> Why do you need them around the map/unmap calls as well?
>>> We still have .tlb_add_flush path?
>> Ok, so we could add the ops around that as well. Right now, we've got
>> the runtime pm hooks crossing two parts of the API.
>
> Sure, will do that then, and remove the runtime pm hooks from map/unmap.
I missed this earlier -
We are adding runtime pm hooks in the 'iommu_ops' callbacks and not
really to 'tlb_ops'. So how are the runtime pm hooks crossing paths?
The '.map/.unmap' iommu_ops don't call the '.flush_iotlb_all' or
'.iotlb_sync' iommu_ops anywhere.
E.g., only callers to domain->ops->flush_iotlb_all() are:
iommu_dma_flush_iotlb_all(), or iommu_flush_tlb_all() which are not in
map/unmap paths.
Regards
Vivek
>
> Thanks
> Vivek
>>
>> Will
>
Hi Vivek,
On 11/26/18 4:55 AM, Vivek Gautam wrote:
>
> On 11/24/2018 12:04 AM, Will Deacon wrote:
>> On Fri, Nov 23, 2018 at 03:06:29PM +0530, Vivek Gautam wrote:
>>> On Fri, Nov 23, 2018 at 2:52 PM Tomasz Figa <[email protected]> wrote:
>>>> On Fri, Nov 23, 2018 at 6:13 PM Vivek Gautam
>>>> <[email protected]> wrote:
>>>>> On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <[email protected]>
>>>>> wrote:
>>>>>> On Fri, Nov 16, 2018 at 04:54:30PM +0530, Vivek Gautam wrote:
>>>>>>> @@ -2026,6 +2027,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401,
>>>>>>> ARM_SMMU_V1_64K, GENERIC_SMMU);
>>>>>>> ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
>>>>>>> ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
>>>>>>>
>>>>>>> +static const char * const qcom_smmuv2_clks[] = {
>>>>>>> + "bus", "iface",
>>>>>>> +};
>>>>>>> +
>>>>>>> +static const struct arm_smmu_match_data qcom_smmuv2 = {
>>>>>>> + .version = ARM_SMMU_V2,
>>>>>>> + .model = QCOM_SMMUV2,
>>>>>>> + .clks = qcom_smmuv2_clks,
>>>>>>> + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks),
>>>>>>> +};
>>>>>> These seems redundant if we go down the route proposed by Thor,
>>>>>> where we
>>>>>> just pull all of the clocks out of the device-tree. In which case,
>>>>>> why
>>>>>> do we need this match_data at all?
>>>>> Which is better? Driver relying solely on the device tree to tell
>>>>> which all clocks
>>>>> are required to be enabled,
>>>>> or, the driver deciding itself based on the platform's match data,
>>>>> that it should
>>>>> have X, Y, & Z clocks that should be supplied from the device tree.
>>>> The former would simplify the driver, but would also make it
>>>> impossible to spot mistakes in DT, which would ultimately surface out
>>>> as very hard to debug bugs (likely complete system lockups).
>>> Thanks.
>>> Yea, this is how I understand things presently. Relying on device tree
>>> puts the things out of driver's control.
>> But it also has the undesirable effect of having to update the driver
>> code whenever we want to add support for a new SMMU implementation. If
>> we do this all in the DT, as Thor is trying to do, then older kernels
>> will work well with new hardware.
>>
>>> Hi Will,
>>> Am I unable to understand the intentions here for Thor's clock-fetch
>>> design change?
>> I'm having trouble parsing your question, sorry. Please work with Thor
>> so that we have a single way to get the clock information. My preference
>> is to take it from the firmware, for the reason I stated above.
> Hi Will,
>
> Sure, thanks. I will work with Thor to get this going.
>
> Hi Thor,
> Does it sound okay to you to squash your patch [1] into my patch [2] with
> your 'Signed-off-by' tag?
> I will update the commit log to include the information about getting
> clock details from device tree.
>
> [1] https://patchwork.kernel.org/patch/10628725/
> [2] https://patchwork.kernel.org/patch/10686061/
>
Yes, that would be great and easier to understand than my patch on top
of yours.
Additionally, can you remove the "Error:" as Will requested as part of
the squash?
Thank you!
Thor
> Best regards
> Vivek
>>
>> Will
>
>
Hi Thor,
On 11/26/2018 8:11 PM, Thor Thayer wrote:
> Hi Vivek,
>
> On 11/26/18 4:55 AM, Vivek Gautam wrote:
>>
>> On 11/24/2018 12:04 AM, Will Deacon wrote:
>>> On Fri, Nov 23, 2018 at 03:06:29PM +0530, Vivek Gautam wrote:
>>>> On Fri, Nov 23, 2018 at 2:52 PM Tomasz Figa <[email protected]>
>>>> wrote:
>>>>> On Fri, Nov 23, 2018 at 6:13 PM Vivek Gautam
>>>>> <[email protected]> wrote:
>>>>>> On Wed, Nov 21, 2018 at 11:09 PM Will Deacon
>>>>>> <[email protected]> wrote:
>>>>>>> On Fri, Nov 16, 2018 at 04:54:30PM +0530, Vivek Gautam wrote:
>>>>>>>> @@ -2026,6 +2027,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401,
>>>>>>>> ARM_SMMU_V1_64K, GENERIC_SMMU);
>>>>>>>> ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
>>>>>>>> ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
>>>>>>>>
>>>>>>>> +static const char * const qcom_smmuv2_clks[] = {
>>>>>>>> + "bus", "iface",
>>>>>>>> +};
>>>>>>>> +
>>>>>>>> +static const struct arm_smmu_match_data qcom_smmuv2 = {
>>>>>>>> + .version = ARM_SMMU_V2,
>>>>>>>> + .model = QCOM_SMMUV2,
>>>>>>>> + .clks = qcom_smmuv2_clks,
>>>>>>>> + .num_clks = ARRAY_SIZE(qcom_smmuv2_clks),
>>>>>>>> +};
>>>>>>> These seems redundant if we go down the route proposed by Thor,
>>>>>>> where we
>>>>>>> just pull all of the clocks out of the device-tree. In which
>>>>>>> case, why
>>>>>>> do we need this match_data at all?
>>>>>> Which is better? Driver relying solely on the device tree to tell
>>>>>> which all clocks
>>>>>> are required to be enabled,
>>>>>> or, the driver deciding itself based on the platform's match data,
>>>>>> that it should
>>>>>> have X, Y, & Z clocks that should be supplied from the device tree.
>>>>> The former would simplify the driver, but would also make it
>>>>> impossible to spot mistakes in DT, which would ultimately surface out
>>>>> as very hard to debug bugs (likely complete system lockups).
>>>> Thanks.
>>>> Yea, this is how I understand things presently. Relying on device tree
>>>> puts the things out of driver's control.
>>> But it also has the undesirable effect of having to update the driver
>>> code whenever we want to add support for a new SMMU implementation. If
>>> we do this all in the DT, as Thor is trying to do, then older kernels
>>> will work well with new hardware.
>>>
>>>> Hi Will,
>>>> Am I unable to understand the intentions here for Thor's clock-fetch
>>>> design change?
>>> I'm having trouble parsing your question, sorry. Please work with Thor
>>> so that we have a single way to get the clock information. My
>>> preference
>>> is to take it from the firmware, for the reason I stated above.
>> Hi Will,
>>
>> Sure, thanks. I will work with Thor to get this going.
>>
>> Hi Thor,
>> Does it sound okay to you to squash your patch [1] into my patch [2]
>> with
>> your 'Signed-off-by' tag?
>> I will update the commit log to include the information about getting
>> clock details from device tree.
>>
>> [1] https://patchwork.kernel.org/patch/10628725/
>> [2] https://patchwork.kernel.org/patch/10686061/
>>
>
> Yes, that would be great and easier to understand than my patch on top
> of yours.
>
> Additionally, can you remove the "Error:" as Will requested as part of
> the squash?
Thanks for your consent. I have reworked the patch today, and have
addressed Will's comment. I will give it a try on the board and post it
by tomorrow.
Best regards
Vivek
>
> Thank you!
>
> Thor
>
>> Best regards
>> Vivek
>>>
>>> Will
>>
>>
>
On Mon, Nov 26, 2018 at 04:56:42PM +0530, Vivek Gautam wrote:
> On 11/26/2018 11:33 AM, Vivek Gautam wrote:
> >On 11/24/2018 12:06 AM, Will Deacon wrote:
> >>On Thu, Nov 22, 2018 at 05:32:24PM +0530, Vivek Gautam wrote:
> >>>On Wed, Nov 21, 2018 at 11:09 PM Will Deacon <[email protected]>
> >>>wrote:
> >>>>On Fri, Nov 16, 2018 at 04:54:27PM +0530, Vivek Gautam wrote:
> >>>>>From: Sricharan R <[email protected]>
> >>>>>
> >>>>>The smmu device probe/remove and add/remove master device callbacks
> >>>>>gets called when the smmu is not linked to its master, that is
> >>>>>without
> >>>>>the context of the master device. So calling runtime apis in those
> >>>>>places
> >>>>>separately.
> >>>>>Global locks are also initialized before enabling runtime pm as the
> >>>>>runtime_resume() calls device_reset() which does tlb_sync_global()
> >>>>>that ultimately requires locks to be initialized.
> >>>>>
> >>>>>Signed-off-by: Sricharan R <[email protected]>
> >>>>>[vivek: Cleanup pm runtime calls]
> >>>>>Signed-off-by: Vivek Gautam <[email protected]>
> >>>>>Reviewed-by: Tomasz Figa <[email protected]>
> >>>>>Tested-by: Srinivas Kandagatla <[email protected]>
> >>>>>Reviewed-by: Robin Murphy <[email protected]>
> >>>>>---
> >>>>>  drivers/iommu/arm-smmu.c | 101
> >>>>>++++++++++++++++++++++++++++++++++++++++++-----
> >>>>>  1 file changed, 91 insertions(+), 10 deletions(-)
> >>>>Given that you're doing the get/put in the TLBI ops unconditionally:
> >>>>
> >>>>>  static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
> >>>>>  {
> >>>>>       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> >>>>>+     struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>>>>
> >>>>>-     if (smmu_domain->tlb_ops)
> >>>>>+     if (smmu_domain->tlb_ops) {
> >>>>>+             arm_smmu_rpm_get(smmu);
> >>>>>smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
> >>>>>+             arm_smmu_rpm_put(smmu);
> >>>>>+     }
> >>>>>  }
> >>>>>
> >>>>>  static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
> >>>>>  {
> >>>>>       struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> >>>>>+     struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>>>>
> >>>>>-     if (smmu_domain->tlb_ops)
> >>>>>+     if (smmu_domain->tlb_ops) {
> >>>>>+             arm_smmu_rpm_get(smmu);
> >>>>>smmu_domain->tlb_ops->tlb_sync(smmu_domain);
> >>>>>+             arm_smmu_rpm_put(smmu);
> >>>>>+     }
> >>>>Why do you need them around the map/unmap calls as well?
> >>>We still have .tlb_add_flush path?
> >>Ok, so we could add the ops around that as well. Right now, we've got
> >>the runtime pm hooks crossing two parts of the API.
> >
> >Sure, will do that then, and remove the runtime pm hooks from map/unmap.
>
> I missed this earlier -
> We are adding runtime pm hooks in the 'iommu_ops' callbacks and not really
> to
> 'tlb_ops'. So how the runtime pm hooks crossing the paths?
> '.map/.unmap' iommu_ops don't call '.flush_iotlb_all' or '.iotlb_sync'
> iommu_ops
> anywhere.
>
> E.g., only callers to domain->ops->flush_iotlb_all() are:
> iommu_dma_flush_iotlb_all(), or iommu_flush_tlb_all() which are not in
> map/unmap paths.
Yes, sorry, I got confused here and completely misled you. In which case,
your original patch is ok because it intercepts the core IOMMU API via
iommu_ops. Apologies.
At that level, should we also annotate arm_smmu_iova_to_phys_hard()
for the iova_to_phys() implementation?
With that detail and clock bits sorted out, we should be able to get this
queued at last.
Will