LinuxLists.cc - [PATCH 1/7] iommu/arm-smmu: add Nvidia SMMUv2 implementation

2019-08-29 22:47:42

Subject: [PATCH 1/7] iommu/arm-smmu: add Nvidia SMMUv2 implementation

Add Nvidia SMMUv2 implementation and model info.

Signed-off-by: Krishna Reddy <[email protected]>
---
MAINTAINERS | 2 +
drivers/iommu/Makefile | 2 +-
drivers/iommu/arm-smmu-impl.c | 2 +
drivers/iommu/arm-smmu-nvidia.c | 97 +++++++++++++++++++++++++++++++++++++++++
drivers/iommu/arm-smmu.c | 2 +
drivers/iommu/arm-smmu.h | 2 +
6 files changed, 106 insertions(+), 1 deletion(-)
create mode 100644 drivers/iommu/arm-smmu-nvidia.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 289fb06..b9d59e51 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15785,9 +15785,11 @@ F: drivers/i2c/busses/i2c-tegra.c

TEGRA IOMMU DRIVERS
M: Thierry Reding <[email protected]>
+R: Krishna Reddy <[email protected]>
L: [email protected]
S: Supported
F: drivers/iommu/tegra*
+F: drivers/iommu/arm-smmu-nvidia.c

TEGRA KBC DRIVER
M: Laxman Dewangan <[email protected]>
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index a2729aa..7f5489e 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o
obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
-obj-$(CONFIG_ARM_SMMU) += arm-smmu.o arm-smmu-impl.o
+obj-$(CONFIG_ARM_SMMU) += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o
obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c
index 5c87a38..e5e595f 100644
--- a/drivers/iommu/arm-smmu-impl.c
+++ b/drivers/iommu/arm-smmu-impl.c
@@ -162,6 +162,8 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu)
break;
case CAVIUM_SMMUV2:
return cavium_smmu_impl_init(smmu);
+ case NVIDIA_SMMUV2:
+ return nvidia_smmu_impl_init(smmu);
default:
break;
}
diff --git a/drivers/iommu/arm-smmu-nvidia.c b/drivers/iommu/arm-smmu-nvidia.c
new file mode 100644
index 0000000..d93ceda
--- /dev/null
+++ b/drivers/iommu/arm-smmu-nvidia.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Nvidia ARM SMMU v2 implementation quirks
+// Copyright (C) 2019 NVIDIA CORPORATION. All rights reserved.
+
+#define pr_fmt(fmt) "nvidia-smmu: " fmt
+
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "arm-smmu.h"
+
+#define NUM_SMMU_INSTANCES 3
+
+struct nvidia_smmu {
+ struct arm_smmu_device smmu;
+ int num_inst;
+ void __iomem *bases[NUM_SMMU_INSTANCES];
+};
+
+#define to_nsmmu(s) container_of(s, struct nvidia_smmu, smmu)
+
+#define nsmmu_page(smmu, inst, page) \
+ (((inst) ? to_nsmmu(smmu)->bases[(inst)] : smmu->base) + \
+ ((page) << smmu->pgshift))
+
+static u32 nsmmu_read_reg(struct arm_smmu_device *smmu,
+ int page, int offset)
+{
+ return readl_relaxed(nsmmu_page(smmu, 0, page) + offset);
+}
+
+static void nsmmu_write_reg(struct arm_smmu_device *smmu,
+ int page, int offset, u32 val)
+{
+ int i;
+
+ for (i = 0; i < to_nsmmu(smmu)->num_inst; i++)
+ writel_relaxed(val, nsmmu_page(smmu, i, page) + offset);
+}
+
+static u64 nsmmu_read_reg64(struct arm_smmu_device *smmu,
+ int page, int offset)
+{
+ return readq_relaxed(nsmmu_page(smmu, 0, page) + offset);
+}
+
+static void nsmmu_write_reg64(struct arm_smmu_device *smmu,
+ int page, int offset, u64 val)
+{
+ int i;
+
+ for (i = 0; i < to_nsmmu(smmu)->num_inst; i++)
+ writeq_relaxed(val, nsmmu_page(smmu, i, page) + offset);
+}
+
+static const struct arm_smmu_impl nsmmu_impl = {
+ .read_reg = nsmmu_read_reg,
+ .write_reg = nsmmu_write_reg,
+ .read_reg64 = nsmmu_read_reg64,
+ .write_reg64 = nsmmu_write_reg64,
+};
+
+struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
+{
+ int i;
+ struct nvidia_smmu *nsmmu;
+ struct resource *res;
+ struct device *dev = smmu->dev;
+ struct platform_device *pdev = to_platform_device(smmu->dev);
+
+ nsmmu = devm_kzalloc(smmu->dev, sizeof(*nsmmu), GFP_KERNEL);
+ if (!nsmmu)
+ return ERR_PTR(-ENOMEM);
+
+ nsmmu->smmu = *smmu;
+ /* Instance 0 is ioremapped by arm-smmu.c */
+ nsmmu->num_inst = 1;
+
+ for (i = 1; i < NUM_SMMU_INSTANCES; i++) {
+ res = platform_get_resource(pdev, IORESOURCE_MEM, i);
+ if (!res)
+ break;
+ nsmmu->bases[i] = devm_ioremap_resource(dev, res);
+ if (IS_ERR(nsmmu->bases[i]))
+ return (struct arm_smmu_device *)nsmmu->bases[i];
+ nsmmu->num_inst++;
+ }
+
+ nsmmu->smmu.impl = &nsmmu_impl;
+ devm_kfree(smmu->dev, smmu);
+ pr_info("Nvidia SMMUv2, Instances=%d\n", nsmmu->num_inst);
+
+ return &nsmmu->smmu;
+}
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 5b93c79..46e1641 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1871,6 +1871,7 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
+ARM_SMMU_MATCH_DATA(nvidia_smmuv2, ARM_SMMU_V2, NVIDIA_SMMUV2);

static const struct of_device_id arm_smmu_of_match[] = {
{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
@@ -1880,6 +1881,7 @@ static const struct of_device_id arm_smmu_of_match[] = {
{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
+ { .compatible = "nvidia,smmu-v2", .data = &nvidia_smmuv2 },
{ },
};

diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h
index b19b6ca..9645bf1 100644
--- a/drivers/iommu/arm-smmu.h
+++ b/drivers/iommu/arm-smmu.h
@@ -220,6 +220,7 @@ enum arm_smmu_implementation {
ARM_MMU500,
CAVIUM_SMMUV2,
QCOM_SMMUV2,
+ NVIDIA_SMMUV2,
};

struct arm_smmu_device {
@@ -398,5 +399,6 @@ static inline void arm_smmu_writeq(struct arm_smmu_device *smmu, int page,
arm_smmu_writeq((s), ARM_SMMU_CB((s), (n)), (o), (v))

struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu);
+struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu);

#endif /* _ARM_SMMU_H */
--
2.1.4

2019-08-30 15:05:32

by Robin Murphy

[permalink] [raw]

Subject: Re: [PATCH 1/7] iommu/arm-smmu: add Nvidia SMMUv2 implementation

On 29/08/2019 23:47, Krishna Reddy wrote:
> Add Nvidia SMMUv2 implementation and model info.
>
> Signed-off-by: Krishna Reddy <[email protected]>
> ---
> MAINTAINERS | 2 +
> drivers/iommu/Makefile | 2 +-
> drivers/iommu/arm-smmu-impl.c | 2 +
> drivers/iommu/arm-smmu-nvidia.c | 97 +++++++++++++++++++++++++++++++++++++++++
> drivers/iommu/arm-smmu.c | 2 +
> drivers/iommu/arm-smmu.h | 2 +
> 6 files changed, 106 insertions(+), 1 deletion(-)
> create mode 100644 drivers/iommu/arm-smmu-nvidia.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 289fb06..b9d59e51 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -15785,9 +15785,11 @@ F: drivers/i2c/busses/i2c-tegra.c
>
> TEGRA IOMMU DRIVERS
> M: Thierry Reding <[email protected]>
> +R: Krishna Reddy <[email protected]>
> L: [email protected]
> S: Supported
> F: drivers/iommu/tegra*
> +F: drivers/iommu/arm-smmu-nvidia.c
>
> TEGRA KBC DRIVER
> M: Laxman Dewangan <[email protected]>
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> index a2729aa..7f5489e 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -13,7 +13,7 @@ obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
> obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
> obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o
> obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
> -obj-$(CONFIG_ARM_SMMU) += arm-smmu.o arm-smmu-impl.o
> +obj-$(CONFIG_ARM_SMMU) += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o
> obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
> obj-$(CONFIG_DMAR_TABLE) += dmar.o
> obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
> diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c
> index 5c87a38..e5e595f 100644
> --- a/drivers/iommu/arm-smmu-impl.c
> +++ b/drivers/iommu/arm-smmu-impl.c
> @@ -162,6 +162,8 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu)
> break;
> case CAVIUM_SMMUV2:
> return cavium_smmu_impl_init(smmu);
> + case NVIDIA_SMMUV2:
> + return nvidia_smmu_impl_init(smmu);
> default:
> break;
> }
> diff --git a/drivers/iommu/arm-smmu-nvidia.c b/drivers/iommu/arm-smmu-nvidia.c
> new file mode 100644
> index 0000000..d93ceda
> --- /dev/null
> +++ b/drivers/iommu/arm-smmu-nvidia.c
> @@ -0,0 +1,97 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +// Nvidia ARM SMMU v2 implementation quirks
> +// Copyright (C) 2019 NVIDIA CORPORATION. All rights reserved.
> +
> +#define pr_fmt(fmt) "nvidia-smmu: " fmt
> +
> +#include <linux/bitfield.h>
> +#include <linux/delay.h>
> +#include <linux/of.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +
> +#include "arm-smmu.h"
> +
> +#define NUM_SMMU_INSTANCES 3
> +
> +struct nvidia_smmu {
> + struct arm_smmu_device smmu;
> + int num_inst;
> + void __iomem *bases[NUM_SMMU_INSTANCES];
> +};
> +
> +#define to_nsmmu(s) container_of(s, struct nvidia_smmu, smmu)
> +
> +#define nsmmu_page(smmu, inst, page) \
> + (((inst) ? to_nsmmu(smmu)->bases[(inst)] : smmu->base) + \
> + ((page) << smmu->pgshift))
> +
> +static u32 nsmmu_read_reg(struct arm_smmu_device *smmu,
> + int page, int offset)
> +{
> + return readl_relaxed(nsmmu_page(smmu, 0, page) + offset);
> +}
> +
> +static void nsmmu_write_reg(struct arm_smmu_device *smmu,
> + int page, int offset, u32 val)
> +{
> + int i;
> +
> + for (i = 0; i < to_nsmmu(smmu)->num_inst; i++)
> + writel_relaxed(val, nsmmu_page(smmu, i, page) + offset);
> +}
> +
> +static u64 nsmmu_read_reg64(struct arm_smmu_device *smmu,
> + int page, int offset)
> +{
> + return readq_relaxed(nsmmu_page(smmu, 0, page) + offset);
> +}
> +
> +static void nsmmu_write_reg64(struct arm_smmu_device *smmu,
> + int page, int offset, u64 val)
> +{
> + int i;
> +
> + for (i = 0; i < to_nsmmu(smmu)->num_inst; i++)
> + writeq_relaxed(val, nsmmu_page(smmu, i, page) + offset);
> +}
> +
> +static const struct arm_smmu_impl nsmmu_impl = {
> + .read_reg = nsmmu_read_reg,
> + .write_reg = nsmmu_write_reg,
> + .read_reg64 = nsmmu_read_reg64,
> + .write_reg64 = nsmmu_write_reg64,
> +};
> +
> +struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu)
> +{
> + int i;
> + struct nvidia_smmu *nsmmu;
> + struct resource *res;
> + struct device *dev = smmu->dev;
> + struct platform_device *pdev = to_platform_device(smmu->dev);
> +
> + nsmmu = devm_kzalloc(smmu->dev, sizeof(*nsmmu), GFP_KERNEL);
> + if (!nsmmu)
> + return ERR_PTR(-ENOMEM);
> +
> + nsmmu->smmu = *smmu;
> + /* Instance 0 is ioremapped by arm-smmu.c */
> + nsmmu->num_inst = 1;
> +
> + for (i = 1; i < NUM_SMMU_INSTANCES; i++) {
> + res = platform_get_resource(pdev, IORESOURCE_MEM, i);
> + if (!res)
> + break;
> + nsmmu->bases[i] = devm_ioremap_resource(dev, res);
> + if (IS_ERR(nsmmu->bases[i]))
> + return (struct arm_smmu_device *)nsmmu->bases[i];
> + nsmmu->num_inst++;
> + }
> +
> + nsmmu->smmu.impl = &nsmmu_impl;
> + devm_kfree(smmu->dev, smmu);
> + pr_info("Nvidia SMMUv2, Instances=%d\n", nsmmu->num_inst);
> +
> + return &nsmmu->smmu;
> +}
> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> index 5b93c79..46e1641 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -1871,6 +1871,7 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
> ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
> ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
> ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
> +ARM_SMMU_MATCH_DATA(nvidia_smmuv2, ARM_SMMU_V2, NVIDIA_SMMUV2);

From the previous discussions, I got the impression that other than the
'novel' way they're integrated, the actual SMMU implementations were
unmodified Arm MMU-500s. Is that the case, or have I misread something?

Robin.

>
> static const struct of_device_id arm_smmu_of_match[] = {
> { .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
> @@ -1880,6 +1881,7 @@ static const struct of_device_id arm_smmu_of_match[] = {
> { .compatible = "arm,mmu-500", .data = &arm_mmu500 },
> { .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
> { .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
> + { .compatible = "nvidia,smmu-v2", .data = &nvidia_smmuv2 },
> { },
> };
>
> diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h
> index b19b6ca..9645bf1 100644
> --- a/drivers/iommu/arm-smmu.h
> +++ b/drivers/iommu/arm-smmu.h
> @@ -220,6 +220,7 @@ enum arm_smmu_implementation {
> ARM_MMU500,
> CAVIUM_SMMUV2,
> QCOM_SMMUV2,
> + NVIDIA_SMMUV2,
> };
>
> struct arm_smmu_device {
> @@ -398,5 +399,6 @@ static inline void arm_smmu_writeq(struct arm_smmu_device *smmu, int page,
> arm_smmu_writeq((s), ARM_SMMU_CB((s), (n)), (o), (v))
>
> struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu);
> +struct arm_smmu_device *nvidia_smmu_impl_init(struct arm_smmu_device *smmu);
>
> #endif /* _ARM_SMMU_H */
>

2019-08-30 18:18:26

by Krishna Reddy

[permalink] [raw]

Subject: RE: [PATCH 1/7] iommu/arm-smmu: add Nvidia SMMUv2 implementation

>> +ARM_SMMU_MATCH_DATA(nvidia_smmuv2, ARM_SMMU_V2, NVIDIA_SMMUV2);

> From the previous discussions, I got the impression that other than the 'novel' way they're integrated, the actual SMMU implementations were unmodified Arm MMU-500s. Is that the case, or have I misread something?

The ARM MMU-500 implementation is unmodified. It is the way they are integrated and used together (for interleaved accesses) that is different from a regular ARM MMU-500.
I have added it to get the model number and to be able to differentiate the SMMU implementation in arm-smmu-impl.c.

-KR

2019-09-02 13:42:35

by Robin Murphy

[permalink] [raw]

Subject: Re: [PATCH 1/7] iommu/arm-smmu: add Nvidia SMMUv2 implementation

On 30/08/2019 19:16, Krishna Reddy wrote:
>>> +ARM_SMMU_MATCH_DATA(nvidia_smmuv2, ARM_SMMU_V2, NVIDIA_SMMUV2);
>
>> From the previous discussions, I got the impression that other than the 'novel' way they're integrated, the actual SMMU implementations were unmodified Arm MMU-500s. Is that the case, or have I misread something?
>
> The ARM MMU-500 implementation is unmodified. It is the way they are integrated and used together (for interleaved accesses) that is different from a regular ARM MMU-500.
> I have added it to get the model number and to be able to differentiate the SMMU implementation in arm-smmu-impl.c.

In that case, I would rather keep smmu->model representing the MMU-500
microarchitecture - since you'll still want to pick up errata
workarounds etc. for that - and detect the Tegra integration via an
explicit of_device_is_compatible() check in arm_smmu_impl_init(). For
comparison, under ACPI we'd probably have to detect integration details
by looking at table headers, separately from the IORT "Model" field, so
I'd prefer if the DT vs. ACPI handling didn't diverge more than necessary.

Of course, that immediately opens the question of how best to combine
arm_mmu500_impl with nsmmu_impl, but hey, one step at a time :)

Robin.

2019-09-03 01:08:57

by Krishna Reddy

[permalink] [raw]

Subject: RE: [PATCH 1/7] iommu/arm-smmu: add Nvidia SMMUv2 implementation

>>> +ARM_SMMU_MATCH_DATA(nvidia_smmuv2, ARM_SMMU_V2, NVIDIA_SMMUV2);

>> The ARM MMU-500 implementation is unmodified. It is the way they are integrated and used together (for interleaved accesses) that is different from a regular ARM MMU-500.
>> I have added it to get the model number and to be able to differentiate the SMMU implementation in arm-smmu-impl.c.

>In that case, I would rather keep smmu->model representing the MMU-500 microarchitecture -
>since you'll still want to pick up errata workarounds etc. for that - and detect the Tegra integration via an explicit of_device_is_compatible()
> check in arm_smmu_impl_init().

Looks good to me.

>For comparison, under ACPI we'd probably have to detect integration details by looking at table headers, separately
> from the IORT "Model" field, so I'd prefer if the DT vs. ACPI handling didn't diverge more than necessary.

ACPI support for T194 can be added based on need in subsequent patches. For now, I am updating it for DT support.

-KR