2021-03-28 23:36:42

by Dmitry Osipenko

[permalink] [raw]
Subject: [PATCH v1 1/2] iommu/tegra-smmu: Defer attachment of display clients

All consumer-grade Android and Chromebook devices show a splash screen
on boot and then display is left enabled when kernel is booted. This
behaviour is unacceptable in a case of implicit IOMMU domains to which
devices are attached during kernel boot since devices, like display
controller, may perform DMA at that time. We can work around this problem
by deferring the enable of SMMU translation for a specific devices,
like a display controller, until the first IOMMU mapping is created,
which works good enough in practice because by that time h/w is already
stopped.

Signed-off-by: Dmitry Osipenko <[email protected]>
---
drivers/iommu/tegra-smmu.c | 71 ++++++++++++++++++++++++++++++++++++++
1 file changed, 71 insertions(+)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 602aab98c079..af1e4b5adb27 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -60,6 +60,8 @@ struct tegra_smmu_as {
dma_addr_t pd_dma;
unsigned id;
u32 attr;
+ bool display_attached[2];
+ bool attached_devices_need_sync;
};

static struct tegra_smmu_as *to_smmu_as(struct iommu_domain *dom)
@@ -78,6 +80,10 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset)
return readl(smmu->regs + offset);
}

+/* all Tegra SoCs use the same group IDs for displays */
+#define SMMU_SWGROUP_DC 1
+#define SMMU_SWGROUP_DCB 2
+
#define SMMU_CONFIG 0x010
#define SMMU_CONFIG_ENABLE (1 << 0)

@@ -253,6 +259,20 @@ static inline void smmu_flush(struct tegra_smmu *smmu)
smmu_readl(smmu, SMMU_PTB_ASID);
}

+static int smmu_swgroup_to_display_id(unsigned int swgroup)
+{
+ switch (swgroup) {
+ case SMMU_SWGROUP_DC:
+ return 0;
+
+ case SMMU_SWGROUP_DCB:
+ return 1;
+
+ default:
+ return -1;
+ }
+}
+
static int tegra_smmu_alloc_asid(struct tegra_smmu *smmu, unsigned int *idp)
{
unsigned long id;
@@ -318,6 +338,9 @@ static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type)
as->domain.geometry.aperture_end = 0xffffffff;
as->domain.geometry.force_aperture = true;

+ /* work around implicit attachment of devices with active DMA */
+ as->attached_devices_need_sync = true;
+
return &as->domain;
}

@@ -410,6 +433,31 @@ static void tegra_smmu_disable(struct tegra_smmu *smmu, unsigned int swgroup,
}
}

+static void tegra_smmu_attach_deferred_devices(struct iommu_domain *domain)
+{
+ struct tegra_smmu_as *as = to_smmu_as(domain);
+
+ if (!as->attached_devices_need_sync)
+ return;
+
+ if (as->display_attached[0] || as->display_attached[1]) {
+ struct tegra_smmu *smmu = as->smmu;
+ unsigned int i;
+
+ for (i = 0; i < smmu->soc->num_clients; i++) {
+ const struct tegra_mc_client *client = &smmu->soc->clients[i];
+ const int disp_id = smmu_swgroup_to_display_id(client->swgroup);
+
+ if (disp_id < 0 || !as->display_attached[disp_id])
+ continue;
+
+ tegra_smmu_enable(smmu, client->swgroup, as->id);
+ }
+ }
+
+ as->attached_devices_need_sync = false;
+}
+
static int tegra_smmu_as_prepare(struct tegra_smmu *smmu,
struct tegra_smmu_as *as)
{
@@ -495,10 +543,26 @@ static int tegra_smmu_attach_dev(struct iommu_domain *domain,
return -ENOENT;

for (index = 0; index < fwspec->num_ids; index++) {
+ const unsigned int swgroup = fwspec->ids[index];
+ const int disp_id = smmu_swgroup_to_display_id(swgroup);
+
err = tegra_smmu_as_prepare(smmu, as);
if (err)
goto disable;

+ if (disp_id >= 0) {
+ as->display_attached[disp_id] = true;
+
+ /*
+ * In most cases display is performing DMA before
+ * driver is initialized by showing a splash screen
+ * and in this case we should defer the h/w attachment
+ * until the first mapping is created by display driver.
+ */
+ if (as->attached_devices_need_sync)
+ continue;
+ }
+
tegra_smmu_enable(smmu, fwspec->ids[index], as->id);
}

@@ -527,6 +591,12 @@ static void tegra_smmu_detach_dev(struct iommu_domain *domain, struct device *de
return;

for (index = 0; index < fwspec->num_ids; index++) {
+ const unsigned int swgroup = fwspec->ids[index];
+ const int disp_id = smmu_swgroup_to_display_id(swgroup);
+
+ if (disp_id >= 0)
+ as->display_attached[disp_id] = false;
+
tegra_smmu_disable(smmu, fwspec->ids[index], as->id);
tegra_smmu_as_unprepare(smmu, as);
}
@@ -762,6 +832,7 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
int ret;

spin_lock_irqsave(&as->lock, flags);
+ tegra_smmu_attach_deferred_devices(domain);
ret = __tegra_smmu_map(domain, iova, paddr, size, prot, gfp, &flags);
spin_unlock_irqrestore(&as->lock, flags);

--
2.30.2


2021-03-28 23:41:04

by Dmitry Osipenko

[permalink] [raw]
Subject: [PATCH v1 2/2] iommu/tegra-smmu: Revert workaround that was needed for Nyan Big Chromebook

The previous commit fixes problem where display client was attaching too
early to IOMMU during kernel boot in a multi-platform kernel configuration
which enables CONFIG_ARM_DMA_USE_IOMMU=y. The workaround that helped to
defer the IOMMU attachment for Nyan Big Chromebook isn't needed anymore,
revert it.

Signed-off-by: Dmitry Osipenko <[email protected]>
---
drivers/iommu/tegra-smmu.c | 71 +-------------------------------------
1 file changed, 1 insertion(+), 70 deletions(-)

diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index af1e4b5adb27..572a4544ae88 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -869,69 +869,10 @@ static phys_addr_t tegra_smmu_iova_to_phys(struct iommu_domain *domain,
return SMMU_PFN_PHYS(pfn) + SMMU_OFFSET_IN_PAGE(iova);
}

-static struct tegra_smmu *tegra_smmu_find(struct device_node *np)
-{
- struct platform_device *pdev;
- struct tegra_mc *mc;
-
- pdev = of_find_device_by_node(np);
- if (!pdev)
- return NULL;
-
- mc = platform_get_drvdata(pdev);
- if (!mc)
- return NULL;
-
- return mc->smmu;
-}
-
-static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev,
- struct of_phandle_args *args)
-{
- const struct iommu_ops *ops = smmu->iommu.ops;
- int err;
-
- err = iommu_fwspec_init(dev, &dev->of_node->fwnode, ops);
- if (err < 0) {
- dev_err(dev, "failed to initialize fwspec: %d\n", err);
- return err;
- }
-
- err = ops->of_xlate(dev, args);
- if (err < 0) {
- dev_err(dev, "failed to parse SW group ID: %d\n", err);
- iommu_fwspec_free(dev);
- return err;
- }
-
- return 0;
-}
-
static struct iommu_device *tegra_smmu_probe_device(struct device *dev)
{
- struct device_node *np = dev->of_node;
- struct tegra_smmu *smmu = NULL;
- struct of_phandle_args args;
- unsigned int index = 0;
- int err;
-
- while (of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index,
- &args) == 0) {
- smmu = tegra_smmu_find(args.np);
- if (smmu) {
- err = tegra_smmu_configure(smmu, dev, &args);
-
- if (err < 0) {
- of_node_put(args.np);
- return ERR_PTR(err);
- }
- }
-
- of_node_put(args.np);
- index++;
- }
+ struct tegra_smmu *smmu = dev_iommu_priv_get(dev);

- smmu = dev_iommu_priv_get(dev);
if (!smmu)
return ERR_PTR(-ENODEV);

@@ -1158,16 +1099,6 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev,
if (!smmu)
return ERR_PTR(-ENOMEM);

- /*
- * This is a bit of a hack. Ideally we'd want to simply return this
- * value. However the IOMMU registration process will attempt to add
- * all devices to the IOMMU when bus_set_iommu() is called. In order
- * not to rely on global variables to track the IOMMU instance, we
- * set it here so that it can be looked up from the .probe_device()
- * callback via the IOMMU device's .drvdata field.
- */
- mc->smmu = smmu;
-
size = BITS_TO_LONGS(soc->num_asids) * sizeof(long);

smmu->asids = devm_kzalloc(dev, size, GFP_KERNEL);
--
2.30.2

2021-04-01 08:59:21

by Nicolin Chen

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] iommu/tegra-smmu: Revert workaround that was needed for Nyan Big Chromebook

On Mon, Mar 29, 2021 at 02:32:56AM +0300, Dmitry Osipenko wrote:
> The previous commit fixes problem where display client was attaching too
> early to IOMMU during kernel boot in a multi-platform kernel configuration
> which enables CONFIG_ARM_DMA_USE_IOMMU=y. The workaround that helped to
> defer the IOMMU attachment for Nyan Big Chromebook isn't needed anymore,
> revert it.

Sorry for the late reply. I have been busy with downstream tasks.

I will give them a try by the end of the week. Yet, probably it'd
be better to include Guillaume also as he has the Nyan platform.

2021-04-02 14:44:02

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] iommu/tegra-smmu: Revert workaround that was needed for Nyan Big Chromebook

01.04.2021 11:55, Nicolin Chen пишет:
> On Mon, Mar 29, 2021 at 02:32:56AM +0300, Dmitry Osipenko wrote:
>> The previous commit fixes problem where display client was attaching too
>> early to IOMMU during kernel boot in a multi-platform kernel configuration
>> which enables CONFIG_ARM_DMA_USE_IOMMU=y. The workaround that helped to
>> defer the IOMMU attachment for Nyan Big Chromebook isn't needed anymore,
>> revert it.
>
> Sorry for the late reply. I have been busy with downstream tasks.
>
> I will give them a try by the end of the week. Yet, probably it'd
> be better to include Guillaume also as he has the Nyan platform.
>

Indeed, thanks. Although, I'm pretty sure that it's the same issue which
I reproduced on Nexus 7.

Guillaume, could you please give a test to these patches on Nyan Big?
There should be no EMEM errors in the kernel log with this patches.

https://patchwork.ozlabs.org/project/linux-tegra/list/?series=236215

2021-04-08 09:47:59

by Nicolin Chen

[permalink] [raw]
Subject: Re: [PATCH v1 1/2] iommu/tegra-smmu: Defer attachment of display clients

On Mon, Mar 29, 2021 at 02:32:55AM +0300, Dmitry Osipenko wrote:
> All consumer-grade Android and Chromebook devices show a splash screen
> on boot and then display is left enabled when kernel is booted. This
> behaviour is unacceptable in a case of implicit IOMMU domains to which
> devices are attached during kernel boot since devices, like display
> controller, may perform DMA at that time. We can work around this problem
> by deferring the enable of SMMU translation for a specific devices,
> like a display controller, until the first IOMMU mapping is created,
> which works good enough in practice because by that time h/w is already
> stopped.
>
> Signed-off-by: Dmitry Osipenko <[email protected]>

For both patches:
Acked-by: Nicolin Chen <[email protected]>
Tested-by: Nicolin Chen <[email protected]>

The WAR looks good to me. Perhaps Thierry would give some input.

Another topic:
I think this may help work around the mc-errors, which we have
been facing on Tegra210 also when we enable IOMMU_DOMAIN_DMA.
(attached a test patch rebasing on these two)

However, GPU would also report errors using DMA domain:

nouveau 57000000.gpu: acr: firmware unavailable
nouveau 57000000.gpu: pmu: firmware unavailable
nouveau 57000000.gpu: gr: firmware unavailable
tegra-mc 70019000.memory-controller: gpusrd: read @0x00000000fffbe200: Security violation (TrustZone violation)
nouveau 57000000.gpu: DRM: failed to create kernel channel, -22
tegra-mc 70019000.memory-controller: gpusrd: read @0x00000000fffad000: Security violation (TrustZone violation)
nouveau 57000000.gpu: fifo: SCHED_ERROR 20 []
nouveau 57000000.gpu: fifo: SCHED_ERROR 20 []

Looking at the address, seems that GPU allocated memory in 32-bit
physical address space behind SMMU, so a violation happened after
turning on DMA domain I guess...


Attachments:
(No filename) (1.83 kB)
dma_domain.patch (2.43 kB)
Download all attachments

2021-04-08 12:41:29

by Thierry Reding

[permalink] [raw]
Subject: Re: [PATCH v1 1/2] iommu/tegra-smmu: Defer attachment of display clients

On Mon, Mar 29, 2021 at 02:32:55AM +0300, Dmitry Osipenko wrote:
> All consumer-grade Android and Chromebook devices show a splash screen
> on boot and then display is left enabled when kernel is booted. This
> behaviour is unacceptable in a case of implicit IOMMU domains to which
> devices are attached during kernel boot since devices, like display
> controller, may perform DMA at that time. We can work around this problem
> by deferring the enable of SMMU translation for a specific devices,
> like a display controller, until the first IOMMU mapping is created,
> which works good enough in practice because by that time h/w is already
> stopped.
>
> Signed-off-by: Dmitry Osipenko <[email protected]>
> ---
> drivers/iommu/tegra-smmu.c | 71 ++++++++++++++++++++++++++++++++++++++
> 1 file changed, 71 insertions(+)

In general I do see why we would want to enable this. However, I think
this is a bad idea because it's going to proliferate the bad practice of
not describing things properly in device tree.

Whatever happened to the idea of creating identity mappings based on the
obscure tegra_fb_mem (or whatever it was called) command-line option? Is
that command-line not universally passed to the kernel from bootloaders
that initialize display?

That idealistic objection aside, this seems a bit over-engineered for
the hack that it is. See below.

> diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
> index 602aab98c079..af1e4b5adb27 100644
> --- a/drivers/iommu/tegra-smmu.c
> +++ b/drivers/iommu/tegra-smmu.c
> @@ -60,6 +60,8 @@ struct tegra_smmu_as {
> dma_addr_t pd_dma;
> unsigned id;
> u32 attr;
> + bool display_attached[2];
> + bool attached_devices_need_sync;
> };
>
> static struct tegra_smmu_as *to_smmu_as(struct iommu_domain *dom)
> @@ -78,6 +80,10 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset)
> return readl(smmu->regs + offset);
> }
>
> +/* all Tegra SoCs use the same group IDs for displays */
> +#define SMMU_SWGROUP_DC 1
> +#define SMMU_SWGROUP_DCB 2
> +
> #define SMMU_CONFIG 0x010
> #define SMMU_CONFIG_ENABLE (1 << 0)
>
> @@ -253,6 +259,20 @@ static inline void smmu_flush(struct tegra_smmu *smmu)
> smmu_readl(smmu, SMMU_PTB_ASID);
> }
>
> +static int smmu_swgroup_to_display_id(unsigned int swgroup)
> +{
> + switch (swgroup) {
> + case SMMU_SWGROUP_DC:
> + return 0;
> +
> + case SMMU_SWGROUP_DCB:
> + return 1;
> +
> + default:
> + return -1;
> + }
> +}
> +

Why do we need to have this two-level mapping? Do we even need to care
about the specific swgroups IDs? Can we not just simply check at attach
time if the client that's being attached is a display client and then
set atteched_devices_need_sync = true?

Thierry


Attachments:
(No filename) (2.76 kB)
signature.asc (849.00 B)
Download all attachments

2021-04-08 13:27:48

by Thierry Reding

[permalink] [raw]
Subject: Re: [PATCH v1 1/2] iommu/tegra-smmu: Defer attachment of display clients

On Thu, Apr 08, 2021 at 02:42:42AM -0700, Nicolin Chen wrote:
> On Mon, Mar 29, 2021 at 02:32:55AM +0300, Dmitry Osipenko wrote:
> > All consumer-grade Android and Chromebook devices show a splash screen
> > on boot and then display is left enabled when kernel is booted. This
> > behaviour is unacceptable in a case of implicit IOMMU domains to which
> > devices are attached during kernel boot since devices, like display
> > controller, may perform DMA at that time. We can work around this problem
> > by deferring the enable of SMMU translation for a specific devices,
> > like a display controller, until the first IOMMU mapping is created,
> > which works good enough in practice because by that time h/w is already
> > stopped.
> >
> > Signed-off-by: Dmitry Osipenko <[email protected]>
>
> For both patches:
> Acked-by: Nicolin Chen <[email protected]>
> Tested-by: Nicolin Chen <[email protected]>
>
> The WAR looks good to me. Perhaps Thierry would give some input.
>
> Another topic:
> I think this may help work around the mc-errors, which we have
> been facing on Tegra210 also when we enable IOMMU_DOMAIN_DMA.
> (attached a test patch rebasing on these two)

Ugh... that's exactly what I was afraid of. Now everybody is going to
think that we can just work around this issue with driver-specific SMMU
hacks...

> However, GPU would also report errors using DMA domain:
>
> nouveau 57000000.gpu: acr: firmware unavailable
> nouveau 57000000.gpu: pmu: firmware unavailable
> nouveau 57000000.gpu: gr: firmware unavailable
> tegra-mc 70019000.memory-controller: gpusrd: read @0x00000000fffbe200: Security violation (TrustZone violation)
> nouveau 57000000.gpu: DRM: failed to create kernel channel, -22
> tegra-mc 70019000.memory-controller: gpusrd: read @0x00000000fffad000: Security violation (TrustZone violation)
> nouveau 57000000.gpu: fifo: SCHED_ERROR 20 []
> nouveau 57000000.gpu: fifo: SCHED_ERROR 20 []
>
> Looking at the address, seems that GPU allocated memory in 32-bit
> physical address space behind SMMU, so a violation happened after
> turning on DMA domain I guess...

The problem with GPU is... extra complicated. You're getting these
faults because you're enabling the IOMMU-backed DMA API, which then
causes the Nouveau driver allocate buffers using the DMA API instead of
explicitly allocating pages and then mapping them using the IOMMU API.
However, there are additional patches needed to teach Nouveau about how
to deal with SMMU and those haven't been merged yet. I've got prototypes
of this, but before the whole framebuffer carveout passing work makes
progress there's little sense in moving individual pieces forward.

One more not to try and cut corners. We know what the right solution is,
even if it takes a lot of work. I'm willing to ack this patch, or some
version of it, but only as a way of working around things we have no
realistic chance of fixing properly anymore. I still think it would be
best if we could derive identity mappings from command-line arguments on
these platforms because I think most of them will actually set that, and
then the solution becomes at least uniform at the SMMU level.

For Tegra210 I've already laid out a path to a solution that's going to
be generic and extend to Tegra186 and later as well.

Thierry


Attachments:
(No filename) (3.30 kB)
signature.asc (849.00 B)
Download all attachments

2021-04-08 14:10:13

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v1 1/2] iommu/tegra-smmu: Defer attachment of display clients

08.04.2021 15:40, Thierry Reding пишет:
> On Mon, Mar 29, 2021 at 02:32:55AM +0300, Dmitry Osipenko wrote:
>> All consumer-grade Android and Chromebook devices show a splash screen
>> on boot and then display is left enabled when kernel is booted. This
>> behaviour is unacceptable in a case of implicit IOMMU domains to which
>> devices are attached during kernel boot since devices, like display
>> controller, may perform DMA at that time. We can work around this problem
>> by deferring the enable of SMMU translation for a specific devices,
>> like a display controller, until the first IOMMU mapping is created,
>> which works good enough in practice because by that time h/w is already
>> stopped.
>>
>> Signed-off-by: Dmitry Osipenko <[email protected]>
>> ---
>> drivers/iommu/tegra-smmu.c | 71 ++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 71 insertions(+)
>
> In general I do see why we would want to enable this. However, I think
> this is a bad idea because it's going to proliferate the bad practice of
> not describing things properly in device tree.
>
> Whatever happened to the idea of creating identity mappings based on the
> obscure tegra_fb_mem (or whatever it was called) command-line option? Is
> that command-line not universally passed to the kernel from bootloaders
> that initialize display?

This is still a good idea! The command-line isn't universally passed
just because it's up to a user to override the cmdline and then we get a
hang (a very slow boot in reality) on T30 since display client takes out
the whole memory bus with the constant SMMU faults. For example I don't
have that cmdline option in my current setups.

> That idealistic objection aside, this seems a bit over-engineered for
> the hack that it is. See below.
>
>> diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
>> index 602aab98c079..af1e4b5adb27 100644
>> --- a/drivers/iommu/tegra-smmu.c
>> +++ b/drivers/iommu/tegra-smmu.c
>> @@ -60,6 +60,8 @@ struct tegra_smmu_as {
>> dma_addr_t pd_dma;
>> unsigned id;
>> u32 attr;
>> + bool display_attached[2];
>> + bool attached_devices_need_sync;
>> };
>>
>> static struct tegra_smmu_as *to_smmu_as(struct iommu_domain *dom)
>> @@ -78,6 +80,10 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset)
>> return readl(smmu->regs + offset);
>> }
>>
>> +/* all Tegra SoCs use the same group IDs for displays */
>> +#define SMMU_SWGROUP_DC 1
>> +#define SMMU_SWGROUP_DCB 2
>> +
>> #define SMMU_CONFIG 0x010
>> #define SMMU_CONFIG_ENABLE (1 << 0)
>>
>> @@ -253,6 +259,20 @@ static inline void smmu_flush(struct tegra_smmu *smmu)
>> smmu_readl(smmu, SMMU_PTB_ASID);
>> }
>>
>> +static int smmu_swgroup_to_display_id(unsigned int swgroup)
>> +{
>> + switch (swgroup) {
>> + case SMMU_SWGROUP_DC:
>> + return 0;
>> +
>> + case SMMU_SWGROUP_DCB:
>> + return 1;
>> +
>> + default:
>> + return -1;
>> + }
>> +}
>> +
>
> Why do we need to have this two-level mapping? Do we even need to care
> about the specific swgroups IDs?

It's not clear to me what you're meaning here, the swgroup IDs are used
here for determining whether client belongs to a display controller.

> Can we not just simply check at attach
> time if the client that's being attached is a display client and then
> set atteched_devices_need_sync = true?

The reason I made atteched_devices_need_sync opt-out for display clients
instead of
opt-in is to make it clear and easy to override this option once we will
support the identity mappings.

- attached_devices_need_sync = true;
+ attached_devices_need_sync = no_identity_mapping_for_display;

2021-04-08 14:22:24

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v1 1/2] iommu/tegra-smmu: Defer attachment of display clients

08.04.2021 16:26, Thierry Reding пишет:
> On Thu, Apr 08, 2021 at 02:42:42AM -0700, Nicolin Chen wrote:
>> On Mon, Mar 29, 2021 at 02:32:55AM +0300, Dmitry Osipenko wrote:
>>> All consumer-grade Android and Chromebook devices show a splash screen
>>> on boot and then display is left enabled when kernel is booted. This
>>> behaviour is unacceptable in a case of implicit IOMMU domains to which
>>> devices are attached during kernel boot since devices, like display
>>> controller, may perform DMA at that time. We can work around this problem
>>> by deferring the enable of SMMU translation for a specific devices,
>>> like a display controller, until the first IOMMU mapping is created,
>>> which works good enough in practice because by that time h/w is already
>>> stopped.
>>>
>>> Signed-off-by: Dmitry Osipenko <[email protected]>
>>
>> For both patches:
>> Acked-by: Nicolin Chen <[email protected]>
>> Tested-by: Nicolin Chen <[email protected]>
>>
>> The WAR looks good to me. Perhaps Thierry would give some input.

Nicolin, thank you very much for the help!

>> Another topic:
>> I think this may help work around the mc-errors, which we have
>> been facing on Tegra210 also when we enable IOMMU_DOMAIN_DMA.
>> (attached a test patch rebasing on these two)
>
> Ugh... that's exactly what I was afraid of. Now everybody is going to
> think that we can just work around this issue with driver-specific SMMU
> hacks...
>
>> However, GPU would also report errors using DMA domain:
>>
>> nouveau 57000000.gpu: acr: firmware unavailable
>> nouveau 57000000.gpu: pmu: firmware unavailable
>> nouveau 57000000.gpu: gr: firmware unavailable
>> tegra-mc 70019000.memory-controller: gpusrd: read @0x00000000fffbe200: Security violation (TrustZone violation)
>> nouveau 57000000.gpu: DRM: failed to create kernel channel, -22
>> tegra-mc 70019000.memory-controller: gpusrd: read @0x00000000fffad000: Security violation (TrustZone violation)
>> nouveau 57000000.gpu: fifo: SCHED_ERROR 20 []
>> nouveau 57000000.gpu: fifo: SCHED_ERROR 20 []
>>
>> Looking at the address, seems that GPU allocated memory in 32-bit
>> physical address space behind SMMU, so a violation happened after
>> turning on DMA domain I guess...
>
> The problem with GPU is... extra complicated. You're getting these
> faults because you're enabling the IOMMU-backed DMA API, which then
> causes the Nouveau driver allocate buffers using the DMA API instead of
> explicitly allocating pages and then mapping them using the IOMMU API.
> However, there are additional patches needed to teach Nouveau about how
> to deal with SMMU and those haven't been merged yet. I've got prototypes
> of this, but before the whole framebuffer carveout passing work makes
> progress there's little sense in moving individual pieces forward.
>
> One more not to try and cut corners. We know what the right solution is,
> even if it takes a lot of work. I'm willing to ack this patch, or some
> version of it, but only as a way of working around things we have no
> realistic chance of fixing properly anymore. I still think it would be
> best if we could derive identity mappings from command-line arguments on
> these platforms because I think most of them will actually set that, and
> then the solution becomes at least uniform at the SMMU level.
>
> For Tegra210 I've already laid out a path to a solution that's going to
> be generic and extend to Tegra186 and later as well.

We still have issues in the DRM and other drivers that don't allow us to
flip ON the IOMMU_DOMAIN_DMA support.

My patch addresses the issue with the ARM_DMA_USE_IOMMU option, which
allocates the unmanaged domain for DMA purposes on ARM32, causing the
trouble in the multiplatform kernel configuration since it's not
possible to opt-out from ARM_DMA_USE_IOMMU in this case. Perhaps this
needs to be clarified in the commit message.

https://elixir.bootlin.com/linux/v5.12-rc6/source/arch/arm/mm/dma-mapping.c#L2078

https://elixir.bootlin.com/linux/v5.12-rc6/source/drivers/iommu/iommu.c#L1929

2021-04-09 12:56:32

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v1 1/2] iommu/tegra-smmu: Defer attachment of display clients

08.04.2021 17:07, Dmitry Osipenko пишет:
>> Whatever happened to the idea of creating identity mappings based on the
>> obscure tegra_fb_mem (or whatever it was called) command-line option? Is
>> that command-line not universally passed to the kernel from bootloaders
>> that initialize display?
> This is still a good idea! The command-line isn't universally passed
> just because it's up to a user to override the cmdline and then we get a
> hang (a very slow boot in reality) on T30 since display client takes out
> the whole memory bus with the constant SMMU faults. For example I don't
> have that cmdline option in my current setups.
>

Thinking a bit more about the identity.. have you consulted with the
memory h/w people about whether it's always safe to enable ASID in a
middle of DMA request?

2021-04-23 15:04:33

by Guillaume Tucker

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] iommu/tegra-smmu: Revert workaround that was needed for Nyan Big Chromebook

On 02/04/2021 15:40, Dmitry Osipenko wrote:
> 01.04.2021 11:55, Nicolin Chen пишет:
>> On Mon, Mar 29, 2021 at 02:32:56AM +0300, Dmitry Osipenko wrote:
>>> The previous commit fixes problem where display client was attaching too
>>> early to IOMMU during kernel boot in a multi-platform kernel configuration
>>> which enables CONFIG_ARM_DMA_USE_IOMMU=y. The workaround that helped to
>>> defer the IOMMU attachment for Nyan Big Chromebook isn't needed anymore,
>>> revert it.
>>
>> Sorry for the late reply. I have been busy with downstream tasks.
>>
>> I will give them a try by the end of the week. Yet, probably it'd
>> be better to include Guillaume also as he has the Nyan platform.
>>
>
> Indeed, thanks. Although, I'm pretty sure that it's the same issue which
> I reproduced on Nexus 7.
>
> Guillaume, could you please give a test to these patches on Nyan Big?
> There should be no EMEM errors in the kernel log with this patches.
>
> https://patchwork.ozlabs.org/project/linux-tegra/list/?series=236215

So sorry for the very late reply. I have tried the patches but
hit some issues on linux-next, it's not reaching a login prompt
with next-20210422. So I then tried with next-20210419 which
does boot but shows the IOMMU error:

<6>[ 2.995341] tegra-dc 54200000.dc: Adding to iommu group 1
<4>[ 3.001070] Failed to attached device 54200000.dc to IOMMU_mapping

https://lava.collabora.co.uk/scheduler/job/3570052#L1120

The branch I'm using with the patches applied can be found here:

https://gitlab.collabora.com/gtucker/linux/-/commits/next-20210419-nyan-big-drm-read/

Hope this helps, let me know if you need anything else to be
tested.

Best wishes,
Guillaume

2021-04-23 15:24:20

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] iommu/tegra-smmu: Revert workaround that was needed for Nyan Big Chromebook

23.04.2021 18:01, Guillaume Tucker пишет:
> On 02/04/2021 15:40, Dmitry Osipenko wrote:
>> 01.04.2021 11:55, Nicolin Chen пишет:
>>> On Mon, Mar 29, 2021 at 02:32:56AM +0300, Dmitry Osipenko wrote:
>>>> The previous commit fixes problem where display client was attaching too
>>>> early to IOMMU during kernel boot in a multi-platform kernel configuration
>>>> which enables CONFIG_ARM_DMA_USE_IOMMU=y. The workaround that helped to
>>>> defer the IOMMU attachment for Nyan Big Chromebook isn't needed anymore,
>>>> revert it.
>>>
>>> Sorry for the late reply. I have been busy with downstream tasks.
>>>
>>> I will give them a try by the end of the week. Yet, probably it'd
>>> be better to include Guillaume also as he has the Nyan platform.
>>>
>>
>> Indeed, thanks. Although, I'm pretty sure that it's the same issue which
>> I reproduced on Nexus 7.
>>
>> Guillaume, could you please give a test to these patches on Nyan Big?
>> There should be no EMEM errors in the kernel log with this patches.
>>
>> https://patchwork.ozlabs.org/project/linux-tegra/list/?series=236215
>
> So sorry for the very late reply. I have tried the patches but
> hit some issues on linux-next, it's not reaching a login prompt
> with next-20210422. So I then tried with next-20210419 which
> does boot but shows the IOMMU error:
>
> <6>[ 2.995341] tegra-dc 54200000.dc: Adding to iommu group 1
> <4>[ 3.001070] Failed to attached device 54200000.dc to IOMMU_mapping
>
> https://lava.collabora.co.uk/scheduler/job/3570052#L1120
>
> The branch I'm using with the patches applied can be found here:
>
> https://gitlab.collabora.com/gtucker/linux/-/commits/next-20210419-nyan-big-drm-read/
>
> Hope this helps, let me know if you need anything else to be
> tested.


Hello Guillaume,

The current linux-next doesn't boot on all ARM (AFAIK), the older
next-20210413 works. The above message should be unrelated to the boot
problem. It should be okay to ignore that message as it should be
harmless in yours case.

2021-04-24 20:30:42

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] iommu/tegra-smmu: Revert workaround that was needed for Nyan Big Chromebook

23.04.2021 18:23, Dmitry Osipenko пишет:
> 23.04.2021 18:01, Guillaume Tucker пишет:
>> On 02/04/2021 15:40, Dmitry Osipenko wrote:
>>> 01.04.2021 11:55, Nicolin Chen пишет:
>>>> On Mon, Mar 29, 2021 at 02:32:56AM +0300, Dmitry Osipenko wrote:
>>>>> The previous commit fixes problem where display client was attaching too
>>>>> early to IOMMU during kernel boot in a multi-platform kernel configuration
>>>>> which enables CONFIG_ARM_DMA_USE_IOMMU=y. The workaround that helped to
>>>>> defer the IOMMU attachment for Nyan Big Chromebook isn't needed anymore,
>>>>> revert it.
>>>>
>>>> Sorry for the late reply. I have been busy with downstream tasks.
>>>>
>>>> I will give them a try by the end of the week. Yet, probably it'd
>>>> be better to include Guillaume also as he has the Nyan platform.
>>>>
>>>
>>> Indeed, thanks. Although, I'm pretty sure that it's the same issue which
>>> I reproduced on Nexus 7.
>>>
>>> Guillaume, could you please give a test to these patches on Nyan Big?
>>> There should be no EMEM errors in the kernel log with this patches.
>>>
>>> https://patchwork.ozlabs.org/project/linux-tegra/list/?series=236215
>>
>> So sorry for the very late reply. I have tried the patches but
>> hit some issues on linux-next, it's not reaching a login prompt
>> with next-20210422. So I then tried with next-20210419 which
>> does boot but shows the IOMMU error:
>>
>> <6>[ 2.995341] tegra-dc 54200000.dc: Adding to iommu group 1
>> <4>[ 3.001070] Failed to attached device 54200000.dc to IOMMU_mapping
>>
>> https://lava.collabora.co.uk/scheduler/job/3570052#L1120
>>
>> The branch I'm using with the patches applied can be found here:
>>
>> https://gitlab.collabora.com/gtucker/linux/-/commits/next-20210419-nyan-big-drm-read/
>>
>> Hope this helps, let me know if you need anything else to be
>> tested.
>
>
> Hello Guillaume,
>
> The current linux-next doesn't boot on all ARM (AFAIK), the older
> next-20210413 works. The above message should be unrelated to the boot
> problem. It should be okay to ignore that message as it should be
> harmless in yours case.
>

Although, the 20210419 should be good.

Thierry, do you know what those SOR and Nouveau issues are about?

2021-04-24 20:42:53

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] iommu/tegra-smmu: Revert workaround that was needed for Nyan Big Chromebook

24.04.2021 23:27, Dmitry Osipenko пишет:
> 23.04.2021 18:23, Dmitry Osipenko пишет:
>> 23.04.2021 18:01, Guillaume Tucker пишет:
>>> On 02/04/2021 15:40, Dmitry Osipenko wrote:
>>>> 01.04.2021 11:55, Nicolin Chen пишет:
>>>>> On Mon, Mar 29, 2021 at 02:32:56AM +0300, Dmitry Osipenko wrote:
>>>>>> The previous commit fixes problem where display client was attaching too
>>>>>> early to IOMMU during kernel boot in a multi-platform kernel configuration
>>>>>> which enables CONFIG_ARM_DMA_USE_IOMMU=y. The workaround that helped to
>>>>>> defer the IOMMU attachment for Nyan Big Chromebook isn't needed anymore,
>>>>>> revert it.
>>>>>
>>>>> Sorry for the late reply. I have been busy with downstream tasks.
>>>>>
>>>>> I will give them a try by the end of the week. Yet, probably it'd
>>>>> be better to include Guillaume also as he has the Nyan platform.
>>>>>
>>>>
>>>> Indeed, thanks. Although, I'm pretty sure that it's the same issue which
>>>> I reproduced on Nexus 7.
>>>>
>>>> Guillaume, could you please give a test to these patches on Nyan Big?
>>>> There should be no EMEM errors in the kernel log with this patches.
>>>>
>>>> https://patchwork.ozlabs.org/project/linux-tegra/list/?series=236215
>>>
>>> So sorry for the very late reply. I have tried the patches but
>>> hit some issues on linux-next, it's not reaching a login prompt
>>> with next-20210422. So I then tried with next-20210419 which
>>> does boot but shows the IOMMU error:
>>>
>>> <6>[ 2.995341] tegra-dc 54200000.dc: Adding to iommu group 1
>>> <4>[ 3.001070] Failed to attached device 54200000.dc to IOMMU_mapping
>>>
>>> https://lava.collabora.co.uk/scheduler/job/3570052#L1120
>>>
>>> The branch I'm using with the patches applied can be found here:
>>>
>>> https://gitlab.collabora.com/gtucker/linux/-/commits/next-20210419-nyan-big-drm-read/
>>>
>>> Hope this helps, let me know if you need anything else to be
>>> tested.
>>
>>
>> Hello Guillaume,
>>
>> The current linux-next doesn't boot on all ARM (AFAIK), the older
>> next-20210413 works. The above message should be unrelated to the boot
>> problem. It should be okay to ignore that message as it should be
>> harmless in yours case.
>>
>
> Although, the 20210419 should be good.
>
> Thierry, do you know what those SOR and Nouveau issues are about?
>

I don't see DRM driver being loaded at all and no errors related to it
in the log. This means that likely some of the DRM sub-drivers is stuck
in deferred probe.

Guillaume, could you please show contents of
/sys/kernel/debug/devices_deferred ?

These messages also don't look good:

tegra-xusb-padctl 7009f000.padctl: failed to setup XUSB ports: -22
tegra-xusb-padctl: probe of 7009f000.padctl failed with error -22

I don't have T124 and all these problems should be specific to the T124
platform. Somebody with a direct access to hardware should look into it.

2021-04-26 07:15:43

by Thierry Reding

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] iommu/tegra-smmu: Revert workaround that was needed for Nyan Big Chromebook

On Sat, Apr 24, 2021 at 11:27:10PM +0300, Dmitry Osipenko wrote:
> 23.04.2021 18:23, Dmitry Osipenko пишет:
> > 23.04.2021 18:01, Guillaume Tucker пишет:
> >> On 02/04/2021 15:40, Dmitry Osipenko wrote:
> >>> 01.04.2021 11:55, Nicolin Chen пишет:
> >>>> On Mon, Mar 29, 2021 at 02:32:56AM +0300, Dmitry Osipenko wrote:
> >>>>> The previous commit fixes problem where display client was attaching too
> >>>>> early to IOMMU during kernel boot in a multi-platform kernel configuration
> >>>>> which enables CONFIG_ARM_DMA_USE_IOMMU=y. The workaround that helped to
> >>>>> defer the IOMMU attachment for Nyan Big Chromebook isn't needed anymore,
> >>>>> revert it.
> >>>>
> >>>> Sorry for the late reply. I have been busy with downstream tasks.
> >>>>
> >>>> I will give them a try by the end of the week. Yet, probably it'd
> >>>> be better to include Guillaume also as he has the Nyan platform.
> >>>>
> >>>
> >>> Indeed, thanks. Although, I'm pretty sure that it's the same issue which
> >>> I reproduced on Nexus 7.
> >>>
> >>> Guillaume, could you please give a test to these patches on Nyan Big?
> >>> There should be no EMEM errors in the kernel log with this patches.
> >>>
> >>> https://patchwork.ozlabs.org/project/linux-tegra/list/?series=236215
> >>
> >> So sorry for the very late reply. I have tried the patches but
> >> hit some issues on linux-next, it's not reaching a login prompt
> >> with next-20210422. So I then tried with next-20210419 which
> >> does boot but shows the IOMMU error:
> >>
> >> <6>[ 2.995341] tegra-dc 54200000.dc: Adding to iommu group 1
> >> <4>[ 3.001070] Failed to attached device 54200000.dc to IOMMU_mapping
> >>
> >> https://lava.collabora.co.uk/scheduler/job/3570052#L1120
> >>
> >> The branch I'm using with the patches applied can be found here:
> >>
> >> https://gitlab.collabora.com/gtucker/linux/-/commits/next-20210419-nyan-big-drm-read/
> >>
> >> Hope this helps, let me know if you need anything else to be
> >> tested.
> >
> >
> > Hello Guillaume,
> >
> > The current linux-next doesn't boot on all ARM (AFAIK), the older
> > next-20210413 works. The above message should be unrelated to the boot
> > problem. It should be okay to ignore that message as it should be
> > harmless in yours case.
> >
>
> Although, the 20210419 should be good.
>
> Thierry, do you know what those SOR and Nouveau issues are about?

There's a use-after-free (though it's really a use-before-init) issue in
linux-next at the moment, but a fix has been suggested. The fix for this
along with an additional leak plug is here:

http://patchwork.ozlabs.org/project/linux-tegra/list/?series=240569

I'm not aware of any Nouveau issues. What version and platform are those
happening on? Are there any logs? I can't seem to find them in this
thread.

Thierry


Attachments:
(No filename) (2.82 kB)
signature.asc (849.00 B)
Download all attachments

2021-04-26 07:49:57

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v1 2/2] iommu/tegra-smmu: Revert workaround that was needed for Nyan Big Chromebook

26.04.2021 10:14, Thierry Reding пишет:
> On Sat, Apr 24, 2021 at 11:27:10PM +0300, Dmitry Osipenko wrote:
>> 23.04.2021 18:23, Dmitry Osipenko пишет:
>>> 23.04.2021 18:01, Guillaume Tucker пишет:
>>>> On 02/04/2021 15:40, Dmitry Osipenko wrote:
>>>>> 01.04.2021 11:55, Nicolin Chen пишет:
>>>>>> On Mon, Mar 29, 2021 at 02:32:56AM +0300, Dmitry Osipenko wrote:
>>>>>>> The previous commit fixes problem where display client was attaching too
>>>>>>> early to IOMMU during kernel boot in a multi-platform kernel configuration
>>>>>>> which enables CONFIG_ARM_DMA_USE_IOMMU=y. The workaround that helped to
>>>>>>> defer the IOMMU attachment for Nyan Big Chromebook isn't needed anymore,
>>>>>>> revert it.
>>>>>>
>>>>>> Sorry for the late reply. I have been busy with downstream tasks.
>>>>>>
>>>>>> I will give them a try by the end of the week. Yet, probably it'd
>>>>>> be better to include Guillaume also as he has the Nyan platform.
>>>>>>
>>>>>
>>>>> Indeed, thanks. Although, I'm pretty sure that it's the same issue which
>>>>> I reproduced on Nexus 7.
>>>>>
>>>>> Guillaume, could you please give a test to these patches on Nyan Big?
>>>>> There should be no EMEM errors in the kernel log with this patches.
>>>>>
>>>>> https://patchwork.ozlabs.org/project/linux-tegra/list/?series=236215
>>>>
>>>> So sorry for the very late reply. I have tried the patches but
>>>> hit some issues on linux-next, it's not reaching a login prompt
>>>> with next-20210422. So I then tried with next-20210419 which
>>>> does boot but shows the IOMMU error:
>>>>
>>>> <6>[ 2.995341] tegra-dc 54200000.dc: Adding to iommu group 1
>>>> <4>[ 3.001070] Failed to attached device 54200000.dc to IOMMU_mapping
>>>>
>>>> https://lava.collabora.co.uk/scheduler/job/3570052#L1120
>>>>
>>>> The branch I'm using with the patches applied can be found here:
>>>>
>>>> https://gitlab.collabora.com/gtucker/linux/-/commits/next-20210419-nyan-big-drm-read/
>>>>
>>>> Hope this helps, let me know if you need anything else to be
>>>> tested.
>>>
>>>
>>> Hello Guillaume,
>>>
>>> The current linux-next doesn't boot on all ARM (AFAIK), the older
>>> next-20210413 works. The above message should be unrelated to the boot
>>> problem. It should be okay to ignore that message as it should be
>>> harmless in yours case.
>>>
>>
>> Although, the 20210419 should be good.
>>
>> Thierry, do you know what those SOR and Nouveau issues are about?
>
> There's a use-after-free (though it's really a use-before-init) issue in
> linux-next at the moment, but a fix has been suggested. The fix for this
> along with an additional leak plug is here:
>
> http://patchwork.ozlabs.org/project/linux-tegra/list/?series=240569

Nice, thank you. Maybe Guillaume could give it a test.

> I'm not aware of any Nouveau issues. What version and platform are those
> happening on? Are there any logs? I can't seem to find them in this
> thread.

This all is on Nyan Big using linux-next-20210419. There is a link to the full log above.

I see this Nouveau failure:

<6>[ 19.323113] nouveau 57000000.gpu: Adding to iommu group 2
<6>[ 19.323615] nouveau 57000000.gpu: NVIDIA GK20A (0ea000a1)
<6>[ 19.323668] nouveau 57000000.gpu: imem: using IOMMU
<3>[ 19.323795] nouveau 57000000.gpu: gr ctor failed: -2
<4>[ 19.323983] nouveau: probe of 57000000.gpu failed with error -2