This series of patches adds a new mappings node to debugfs for
tegra-smmu driver. The first five patches are all preparational
changes for PATCH-6, based on Thierry's review feedback against
v5: https://lkml.org/lkml/2021/3/16/447
Changelog
v6:
* Added PATCH1-3 for better naming conventions
* Added PATCH4-5 to embed previous struct tegra_smmu_group_debug
into struct tegra_smmu_group
* Dropped parentheses at SMMU_PTE_ATTR_SHIFT
* Dropped swgrp->reg print
* Replaced ptb_reg contents with as->attr and as->pd_dma
* Added "index" and "count" in the PD entries for readability
* Removed Dmitry's Tested-by and Reviewed-by for the big change
from v5 to v6.
v5: https://lkml.org/lkml/2021/3/15/2473
* Fixed a typo in commit message
* Split a long line into two lines
* Rearranged variable defines by length
* Added Tested-by and Reviewed-by from Dmitry
v4: https://lkml.org/lkml/2021/3/14/429
* Changed %d to %u for unsigned variables
* Fixed print format mismatch warnings on ARM32
v3: https://lkml.org/lkml/2021/3/14/30
* Fixed PHYS and IOVA print formats
* Changed variables to unsigned int type
* Changed the table outputs to be compact
v2: https://lkml.org/lkml/2021/3/9/1382
* Expanded mutex range to the entire function
* Added as->lock to protect pagetable walkthrough
* Replaced devm_kzalloc with devm_kcalloc for group_debug
* Added "PTE RANGE" and "SIZE" columns to group contiguous mappings
* Dropped as->count check
* Added WARN_ON when as->count mismatches pd[pd_index]
v1: https://lkml.org/lkml/2020/9/26/70
Nicolin Chen (6):
iommu/tegra-smmu: Rename struct iommu_group *group to *grp
iommu/tegra-smmu: Rename struct tegra_smmu_group_soc *soc to
*group_soc
iommu/tegra-smmu: Rename struct tegra_smmu_swgroup *group to *swgrp
iommu/tegra-smmu: Use swgrp pointer instead of swgroup id
iommu/tegra-smmu: Attach as pointer to tegra_smmu_group
iommu/tegra-smmu: Add pagetable mappings to debugfs
drivers/iommu/tegra-smmu.c | 312 +++++++++++++++++++++++++++++++------
1 file changed, 262 insertions(+), 50 deletions(-)
--
2.17.1
This patch changes in struct tegra_smmu_group to use swgrp
pointer instead of swgroup, as a preparational change for
the "mappings" debugfs feature.
Signed-off-by: Nicolin Chen <[email protected]>
---
drivers/iommu/tegra-smmu.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 0f3883045ffa..8fd4985ac91e 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -23,8 +23,8 @@ struct tegra_smmu_group {
struct list_head list;
struct tegra_smmu *smmu;
const struct tegra_smmu_group_soc *group_soc;
+ const struct tegra_smmu_swgroup *swgrp;
struct iommu_group *grp;
- unsigned int swgroup;
};
struct tegra_smmu {
@@ -897,18 +897,22 @@ static struct iommu_group *tegra_smmu_device_group(struct device *dev)
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
const struct tegra_smmu_group_soc *group_soc;
+ const struct tegra_smmu_swgroup *swgrp;
unsigned int swgroup = fwspec->ids[0];
struct tegra_smmu_group *group;
struct iommu_group *grp;
+ /* Find swgrp according to the swgroup id */
+ swgrp = tegra_smmu_find_swgrp(smmu, swgroup);
+
/* Find group_soc associating with swgroup */
group_soc = tegra_smmu_find_group_soc(smmu, swgroup);
mutex_lock(&smmu->lock);
- /* Find existing iommu_group associating with swgroup or group_soc */
+ /* Find existing iommu_group associating with swgrp or group_soc */
list_for_each_entry(group, &smmu->groups, list)
- if ((group->swgroup == swgroup) ||
+ if ((swgrp && group->swgrp == swgrp) ||
(group_soc && group->group_soc == group_soc)) {
grp = iommu_group_ref_get(group->grp);
mutex_unlock(&smmu->lock);
@@ -923,7 +927,7 @@ static struct iommu_group *tegra_smmu_device_group(struct device *dev)
INIT_LIST_HEAD(&group->list);
group->group_soc = group_soc;
- group->swgroup = swgroup;
+ group->swgrp = swgrp;
group->smmu = smmu;
if (dev_is_pci(dev))
--
2.17.1
This patch dumps all active mapping entries from pagetable
to a debugfs directory named "mappings".
Attaching an example:
SWGROUP: hc
as->id: 0
as->attr: R|W|N
as->pd_dma: 0x0000000080c03000
{
[index: 1023] 0xf0080c3e (count: 2)
{
PTE RANGE | ATTR | PHYS | IOVA | SIZE
[#1022, #1023] | 0x5 | 0x000000010bbf1000 | 0x00000000ffffe000 | 0x2000
}
}
Total PDE count: 1
Total PTE count: 2
Signed-off-by: Nicolin Chen <[email protected]>
---
drivers/iommu/tegra-smmu.c | 145 +++++++++++++++++++++++++++++++++++++
1 file changed, 145 insertions(+)
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 68c34a4a0ecc..aac977e181f6 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -46,6 +46,7 @@ struct tegra_smmu {
struct list_head list;
struct dentry *debugfs;
+ struct dentry *debugfs_mappings;
struct iommu_device iommu; /* IOMMU Core code handle */
};
@@ -153,6 +154,9 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset)
#define SMMU_PDE_ATTR (SMMU_PDE_READABLE | SMMU_PDE_WRITABLE | \
SMMU_PDE_NONSECURE)
+#define SMMU_PTE_ATTR (SMMU_PTE_READABLE | SMMU_PTE_WRITABLE | \
+ SMMU_PTE_NONSECURE)
+#define SMMU_PTE_ATTR_SHIFT 29
static unsigned int iova_pd_index(unsigned long iova)
{
@@ -164,6 +168,12 @@ static unsigned int iova_pt_index(unsigned long iova)
return (iova >> SMMU_PTE_SHIFT) & (SMMU_NUM_PTE - 1);
}
+static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
+{
+ return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
+ ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
+}
+
static bool smmu_dma_addr_valid(struct tegra_smmu *smmu, dma_addr_t addr)
{
addr >>= 12;
@@ -496,6 +506,8 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu,
mutex_unlock(&smmu->lock);
}
+static const struct file_operations tegra_smmu_debugfs_mappings_fops;
+
static void tegra_smmu_attach_as(struct tegra_smmu *smmu,
struct tegra_smmu_as *as,
unsigned int swgroup)
@@ -517,6 +529,12 @@ static void tegra_smmu_attach_as(struct tegra_smmu *smmu,
dev_warn(smmu->dev,
"overwriting group->as for swgroup: %s\n", swgrp->name);
group->as = as;
+
+ if (smmu->debugfs_mappings)
+ debugfs_create_file(group->swgrp->name, 0444,
+ smmu->debugfs_mappings, group,
+ &tegra_smmu_debugfs_mappings_fops);
+
break;
}
@@ -541,6 +559,12 @@ static void tegra_smmu_detach_as(struct tegra_smmu *smmu,
if (group->swgrp != swgrp)
continue;
group->as = NULL;
+
+ if (smmu->debugfs_mappings) {
+ d = debugfs_lookup(group->swgrp->name, smmu->debugfs_mappings);
+ debugfs_remove(d);
+ }
+
break;
}
@@ -1124,6 +1148,125 @@ static int tegra_smmu_clients_show(struct seq_file *s, void *data)
DEFINE_SHOW_ATTRIBUTE(tegra_smmu_clients);
+static int tegra_smmu_debugfs_mappings_show(struct seq_file *s, void *data)
+{
+ struct tegra_smmu_group *group = s->private;
+ const struct tegra_smmu_swgroup *swgrp;
+ struct tegra_smmu_as *as;
+ struct tegra_smmu *smmu;
+ unsigned int pd_index;
+ unsigned int pt_index;
+ unsigned long flags;
+ u64 pte_count = 0;
+ u32 pde_count = 0;
+ u32 *pd, val;
+
+ if (!group || !group->as || !group->swgrp)
+ return 0;
+
+ swgrp = group->swgrp;
+ smmu = group->smmu;
+ as = group->as;
+
+ mutex_lock(&smmu->lock);
+
+ val = smmu_readl(smmu, swgrp->reg) & SMMU_ASID_ENABLE;
+ if (!val)
+ goto unlock;
+
+ pd = page_address(as->pd);
+ if (!pd)
+ goto unlock;
+
+ seq_printf(s, "\nSWGROUP: %s\n", swgrp->name);
+ seq_printf(s, "as->id: %d\nas->attr: %c|%c|%s\nas->pd_dma: %pad\n", as->id,
+ as->attr & SMMU_PD_READABLE ? 'R' : '-',
+ as->attr & SMMU_PD_WRITABLE ? 'W' : '-',
+ as->attr & SMMU_PD_NONSECURE ? "NS" : "S",
+ &as->pd_dma);
+ seq_puts(s, "{\n");
+
+ spin_lock_irqsave(&as->lock, flags);
+
+ for (pd_index = 0; pd_index < SMMU_NUM_PDE; pd_index++) {
+ struct page *pt_page;
+ unsigned int i;
+ u32 *addr;
+
+ /* An empty PDE should not have a pte use count */
+ WARN_ON_ONCE(!pd[pd_index] ^ !as->count[pd_index]);
+
+ /* Skip this empty PDE */
+ if (!pd[pd_index])
+ continue;
+
+ pde_count++;
+ pte_count += as->count[pd_index];
+ seq_printf(s, "\t[index: %u] 0x%x (count: %d)\n",
+ pd_index, pd[pd_index], as->count[pd_index]);
+ pt_page = as->pts[pd_index];
+ addr = page_address(pt_page);
+
+ seq_puts(s, "\t{\n");
+ seq_printf(s, "\t\t%-14s | %-4s | %-10s%s | %-10s%s | %-11s\n",
+ "PTE RANGE", "ATTR",
+ "PHYS", sizeof(phys_addr_t) > 4 ? " " : "",
+ "IOVA", sizeof(dma_addr_t) > 4 ? " " : "",
+ "SIZE");
+ for (pt_index = 0; pt_index < SMMU_NUM_PTE; pt_index += i) {
+ size_t size = SMMU_SIZE_PT;
+ dma_addr_t iova;
+ phys_addr_t pa;
+
+ i = 1;
+
+ if (!addr[pt_index])
+ continue;
+
+ iova = pd_pt_index_iova(pd_index, pt_index);
+ pa = SMMU_PFN_PHYS(addr[pt_index] & ~SMMU_PTE_ATTR);
+
+ /* Check contiguous mappings and increase size */
+ while (pt_index + i < SMMU_NUM_PTE) {
+ dma_addr_t next_iova;
+ phys_addr_t next_pa;
+
+ if (!addr[pt_index + i])
+ break;
+
+ next_iova = pd_pt_index_iova(pd_index, pt_index + i);
+ next_pa = SMMU_PFN_PHYS(addr[pt_index + i] & ~SMMU_PTE_ATTR);
+
+ /* Break at the end of a linear mapping */
+ if ((next_iova - iova != SMMU_SIZE_PT * i) ||
+ (next_pa - pa != SMMU_SIZE_PT * i))
+ break;
+
+ i++;
+ }
+
+ seq_printf(s, "\t\t[#%-4u, #%-4u] | 0x%-2x | %pa | %pad | 0x%-9zx\n",
+ pt_index, pt_index + i - 1,
+ addr[pt_index] >> SMMU_PTE_ATTR_SHIFT,
+ &pa, &iova, size * i);
+ }
+ seq_puts(s, "\t}\n");
+ }
+
+ spin_unlock_irqrestore(&as->lock, flags);
+
+ seq_puts(s, "}\n");
+ seq_printf(s, "Total PDE count: %u\n", pde_count);
+ seq_printf(s, "Total PTE count: %llu\n", pte_count);
+
+unlock:
+ mutex_unlock(&smmu->lock);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(tegra_smmu_debugfs_mappings);
+
static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu)
{
smmu->debugfs = debugfs_create_dir("smmu", NULL);
@@ -1134,6 +1277,8 @@ static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu)
&tegra_smmu_swgroups_fops);
debugfs_create_file("clients", S_IRUGO, smmu->debugfs, smmu,
&tegra_smmu_clients_fops);
+
+ smmu->debugfs_mappings = debugfs_create_dir("mappings", smmu->debugfs);
}
static void tegra_smmu_debugfs_exit(struct tegra_smmu *smmu)
--
2.17.1
14.09.2021 04:38, Nicolin Chen пишет:
> +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
> +{
> + return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
> + ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
> +}
We know that IOVA is fixed to u32 for this controller. Can we avoid all
these dma_addr_t castings? It should make code cleaner a tad, IMO.
On Tue, Sep 14, 2021 at 04:29:15PM +0300, Dmitry Osipenko wrote:
> 14.09.2021 04:38, Nicolin Chen пишет:
> > +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
> > +{
> > + return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
> > + ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
> > +}
>
> We know that IOVA is fixed to u32 for this controller. Can we avoid all
> these dma_addr_t castings? It should make code cleaner a tad, IMO.
Tegra210 actually supports 34-bit IOVA...
14.09.2021 21:49, Nicolin Chen пишет:
> On Tue, Sep 14, 2021 at 04:29:15PM +0300, Dmitry Osipenko wrote:
>> 14.09.2021 04:38, Nicolin Chen пишет:
>>> +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
>>> +{
>>> + return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
>>> + ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
>>> +}
>>
>> We know that IOVA is fixed to u32 for this controller. Can we avoid all
>> these dma_addr_t castings? It should make code cleaner a tad, IMO.
>
> Tegra210 actually supports 34-bit IOVA...
>
It doesn't. 34-bit is PA, 32-bit is VA.
Quote from T210 TRM:
"The SMMU is a centralized virtual-to-physical translation for MSS. It
maps a 32-bit virtual address to a 34-bit physical address. If the
client address is 40 bits then bits 39:32 are ignored."
Even if it supported more than 32bit, then the returned ulong is 32bit,
which doesn't make sense.
On Tue, Sep 14, 2021 at 10:20:30PM +0300, Dmitry Osipenko wrote:
> 14.09.2021 21:49, Nicolin Chen пишет:
> > On Tue, Sep 14, 2021 at 04:29:15PM +0300, Dmitry Osipenko wrote:
> >> 14.09.2021 04:38, Nicolin Chen пишет:
> >>> +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
> >>> +{
> >>> + return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
> >>> + ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
> >>> +}
> >>
> >> We know that IOVA is fixed to u32 for this controller. Can we avoid all
> >> these dma_addr_t castings? It should make code cleaner a tad, IMO.
> >
> > Tegra210 actually supports 34-bit IOVA...
> >
>
> It doesn't. 34-bit is PA, 32-bit is VA.
>
> Quote from T210 TRM:
>
> "The SMMU is a centralized virtual-to-physical translation for MSS. It
> maps a 32-bit virtual address to a 34-bit physical address. If the
> client address is 40 bits then bits 39:32 are ignored."
If you scroll down by a couple of sections, you can see 34-bit
virtual addresses in section 18.6.1.2; and if checking one ASID
register, you can see it mention the extra two bits va[33:32].
However, the driver currently sets its geometry.aperture_end to
32-bit, and we can only get 32-bit IOVAs using PDE and PTE only,
so I think it should be safe to remove the castings here. I'll
wait for a couple of days and see if there'd be other comments
for me to address in next version.
> Even if it supported more than 32bit, then the returned ulong is 32bit,
> which doesn't make sense.
On ARM64 (Tegra210), isn't ulong 64-bit?
15.09.2021 07:38, Nicolin Chen пишет:
> On Tue, Sep 14, 2021 at 10:20:30PM +0300, Dmitry Osipenko wrote:
>> 14.09.2021 21:49, Nicolin Chen пишет:
>>> On Tue, Sep 14, 2021 at 04:29:15PM +0300, Dmitry Osipenko wrote:
>>>> 14.09.2021 04:38, Nicolin Chen пишет:
>>>>> +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
>>>>> +{
>>>>> + return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
>>>>> + ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
>>>>> +}
>>>>
>>>> We know that IOVA is fixed to u32 for this controller. Can we avoid all
>>>> these dma_addr_t castings? It should make code cleaner a tad, IMO.
>>>
>>> Tegra210 actually supports 34-bit IOVA...
>>>
>>
>> It doesn't. 34-bit is PA, 32-bit is VA.
>>
>> Quote from T210 TRM:
>>
>> "The SMMU is a centralized virtual-to-physical translation for MSS. It
>> maps a 32-bit virtual address to a 34-bit physical address. If the
>> client address is 40 bits then bits 39:32 are ignored."
>
> If you scroll down by a couple of sections, you can see 34-bit
> virtual addresses in section 18.6.1.2; and if checking one ASID
> register, you can see it mention the extra two bits va[33:32].
Thanks for the pointer. It says that only certain memory clients allow
to combine 4 ASIDs to form 34bit VA space. In this case the PA space is
split into 4GB areas and there are additional bitfields which configure
the ASID mapping of each 4GB area. Still each ASID is 32bit.
This is what TRM says:
"For the GPU and other clients with 34-bit address interfaces, the ASID
registers are extended to point to four ASIDs. The SMMU supports 4GB of
virtual address space per ASID, so mapping addr[33:32] into ASID[1:0]
extends the virtual address space of a client to 16GB."
> However, the driver currently sets its geometry.aperture_end to
> 32-bit, and we can only get 32-bit IOVAs using PDE and PTE only,
> so I think it should be safe to remove the castings here. I'll
> wait for a couple of days and see if there'd be other comments
> for me to address in next version.
You will need to read the special "ASID Assignment Register" which
supports 4 sub-ASIDs to translate the PA address into the actual VA. By
default all clients are limited to a single ASID and upstream kernel
doesn't support programming of 34bit VAs. So doesn't worth the effort to
fully translate the VA, IMO.
>> Even if it supported more than 32bit, then the returned ulong is 32bit,
>> which doesn't make sense.
>
> On ARM64 (Tegra210), isn't ulong 64-bit?
Yes, indeed.
15.09.2021 15:09, Dmitry Osipenko пишет:
> 15.09.2021 07:38, Nicolin Chen пишет:
>> On Tue, Sep 14, 2021 at 10:20:30PM +0300, Dmitry Osipenko wrote:
>>> 14.09.2021 21:49, Nicolin Chen пишет:
>>>> On Tue, Sep 14, 2021 at 04:29:15PM +0300, Dmitry Osipenko wrote:
>>>>> 14.09.2021 04:38, Nicolin Chen пишет:
>>>>>> +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
>>>>>> +{
>>>>>> + return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
>>>>>> + ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
>>>>>> +}
>>>>>
>>>>> We know that IOVA is fixed to u32 for this controller. Can we avoid all
>>>>> these dma_addr_t castings? It should make code cleaner a tad, IMO.
>>>>
>>>> Tegra210 actually supports 34-bit IOVA...
>>>>
>>>
>>> It doesn't. 34-bit is PA, 32-bit is VA.
>>>
>>> Quote from T210 TRM:
>>>
>>> "The SMMU is a centralized virtual-to-physical translation for MSS. It
>>> maps a 32-bit virtual address to a 34-bit physical address. If the
>>> client address is 40 bits then bits 39:32 are ignored."
>>
>> If you scroll down by a couple of sections, you can see 34-bit
>> virtual addresses in section 18.6.1.2; and if checking one ASID
>> register, you can see it mention the extra two bits va[33:32].
>
> Thanks for the pointer. It says that only certain memory clients allow
> to combine 4 ASIDs to form 34bit VA space. In this case the PA space is
> split into 4GB areas and there are additional bitfields which configure
> the ASID mapping of each 4GB area. Still each ASID is 32bit.
>
> This is what TRM says:
>
> "For the GPU and other clients with 34-bit address interfaces, the ASID
> registers are extended to point to four ASIDs. The SMMU supports 4GB of
> virtual address space per ASID, so mapping addr[33:32] into ASID[1:0]
> extends the virtual address space of a client to 16GB."
>
>> However, the driver currently sets its geometry.aperture_end to
>> 32-bit, and we can only get 32-bit IOVAs using PDE and PTE only,
>> so I think it should be safe to remove the castings here. I'll
>> wait for a couple of days and see if there'd be other comments
>> for me to address in next version.
>
> You will need to read the special "ASID Assignment Register" which
> supports 4 sub-ASIDs to translate the PA address into the actual VA. By
* VA to PA
> default all clients are limited to a single ASID and upstream kernel
> doesn't support programming of 34bit VAs. So doesn't worth the effort to
> fully translate the VA, IMO.
>
>>> Even if it supported more than 32bit, then the returned ulong is 32bit,
>>> which doesn't make sense.
>>
>> On ARM64 (Tegra210), isn't ulong 64-bit?
>
> Yes, indeed.
>
On Wed, Sep 15, 2021 at 03:09:48PM +0300, Dmitry Osipenko wrote:
> 15.09.2021 07:38, Nicolin Chen пишет:
> > On Tue, Sep 14, 2021 at 10:20:30PM +0300, Dmitry Osipenko wrote:
> >> 14.09.2021 21:49, Nicolin Chen пишет:
> >>> On Tue, Sep 14, 2021 at 04:29:15PM +0300, Dmitry Osipenko wrote:
> >>>> 14.09.2021 04:38, Nicolin Chen пишет:
> >>>>> +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
> >>>>> +{
> >>>>> + return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
> >>>>> + ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
> >>>>> +}
> >>>>
> >>>> We know that IOVA is fixed to u32 for this controller. Can we avoid all
> >>>> these dma_addr_t castings? It should make code cleaner a tad, IMO.
> >>>
> >>> Tegra210 actually supports 34-bit IOVA...
> >>>
> >>
> >> It doesn't. 34-bit is PA, 32-bit is VA.
> >>
> >> Quote from T210 TRM:
> >>
> >> "The SMMU is a centralized virtual-to-physical translation for MSS. It
> >> maps a 32-bit virtual address to a 34-bit physical address. If the
> >> client address is 40 bits then bits 39:32 are ignored."
> >
> > If you scroll down by a couple of sections, you can see 34-bit
> > virtual addresses in section 18.6.1.2; and if checking one ASID
> > register, you can see it mention the extra two bits va[33:32].
>
> Thanks for the pointer. It says that only certain memory clients allow
> to combine 4 ASIDs to form 34bit VA space. In this case the PA space is
> split into 4GB areas and there are additional bitfields which configure
> the ASID mapping of each 4GB area. Still each ASID is 32bit.
True.
> This is what TRM says:
>
> "For the GPU and other clients with 34-bit address interfaces, the ASID
> registers are extended to point to four ASIDs. The SMMU supports 4GB of
> virtual address space per ASID, so mapping addr[33:32] into ASID[1:0]
> extends the virtual address space of a client to 16GB."
>
> > However, the driver currently sets its geometry.aperture_end to
> > 32-bit, and we can only get 32-bit IOVAs using PDE and PTE only,
> > so I think it should be safe to remove the castings here. I'll
> > wait for a couple of days and see if there'd be other comments
> > for me to address in next version.
>
> You will need to read the special "ASID Assignment Register" which
> supports 4 sub-ASIDs to translate the PA address into the actual VA. By
> default all clients are limited to a single ASID and upstream kernel
> doesn't support programming of 34bit VAs. So doesn't worth the effort to
> fully translate the VA, IMO.
Yea. It'd be easier to just stay in 32-bit. I will remove those
castings in the next version, waiting for Thierry taking a look
at this v6 first.
On Mon, Sep 13, 2021 at 06:38:56PM -0700, Nicolin Chen wrote:
> This patch changes in struct tegra_smmu_group to use swgrp
> pointer instead of swgroup, as a preparational change for
> the "mappings" debugfs feature.
>
> Signed-off-by: Nicolin Chen <[email protected]>
> ---
> drivers/iommu/tegra-smmu.c | 12 ++++++++----
> 1 file changed, 8 insertions(+), 4 deletions(-)
Seems fine:
Acked-by: Thierry Reding <[email protected]>
On Mon, Sep 13, 2021 at 06:38:58PM -0700, Nicolin Chen wrote:
> This patch dumps all active mapping entries from pagetable
> to a debugfs directory named "mappings".
>
> Attaching an example:
>
> SWGROUP: hc
> as->id: 0
> as->attr: R|W|N
> as->pd_dma: 0x0000000080c03000
> {
> [index: 1023] 0xf0080c3e (count: 2)
> {
> PTE RANGE | ATTR | PHYS | IOVA | SIZE
> [#1022, #1023] | 0x5 | 0x000000010bbf1000 | 0x00000000ffffe000 | 0x2000
> }
> }
> Total PDE count: 1
> Total PTE count: 2
>
> Signed-off-by: Nicolin Chen <[email protected]>
> ---
> drivers/iommu/tegra-smmu.c | 145 +++++++++++++++++++++++++++++++++++++
> 1 file changed, 145 insertions(+)
>
> diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
> index 68c34a4a0ecc..aac977e181f6 100644
> --- a/drivers/iommu/tegra-smmu.c
> +++ b/drivers/iommu/tegra-smmu.c
> @@ -46,6 +46,7 @@ struct tegra_smmu {
> struct list_head list;
>
> struct dentry *debugfs;
> + struct dentry *debugfs_mappings;
>
> struct iommu_device iommu; /* IOMMU Core code handle */
> };
> @@ -153,6 +154,9 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset)
>
> #define SMMU_PDE_ATTR (SMMU_PDE_READABLE | SMMU_PDE_WRITABLE | \
> SMMU_PDE_NONSECURE)
> +#define SMMU_PTE_ATTR (SMMU_PTE_READABLE | SMMU_PTE_WRITABLE | \
> + SMMU_PTE_NONSECURE)
> +#define SMMU_PTE_ATTR_SHIFT 29
>
> static unsigned int iova_pd_index(unsigned long iova)
> {
> @@ -164,6 +168,12 @@ static unsigned int iova_pt_index(unsigned long iova)
> return (iova >> SMMU_PTE_SHIFT) & (SMMU_NUM_PTE - 1);
> }
>
> +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index)
> +{
> + return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT |
> + ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT;
> +}
> +
> static bool smmu_dma_addr_valid(struct tegra_smmu *smmu, dma_addr_t addr)
> {
> addr >>= 12;
> @@ -496,6 +506,8 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu,
> mutex_unlock(&smmu->lock);
> }
>
> +static const struct file_operations tegra_smmu_debugfs_mappings_fops;
Could the implementation be moved up here to avoid the forward
declaration?
> +
> static void tegra_smmu_attach_as(struct tegra_smmu *smmu,
> struct tegra_smmu_as *as,
> unsigned int swgroup)
> @@ -517,6 +529,12 @@ static void tegra_smmu_attach_as(struct tegra_smmu *smmu,
> dev_warn(smmu->dev,
> "overwriting group->as for swgroup: %s\n", swgrp->name);
> group->as = as;
> +
> + if (smmu->debugfs_mappings)
> + debugfs_create_file(group->swgrp->name, 0444,
> + smmu->debugfs_mappings, group,
> + &tegra_smmu_debugfs_mappings_fops);
> +
> break;
> }
>
> @@ -541,6 +559,12 @@ static void tegra_smmu_detach_as(struct tegra_smmu *smmu,
> if (group->swgrp != swgrp)
> continue;
> group->as = NULL;
> +
> + if (smmu->debugfs_mappings) {
> + d = debugfs_lookup(group->swgrp->name, smmu->debugfs_mappings);
> + debugfs_remove(d);
> + }
> +
> break;
> }
>
> @@ -1124,6 +1148,125 @@ static int tegra_smmu_clients_show(struct seq_file *s, void *data)
>
> DEFINE_SHOW_ATTRIBUTE(tegra_smmu_clients);
>
> +static int tegra_smmu_debugfs_mappings_show(struct seq_file *s, void *data)
> +{
> + struct tegra_smmu_group *group = s->private;
> + const struct tegra_smmu_swgroup *swgrp;
> + struct tegra_smmu_as *as;
> + struct tegra_smmu *smmu;
> + unsigned int pd_index;
> + unsigned int pt_index;
> + unsigned long flags;
> + u64 pte_count = 0;
> + u32 pde_count = 0;
> + u32 *pd, val;
> +
> + if (!group || !group->as || !group->swgrp)
> + return 0;
> +
> + swgrp = group->swgrp;
> + smmu = group->smmu;
> + as = group->as;
> +
> + mutex_lock(&smmu->lock);
> +
> + val = smmu_readl(smmu, swgrp->reg) & SMMU_ASID_ENABLE;
> + if (!val)
> + goto unlock;
> +
> + pd = page_address(as->pd);
> + if (!pd)
> + goto unlock;
> +
> + seq_printf(s, "\nSWGROUP: %s\n", swgrp->name);
> + seq_printf(s, "as->id: %d\nas->attr: %c|%c|%s\nas->pd_dma: %pad\n", as->id,
> + as->attr & SMMU_PD_READABLE ? 'R' : '-',
> + as->attr & SMMU_PD_WRITABLE ? 'W' : '-',
> + as->attr & SMMU_PD_NONSECURE ? "NS" : "S",
> + &as->pd_dma);
> + seq_puts(s, "{\n");
Maybe this can be more compact by putting the name, ID, attributes and
base address onto a single line? Maybe also use "'-' : 'S'" for the
non-secure attribute to keep in line with what you've done for readable
and writable attributes.
Then again, this is going to be very verbose output anyway, so maybe it
isn't worth it.
Thierry
On Thu, Oct 07, 2021 at 07:13:25PM +0200, Thierry Reding wrote:
> > @@ -496,6 +506,8 @@ static void tegra_smmu_as_unprepare(struct tegra_smmu *smmu,
> > mutex_unlock(&smmu->lock);
> > }
> >
> > +static const struct file_operations tegra_smmu_debugfs_mappings_fops;
>
> Could the implementation be moved up here to avoid the forward
> declaration?
I thought that keeping all debugfs fops together would be preferable.
But yes, I will move it if you prefer no-additional forward declare.
> > + seq_printf(s, "\nSWGROUP: %s\n", swgrp->name);
> > + seq_printf(s, "as->id: %d\nas->attr: %c|%c|%s\nas->pd_dma: %pad\n", as->id,
> > + as->attr & SMMU_PD_READABLE ? 'R' : '-',
> > + as->attr & SMMU_PD_WRITABLE ? 'W' : '-',
> > + as->attr & SMMU_PD_NONSECURE ? "NS" : "S",
> > + &as->pd_dma);
> > + seq_puts(s, "{\n");
>
> Maybe this can be more compact by putting the name, ID, attributes and
> base address onto a single line? Maybe also use "'-' : 'S'" for the
> non-secure attribute to keep in line with what you've done for readable
> and writable attributes.
Okay. Will change that.
> Then again, this is going to be very verbose output anyway, so maybe it
> isn't worth it.
Are you saying the whole debugfs thing or just attributes? Yet, for
either case, I don't think so, as mappings info would help for sure
from our past experience while the attributes are just one line...