Remove struct arm_smmu_s1_cfg. This is really just a CD table with a
bit of extra information. Enhance the existing CD table structure,
struct arm_smmu_ctx_desc_cfg, with max_cds_bits and replace all usages
of arm_smmu_s1_cfg with arm_smmu_ctx_desc_cfg.
Compute the other values that were stored in s1cfg directly from
existing arm_smmu_ctx_desc_cfg.
For clarity, use the name "cd_table" for the variables pointing to
arm_smmu_ctx_desc_cfg in the new code instead of cdcfg. A later patch
will make this fully consistent.
Reviewed-by: Jason Gunthorpe <[email protected]>
Reviewed-by: Nicolin Chen <[email protected]>
Signed-off-by: Michael Shavit <[email protected]>
---
(no changes since v3)
Changes in v3:
- Updated commit messages again
- Replace more usages of cdcfg with cdtable (lines that were already
touched by this commit anyways).
Changes in v2:
- Updated commit message
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 45 +++++++++++----------
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 10 ++---
2 files changed, 26 insertions(+), 29 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index bb277ff86f65f..ded613aedbb04 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1033,9 +1033,9 @@ static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
unsigned int idx;
struct arm_smmu_l1_ctx_desc *l1_desc;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
+ struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table;
- if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
+ if (!cdcfg->l1_desc)
return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
idx = ssid >> CTXDESC_SPLIT;
@@ -1071,7 +1071,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
bool cd_live;
__le64 *cdptr;
- if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
+ if (WARN_ON(ssid >= (1 << smmu_domain->cd_table.max_cds_bits)))
return -E2BIG;
cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
@@ -1138,19 +1138,16 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
size_t l1size;
size_t max_contexts;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
- struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
+ struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table;
- max_contexts = 1 << cfg->s1cdmax;
+ max_contexts = 1 << cdcfg->max_cds_bits;
if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
max_contexts <= CTXDESC_L2_ENTRIES) {
- cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
cdcfg->num_l1_ents = max_contexts;
l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
} else {
- cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
CTXDESC_L2_ENTRIES);
@@ -1186,7 +1183,7 @@ static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
int i;
size_t size, l1size;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
+ struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table;
if (cdcfg->l1_desc) {
size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
@@ -1276,7 +1273,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
u64 val = le64_to_cpu(dst[0]);
bool ste_live = false;
struct arm_smmu_device *smmu = NULL;
- struct arm_smmu_s1_cfg *s1_cfg = NULL;
+ struct arm_smmu_ctx_desc_cfg *cd_table = NULL;
struct arm_smmu_s2_cfg *s2_cfg = NULL;
struct arm_smmu_domain *smmu_domain = NULL;
struct arm_smmu_cmdq_ent prefetch_cmd = {
@@ -1294,7 +1291,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
if (smmu_domain) {
switch (smmu_domain->stage) {
case ARM_SMMU_DOMAIN_S1:
- s1_cfg = &smmu_domain->s1_cfg;
+ cd_table = &smmu_domain->cd_table;
break;
case ARM_SMMU_DOMAIN_S2:
case ARM_SMMU_DOMAIN_NESTED:
@@ -1325,7 +1322,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
val = STRTAB_STE_0_V;
/* Bypass/fault */
- if (!smmu_domain || !(s1_cfg || s2_cfg)) {
+ if (!smmu_domain || !(cd_table || s2_cfg)) {
if (!smmu_domain && disable_bypass)
val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
else
@@ -1344,7 +1341,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
return;
}
- if (s1_cfg) {
+ if (cd_table) {
u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
@@ -1360,10 +1357,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
!master->stall_enabled)
dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
- val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
- FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
- FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
- FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
+ val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
+ FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
+ FIELD_PREP(STRTAB_STE_0_S1CDMAX,
+ cd_table->max_cds_bits) |
+ FIELD_PREP(STRTAB_STE_0_S1FMT,
+ cd_table->l1_desc ?
+ STRTAB_STE_0_S1FMT_64K_L2 :
+ STRTAB_STE_0_S1FMT_LINEAR);
}
if (s2_cfg) {
@@ -2082,11 +2083,11 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
/* Free the CD and ASID, if we allocated them */
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
- struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &smmu_domain->cd_table;
/* Prevent SVA from touching the CD while we're freeing it */
mutex_lock(&arm_smmu_asid_lock);
- if (cfg->cdcfg.cdtab)
+ if (cd_table->cdtab)
arm_smmu_free_cd_tables(smmu_domain);
arm_smmu_free_asid(&smmu_domain->cd);
mutex_unlock(&arm_smmu_asid_lock);
@@ -2106,7 +2107,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
int ret;
u32 asid;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &smmu_domain->cd_table;
struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
@@ -2119,7 +2120,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
if (ret)
goto out_unlock;
- cfg->s1cdmax = master->ssid_bits;
+ cd_table->max_cds_bits = master->ssid_bits;
smmu_domain->stall_enabled = master->stall_enabled;
@@ -2457,7 +2458,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
ret = -EINVAL;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
- master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
+ master->ssid_bits != smmu_domain->cd_table.max_cds_bits) {
ret = -EINVAL;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index f841383a55a35..35a93e8858872 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -595,12 +595,8 @@ struct arm_smmu_ctx_desc_cfg {
dma_addr_t cdtab_dma;
struct arm_smmu_l1_ctx_desc *l1_desc;
unsigned int num_l1_ents;
-};
-
-struct arm_smmu_s1_cfg {
- struct arm_smmu_ctx_desc_cfg cdcfg;
- u8 s1fmt;
- u8 s1cdmax;
+ /* log2 of the maximum number of CDs supported by this table */
+ u8 max_cds_bits;
};
struct arm_smmu_s2_cfg {
@@ -725,7 +721,7 @@ struct arm_smmu_domain {
union {
struct {
struct arm_smmu_ctx_desc cd;
- struct arm_smmu_s1_cfg s1_cfg;
+ struct arm_smmu_ctx_desc_cfg cd_table;
};
struct arm_smmu_s2_cfg s2_cfg;
};
--
2.41.0.640.ga95def55d0-goog
On Wed, Aug 09, 2023 at 01:11:58AM +0800, Michael Shavit wrote:
> Remove struct arm_smmu_s1_cfg. This is really just a CD table with a
> bit of extra information. Enhance the existing CD table structure,
> struct arm_smmu_ctx_desc_cfg, with max_cds_bits and replace all usages
> of arm_smmu_s1_cfg with arm_smmu_ctx_desc_cfg.
>
> Compute the other values that were stored in s1cfg directly from
> existing arm_smmu_ctx_desc_cfg.
>
> For clarity, use the name "cd_table" for the variables pointing to
> arm_smmu_ctx_desc_cfg in the new code instead of cdcfg. A later patch
> will make this fully consistent.
>
> Reviewed-by: Jason Gunthorpe <[email protected]>
> Reviewed-by: Nicolin Chen <[email protected]>
> Signed-off-by: Michael Shavit <[email protected]>
> ---
Sorry, but I'm having a hard time seeing some of the benefits of this
particular change. Most of the rest of the series looks good, but see
below:
> @@ -1071,7 +1071,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
> bool cd_live;
> __le64 *cdptr;
>
> - if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
> + if (WARN_ON(ssid >= (1 << smmu_domain->cd_table.max_cds_bits)))
> return -E2BIG;
S1CDMAX is architectural terminology -- it's the name given to bits 63:59
of the STE structure. Why is "max_cds_bits" better?
> cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
> @@ -1138,19 +1138,16 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
> size_t l1size;
> size_t max_contexts;
> struct arm_smmu_device *smmu = smmu_domain->smmu;
> - struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
> - struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
> + struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table;
>
> - max_contexts = 1 << cfg->s1cdmax;
> + max_contexts = 1 << cdcfg->max_cds_bits;
>
> if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
> max_contexts <= CTXDESC_L2_ENTRIES) {
> - cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
> cdcfg->num_l1_ents = max_contexts;
>
> l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
> } else {
> - cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
And here we're dropping the S1FMT setting from the code allocating the
CD tables (i.e. the only code which should be aware of it's configuration)
and now having the low-level STE writing logic here:
> @@ -1360,10 +1357,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
> !master->stall_enabled)
> dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
>
> - val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> - FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
> - FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
> - FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
> + val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
> + FIELD_PREP(STRTAB_STE_0_S1CDMAX,
> + cd_table->max_cds_bits) |
> + FIELD_PREP(STRTAB_STE_0_S1FMT,
> + cd_table->l1_desc ?
> + STRTAB_STE_0_S1FMT_64K_L2 :
> + STRTAB_STE_0_S1FMT_LINEAR);
magically know that we're using 64k tables.
Why is this an improvement to the driver?
Will
On Wed, Aug 09, 2023 at 03:55:43PM +0100, Will Deacon wrote:
> On Wed, Aug 09, 2023 at 10:59:33AM -0300, Jason Gunthorpe wrote:
> > On Wed, Aug 09, 2023 at 02:49:41PM +0100, Will Deacon wrote:
> >
> > > > @@ -1360,10 +1357,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
> > > > !master->stall_enabled)
> > > > dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
> > > >
> > > > - val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> > > > - FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
> > > > - FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
> > > > - FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
> > > > + val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> > > > + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
> > > > + FIELD_PREP(STRTAB_STE_0_S1CDMAX,
> > > > + cd_table->max_cds_bits) |
> > > > + FIELD_PREP(STRTAB_STE_0_S1FMT,
> > > > + cd_table->l1_desc ?
> > > > + STRTAB_STE_0_S1FMT_64K_L2 :
> > > > + STRTAB_STE_0_S1FMT_LINEAR);
> > >
> > > magically know that we're using 64k tables.
> > >
> > > Why is this an improvement to the driver?
> >
> > Put the above in a function
> >
> > arm_smmu_get_cd_ste(struct arm_smmu_ctx_desc_cfg *cdtab, void *ste)
> >
> > And it makes more sense.
>
> Sorry, but I'm not seeing it :/
>
> > We don't need the driver to precompute the "s1_cfg" parameters and
> > store them in a redundant struct along side the ctx_desc_cfg when we
> > can compute those same values on the fly with no cost.
>
> But the computation isn't happening -- the STRTAB_STE_0_S1FMT_64K_L2
> constant is hardcoded here.
So it would be hard coded in arm_smmu_get_cd_ste() because that
reflects the current state of CD table code.
> If we want to use 4k leaf tables in some cases, how would you add
> that? Such a change shouldn't need the low-level strtab creation
> code to change.
You would modify arm_smmu_ctx_desc_cfg to teach it about the different
format. It would gain some (enum?) member that specifies the CD table
layout and geometry. arm_smmu_get_cd_ste() will interpret that member
and generate the correct STE for the specifc cd table.
It is a standard OOP sort of practice that the object self-describes
and has accessors to export its internal definition. In this case the
STE bits are part of/derived from the internal definition of the CD
table.
Jason
On Wed, Aug 09, 2023 at 10:59:33AM -0300, Jason Gunthorpe wrote:
> On Wed, Aug 09, 2023 at 02:49:41PM +0100, Will Deacon wrote:
>
> > > @@ -1360,10 +1357,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
> > > !master->stall_enabled)
> > > dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
> > >
> > > - val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> > > - FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
> > > - FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
> > > - FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
> > > + val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> > > + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
> > > + FIELD_PREP(STRTAB_STE_0_S1CDMAX,
> > > + cd_table->max_cds_bits) |
> > > + FIELD_PREP(STRTAB_STE_0_S1FMT,
> > > + cd_table->l1_desc ?
> > > + STRTAB_STE_0_S1FMT_64K_L2 :
> > > + STRTAB_STE_0_S1FMT_LINEAR);
> >
> > magically know that we're using 64k tables.
> >
> > Why is this an improvement to the driver?
>
> Put the above in a function
>
> arm_smmu_get_cd_ste(struct arm_smmu_ctx_desc_cfg *cdtab, void *ste)
>
> And it makes more sense.
Sorry, but I'm not seeing it :/
> We don't need the driver to precompute the "s1_cfg" parameters and
> store them in a redundant struct along side the ctx_desc_cfg when we
> can compute those same values on the fly with no cost.
But the computation isn't happening -- the STRTAB_STE_0_S1FMT_64K_L2
constant is hardcoded here. If we want to use 4k leaf tables in some
cases, how would you add that? Such a change shouldn't need the low-level
strtab creation code to change.
Will
On Wed, Aug 09, 2023 at 02:49:41PM +0100, Will Deacon wrote:
> > @@ -1360,10 +1357,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
> > !master->stall_enabled)
> > dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
> >
> > - val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> > - FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
> > - FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
> > - FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
> > + val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> > + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
> > + FIELD_PREP(STRTAB_STE_0_S1CDMAX,
> > + cd_table->max_cds_bits) |
> > + FIELD_PREP(STRTAB_STE_0_S1FMT,
> > + cd_table->l1_desc ?
> > + STRTAB_STE_0_S1FMT_64K_L2 :
> > + STRTAB_STE_0_S1FMT_LINEAR);
>
> magically know that we're using 64k tables.
>
> Why is this an improvement to the driver?
Put the above in a function
arm_smmu_get_cd_ste(struct arm_smmu_ctx_desc_cfg *cdtab, void *ste)
And it makes more sense.
We don't need the driver to precompute the "s1_cfg" parameters and
store them in a redundant struct along side the ctx_desc_cfg when we
can compute those same values on the fly with no cost.
Jason
On Wed, Aug 09, 2023 at 05:22:54PM +0100, Will Deacon wrote:
> > > If we want to use 4k leaf tables in some cases, how would you add
> > > that? Such a change shouldn't need the low-level strtab creation
> > > code to change.
> >
> > You would modify arm_smmu_ctx_desc_cfg to teach it about the different
> > format. It would gain some (enum?) member that specifies the CD table
> > layout and geometry. arm_smmu_get_cd_ste() will interpret that member
> > and generate the correct STE for the specifc cd table.
>
> Sounds a lot like the existing s1fmt field. Can we keep it?
If you are OK with the dead code, I don't object. But let's put it in
the struct arm_smmu_ctx_desc_cfg.
Jason
On Wed, Aug 09, 2023 at 01:26:41PM -0300, Jason Gunthorpe wrote:
> On Wed, Aug 09, 2023 at 05:22:54PM +0100, Will Deacon wrote:
>
> > > > If we want to use 4k leaf tables in some cases, how would you add
> > > > that? Such a change shouldn't need the low-level strtab creation
> > > > code to change.
> > >
> > > You would modify arm_smmu_ctx_desc_cfg to teach it about the different
> > > format. It would gain some (enum?) member that specifies the CD table
> > > layout and geometry. arm_smmu_get_cd_ste() will interpret that member
> > > and generate the correct STE for the specifc cd table.
> >
> > Sounds a lot like the existing s1fmt field. Can we keep it?
>
> If you are OK with the dead code, I don't object. But let's put it in
> the struct arm_smmu_ctx_desc_cfg.
Ok, we have a deal!
Will
On Wed, Aug 09, 2023 at 12:08:02PM -0300, Jason Gunthorpe wrote:
> On Wed, Aug 09, 2023 at 03:55:43PM +0100, Will Deacon wrote:
> > On Wed, Aug 09, 2023 at 10:59:33AM -0300, Jason Gunthorpe wrote:
> > > On Wed, Aug 09, 2023 at 02:49:41PM +0100, Will Deacon wrote:
> > >
> > > > > @@ -1360,10 +1357,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
> > > > > !master->stall_enabled)
> > > > > dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
> > > > >
> > > > > - val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> > > > > - FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
> > > > > - FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
> > > > > - FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
> > > > > + val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> > > > > + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
> > > > > + FIELD_PREP(STRTAB_STE_0_S1CDMAX,
> > > > > + cd_table->max_cds_bits) |
> > > > > + FIELD_PREP(STRTAB_STE_0_S1FMT,
> > > > > + cd_table->l1_desc ?
> > > > > + STRTAB_STE_0_S1FMT_64K_L2 :
> > > > > + STRTAB_STE_0_S1FMT_LINEAR);
> > > >
> > > > magically know that we're using 64k tables.
> > > >
> > > > Why is this an improvement to the driver?
> > >
> > > Put the above in a function
> > >
> > > arm_smmu_get_cd_ste(struct arm_smmu_ctx_desc_cfg *cdtab, void *ste)
> > >
> > > And it makes more sense.
> >
> > Sorry, but I'm not seeing it :/
> >
> > > We don't need the driver to precompute the "s1_cfg" parameters and
> > > store them in a redundant struct along side the ctx_desc_cfg when we
> > > can compute those same values on the fly with no cost.
> >
> > But the computation isn't happening -- the STRTAB_STE_0_S1FMT_64K_L2
> > constant is hardcoded here.
>
> So it would be hard coded in arm_smmu_get_cd_ste() because that
> reflects the current state of CD table code.
>
> > If we want to use 4k leaf tables in some cases, how would you add
> > that? Such a change shouldn't need the low-level strtab creation
> > code to change.
>
> You would modify arm_smmu_ctx_desc_cfg to teach it about the different
> format. It would gain some (enum?) member that specifies the CD table
> layout and geometry. arm_smmu_get_cd_ste() will interpret that member
> and generate the correct STE for the specifc cd table.
Sounds a lot like the existing s1fmt field. Can we keep it?
Will
On Thu, Aug 10, 2023 at 05:33:53PM +0800, Michael Shavit wrote:
> > > > Sounds a lot like the existing s1fmt field. Can we keep it?
> > >
> > > If you are OK with the dead code, I don't object. But let's put it in
> > > the struct arm_smmu_ctx_desc_cfg.
> >
> > Ok, we have a deal!
>
> What dead code? Is the deal here that we keep the field, but still
> infer the value to write from (cd_table->l1_desc == null) in
> arm_smmu_write_strtab_ent??
Keep the field and write it directly when populating the ste (i.e. don't
infer anything), but the field moves into 'struct arm_smmu_ctx_desc_cfg'.
Will
> > > Sounds a lot like the existing s1fmt field. Can we keep it?
> >
> > If you are OK with the dead code, I don't object. But let's put it in
> > the struct arm_smmu_ctx_desc_cfg.
>
> Ok, we have a deal!
What dead code? Is the deal here that we keep the field, but still
infer the value to write from (cd_table->l1_desc == null) in
arm_smmu_write_strtab_ent??
On Thu, Aug 10, 2023 at 10:43:33AM +0100, Will Deacon wrote:
> On Thu, Aug 10, 2023 at 05:33:53PM +0800, Michael Shavit wrote:
> > > > > Sounds a lot like the existing s1fmt field. Can we keep it?
> > > >
> > > > If you are OK with the dead code, I don't object. But let's put it in
> > > > the struct arm_smmu_ctx_desc_cfg.
> > >
> > > Ok, we have a deal!
> >
> > What dead code? Is the deal here that we keep the field, but still
> > infer the value to write from (cd_table->l1_desc == null) in
> > arm_smmu_write_strtab_ent??
>
> Keep the field and write it directly when populating the ste (i.e. don't
> infer anything), but the field moves into 'struct arm_smmu_ctx_desc_cfg'.
Yes - the 'dead code' is that we introduce storage for a field that is
always a known constant (STRTAB_STE_0_S1FMT_64K_L2).
Jason
> > > What dead code? Is the deal here that we keep the field, but still
> > > infer the value to write from (cd_table->l1_desc == null) in
> > > arm_smmu_write_strtab_ent??
> >
> > Keep the field and write it directly when populating the ste (i.e. don't
> > infer anything), but the field moves into 'struct arm_smmu_ctx_desc_cfg'.
>
> Yes - the 'dead code' is that we introduce storage for a field that is
> always a known constant (STRTAB_STE_0_S1FMT_64K_L2).
I'm not sure we're on the same page here. s1fmt could contain either
`STRTAB_STE_0_S1FMT_64K_L2` or `STRTAB_STE_0_S1FMT_LINEAR`, and this
value will be directly copied in arm_smmu_write_strtab_ent.
On Fri, Aug 11, 2023 at 01:15:14AM +0800, Michael Shavit wrote:
> > > > What dead code? Is the deal here that we keep the field, but still
> > > > infer the value to write from (cd_table->l1_desc == null) in
> > > > arm_smmu_write_strtab_ent??
> > >
> > > Keep the field and write it directly when populating the ste (i.e. don't
> > > infer anything), but the field moves into 'struct arm_smmu_ctx_desc_cfg'.
> >
> > Yes - the 'dead code' is that we introduce storage for a field that is
> > always a known constant (STRTAB_STE_0_S1FMT_64K_L2).
>
> I'm not sure we're on the same page here. s1fmt could contain either
> `STRTAB_STE_0_S1FMT_64K_L2` or `STRTAB_STE_0_S1FMT_LINEAR`, and this
> value will be directly copied in arm_smmu_write_strtab_ent.
Ah, I did not check this closely, Will said:
> But the computation isn't happening -- the STRTAB_STE_0_S1FMT_64K_L2
> constant is hardcoded here.
So the nuanced answer is that computation is happening because today
the format of the CD table (linear vs 64k) is encoded in l1_desc:
+ cd_table->l1_desc ?
+ STRTAB_STE_0_S1FMT_64K_L2 :
+ STRTAB_STE_0_S1FMT_LINEAR);
So I would suggest that along with adding s1fmt to
arm_smmu_ctx_desc_cfg you also go and normalize the usage:
@@ -1030,7 +1030,7 @@ static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- if (!cd_table->l1_desc)
+ if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS;
Jason