Resending the patches by Zhen Lei <[email protected]> that add
support for SMMU's ECMDQ feature.
I have tested these patches on Marvell SoC that has the ECMDQ feature.
Zhen Lei (4):
iommu/arm-smmu-v3: Add support for ECMDQ register mode
iommu/arm-smmu-v3: Ensure that a set of associated commands are
inserted in the same ECMDQ
iommu/arm-smmu-v3: Add arm_smmu_ecmdq_issue_cmdlist() for non-shared
ECMDQ
iommu/arm-smmu-v3: Add support for less than one ECMDQ per core
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 419 +++++++++++++++++++-
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 37 ++
2 files changed, 448 insertions(+), 8 deletions(-)
--
2.34.1
From: Zhen Lei <[email protected]>
Ensure that each core exclusively occupies an ECMDQ and all of them are
enabled during initialization. During this initialization process, any
errors will result in a fallback to using normal CMDQ.
When GERROR is triggered by ECMDQ, all ECMDQs need to be traversed: the
ECMDQs with errors will be processed and the ECMDQs without errors will
be skipped directly.
Compared with register SMMU_CMDQ_PROD, register SMMU_ECMDQ_PROD has one
more 'EN' bit and one more 'ERRACK' bit. Therefore, an extra member
'ecmdq_prod' is added to record the values of these two bits. Each time
register SMMU_ECMDQ_PROD is updated, the value of 'ecmdq_prod' is ORed.
After the error indicated by SMMU_GERROR.CMDQP_ERR is fixed, the 'ERRACK'
bit needs to be toggled to resume the corresponding ECMDQ. Therefore, a
rwlock is used to protect the write operation to bit 'ERRACK' during error
handling and the read operation to bit 'ERRACK' during command insertion.
Signed-off-by: Zhen Lei <[email protected]>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 210 +++++++++++++++++++-
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 36 ++++
2 files changed, 245 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 9b0dc3505601..dfb5bf8cbcf9 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -347,6 +347,14 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
{
+ if (smmu->ecmdq_enabled) {
+ struct arm_smmu_ecmdq *ecmdq;
+
+ ecmdq = *this_cpu_ptr(smmu->ecmdq);
+
+ return &ecmdq->cmdq;
+ }
+
return &smmu->cmdq;
}
@@ -429,6 +437,38 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
}
+static void arm_smmu_ecmdq_skip_err(struct arm_smmu_device *smmu)
+{
+ int i;
+ u32 prod, cons;
+ struct arm_smmu_queue *q;
+ struct arm_smmu_ecmdq *ecmdq;
+
+ for (i = 0; i < smmu->nr_ecmdq; i++) {
+ unsigned long flags;
+
+ ecmdq = *per_cpu_ptr(smmu->ecmdq, i);
+ q = &ecmdq->cmdq.q;
+
+ prod = readl_relaxed(q->prod_reg);
+ cons = readl_relaxed(q->cons_reg);
+ if (((prod ^ cons) & ECMDQ_CONS_ERR) == 0)
+ continue;
+
+ __arm_smmu_cmdq_skip_err(smmu, q);
+
+ write_lock_irqsave(&q->ecmdq_lock, flags);
+ q->ecmdq_prod &= ~ECMDQ_PROD_ERRACK;
+ q->ecmdq_prod |= cons & ECMDQ_CONS_ERR;
+
+ prod = readl_relaxed(q->prod_reg);
+ prod &= ~ECMDQ_PROD_ERRACK;
+ prod |= cons & ECMDQ_CONS_ERR;
+ writel(prod, q->prod_reg);
+ write_unlock_irqrestore(&q->ecmdq_lock, flags);
+ }
+}
+
/*
* Command queue locking.
* This is a form of bastardised rwlock with the following major changes:
@@ -825,7 +865,13 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
* d. Advance the hardware prod pointer
* Control dependency ordering from the entries becoming valid.
*/
- writel_relaxed(prod, cmdq->q.prod_reg);
+ if (smmu->ecmdq_enabled) {
+ read_lock(&cmdq->q.ecmdq_lock);
+ writel_relaxed(prod | cmdq->q.ecmdq_prod, cmdq->q.prod_reg);
+ read_unlock(&cmdq->q.ecmdq_lock);
+ } else {
+ writel_relaxed(prod, cmdq->q.prod_reg);
+ }
/*
* e. Tell the next owner we're done
@@ -1701,6 +1747,9 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
if (active & GERROR_CMDQ_ERR)
arm_smmu_cmdq_skip_err(smmu);
+ if (active & GERROR_CMDQP_ERR)
+ arm_smmu_ecmdq_skip_err(smmu);
+
writel(gerror, smmu->base + ARM_SMMU_GERRORN);
return IRQ_HANDLED;
}
@@ -2957,6 +3006,20 @@ static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
return 0;
}
+static int arm_smmu_ecmdq_init(struct arm_smmu_cmdq *cmdq)
+{
+ unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
+
+ atomic_set(&cmdq->owner_prod, 0);
+ atomic_set(&cmdq->lock, 0);
+
+ cmdq->valid_map = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
+ if (!cmdq->valid_map)
+ return -ENOMEM;
+
+ return 0;
+}
+
static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
{
int ret;
@@ -3307,6 +3370,7 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
{
+ int i;
int ret;
u32 reg, enables;
struct arm_smmu_cmdq_ent cmd;
@@ -3351,6 +3415,28 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
+ for (i = 0; i < smmu->nr_ecmdq; i++) {
+ struct arm_smmu_ecmdq *ecmdq;
+ struct arm_smmu_queue *q;
+
+ ecmdq = *per_cpu_ptr(smmu->ecmdq, i);
+ q = &ecmdq->cmdq.q;
+
+ writeq_relaxed(q->q_base, ecmdq->base + ARM_SMMU_ECMDQ_BASE);
+ writel_relaxed(q->llq.prod, ecmdq->base + ARM_SMMU_ECMDQ_PROD);
+ writel_relaxed(q->llq.cons, ecmdq->base + ARM_SMMU_ECMDQ_CONS);
+
+ /* enable ecmdq */
+ writel(ECMDQ_PROD_EN, q->prod_reg);
+ ret = readl_relaxed_poll_timeout(q->cons_reg, reg, reg & ECMDQ_CONS_ENACK,
+ 1, ARM_SMMU_POLL_TIMEOUT_US);
+ if (ret) {
+ dev_err(smmu->dev, "ecmdq[%d] enable failed\n", i);
+ smmu->ecmdq_enabled = 0;
+ break;
+ }
+ }
+
enables = CR0_CMDQEN;
ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
ARM_SMMU_CR0ACK);
@@ -3476,6 +3562,115 @@ static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
}
break;
}
+};
+
+static int arm_smmu_ecmdq_layout(struct arm_smmu_device *smmu)
+{
+ int cpu;
+ struct arm_smmu_ecmdq *ecmdq;
+
+ if (num_possible_cpus() <= smmu->nr_ecmdq) {
+ ecmdq = devm_alloc_percpu(smmu->dev, *ecmdq);
+ if (!ecmdq)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu)
+ *per_cpu_ptr(smmu->ecmdq, cpu) = per_cpu_ptr(ecmdq, cpu);
+
+ /* A core requires at most one ECMDQ */
+ smmu->nr_ecmdq = num_possible_cpus();
+
+ return 0;
+ }
+
+ return -ENOSPC;
+}
+
+static int arm_smmu_ecmdq_probe(struct arm_smmu_device *smmu)
+{
+ int ret, cpu;
+ u32 i, nump, numq, gap;
+ u32 reg, shift_increment;
+ u64 addr, smmu_dma_base;
+ void __iomem *cp_regs, *cp_base;
+
+ /* IDR6 */
+ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR6);
+ nump = 1 << FIELD_GET(IDR6_LOG2NUMP, reg);
+ numq = 1 << FIELD_GET(IDR6_LOG2NUMQ, reg);
+ smmu->nr_ecmdq = nump * numq;
+ gap = ECMDQ_CP_RRESET_SIZE >> FIELD_GET(IDR6_LOG2NUMQ, reg);
+
+ smmu_dma_base = (vmalloc_to_pfn(smmu->base) << PAGE_SHIFT);
+ cp_regs = ioremap(smmu_dma_base + ARM_SMMU_ECMDQ_CP_BASE, PAGE_SIZE);
+ if (!cp_regs)
+ return -ENOMEM;
+
+ for (i = 0; i < nump; i++) {
+ u64 val, pre_addr;
+
+ val = readq_relaxed(cp_regs + 32 * i);
+ if (!(val & ECMDQ_CP_PRESET)) {
+ iounmap(cp_regs);
+ dev_err(smmu->dev, "ecmdq control page %u is memory mode\n", i);
+ return -EFAULT;
+ }
+
+ if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
+ iounmap(cp_regs);
+ dev_err(smmu->dev, "ecmdq_cp memory region is not contiguous\n");
+ return -EFAULT;
+ }
+
+ pre_addr = val & ECMDQ_CP_ADDR;
+ }
+
+ addr = readl_relaxed(cp_regs) & ECMDQ_CP_ADDR;
+ iounmap(cp_regs);
+
+ cp_base = devm_ioremap(smmu->dev, smmu_dma_base + addr, ECMDQ_CP_RRESET_SIZE * nump);
+ if (!cp_base)
+ return -ENOMEM;
+
+ smmu->ecmdq = devm_alloc_percpu(smmu->dev, struct arm_smmu_ecmdq *);
+ if (!smmu->ecmdq)
+ return -ENOMEM;
+
+ ret = arm_smmu_ecmdq_layout(smmu);
+ if (ret)
+ return ret;
+
+ shift_increment = order_base_2(num_possible_cpus() / smmu->nr_ecmdq);
+
+ addr = 0;
+ for_each_possible_cpu(cpu) {
+ struct arm_smmu_ecmdq *ecmdq;
+ struct arm_smmu_queue *q;
+
+ ecmdq = *per_cpu_ptr(smmu->ecmdq, cpu);
+ ecmdq->base = cp_base + addr;
+
+ q = &ecmdq->cmdq.q;
+
+ q->llq.max_n_shift = ECMDQ_MAX_SZ_SHIFT + shift_increment;
+ ret = arm_smmu_init_one_queue(smmu, q, ecmdq->base, ARM_SMMU_ECMDQ_PROD,
+ ARM_SMMU_ECMDQ_CONS, CMDQ_ENT_DWORDS, "ecmdq");
+ if (ret)
+ return ret;
+
+ q->ecmdq_prod = ECMDQ_PROD_EN;
+ rwlock_init(&q->ecmdq_lock);
+
+ ret = arm_smmu_ecmdq_init(&ecmdq->cmdq);
+ if (ret) {
+ dev_err(smmu->dev, "ecmdq[%d] init failed\n", i);
+ return ret;
+ }
+
+ addr += gap;
+ }
+
+ return 0;
}
static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
@@ -3588,6 +3783,9 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
return -ENXIO;
}
+ if (reg & IDR1_ECMDQ)
+ smmu->features |= ARM_SMMU_FEAT_ECMDQ;
+
/* Queue sizes, capped to ensure natural alignment */
smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
FIELD_GET(IDR1_CMDQS, reg));
@@ -3695,6 +3893,16 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
smmu->ias, smmu->oas, smmu->features);
+
+ if (smmu->features & ARM_SMMU_FEAT_ECMDQ) {
+ int err;
+
+ err = arm_smmu_ecmdq_probe(smmu);
+ if (err) {
+ dev_err(smmu->dev, "suppress ecmdq feature, errno=%d\n", err);
+ smmu->ecmdq_enabled = 0;
+ }
+ }
return 0;
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index dcab85698a4e..1f8777817e31 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -41,6 +41,7 @@
#define IDR0_S2P (1 << 0)
#define ARM_SMMU_IDR1 0x4
+#define IDR1_ECMDQ (1 << 31)
#define IDR1_TABLES_PRESET (1 << 30)
#define IDR1_QUEUES_PRESET (1 << 29)
#define IDR1_REL (1 << 28)
@@ -113,6 +114,7 @@
#define ARM_SMMU_IRQ_CTRLACK 0x54
#define ARM_SMMU_GERROR 0x60
+#define GERROR_CMDQP_ERR (1 << 9)
#define GERROR_SFM_ERR (1 << 8)
#define GERROR_MSI_GERROR_ABT_ERR (1 << 7)
#define GERROR_MSI_PRIQ_ABT_ERR (1 << 6)
@@ -158,6 +160,26 @@
#define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8
#define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
+#define ARM_SMMU_IDR6 0x190
+#define IDR6_LOG2NUMP GENMASK(27, 24)
+#define IDR6_LOG2NUMQ GENMASK(19, 16)
+#define IDR6_BA_DOORBELLS GENMASK(9, 0)
+
+#define ARM_SMMU_ECMDQ_BASE 0x00
+#define ARM_SMMU_ECMDQ_PROD 0x08
+#define ARM_SMMU_ECMDQ_CONS 0x0c
+#define ECMDQ_MAX_SZ_SHIFT 8
+#define ECMDQ_PROD_EN (1 << 31)
+#define ECMDQ_CONS_ENACK (1 << 31)
+#define ECMDQ_CONS_ERR (1 << 23)
+#define ECMDQ_PROD_ERRACK (1 << 23)
+
+#define ARM_SMMU_ECMDQ_CP_BASE 0x4000
+#define ECMDQ_CP_ADDR GENMASK_ULL(51, 12)
+#define ECMDQ_CP_CMDQGS GENMASK_ULL(2, 1)
+#define ECMDQ_CP_PRESET (1UL << 0)
+#define ECMDQ_CP_RRESET_SIZE 0x10000
+
#define ARM_SMMU_REG_SZ 0xe00
/* Common MSI config fields */
@@ -527,6 +549,8 @@ struct arm_smmu_ll_queue {
struct arm_smmu_queue {
struct arm_smmu_ll_queue llq;
int irq; /* Wired interrupt */
+ u32 ecmdq_prod;
+ rwlock_t ecmdq_lock;
__le64 *base;
dma_addr_t base_dma;
@@ -552,6 +576,11 @@ struct arm_smmu_cmdq {
atomic_t lock;
};
+struct arm_smmu_ecmdq {
+ struct arm_smmu_cmdq cmdq;
+ void __iomem *base;
+};
+
struct arm_smmu_cmdq_batch {
u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
int num;
@@ -646,6 +675,7 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_SVA (1 << 17)
#define ARM_SMMU_FEAT_E2H (1 << 18)
#define ARM_SMMU_FEAT_NESTING (1 << 19)
+#define ARM_SMMU_FEAT_ECMDQ (1 << 20)
u32 features;
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
@@ -654,6 +684,12 @@ struct arm_smmu_device {
#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3)
u32 options;
+ union {
+ u32 nr_ecmdq;
+ u32 ecmdq_enabled;
+ };
+ struct arm_smmu_ecmdq *__percpu *ecmdq;
+
struct arm_smmu_cmdq cmdq;
struct arm_smmu_evtq evtq;
struct arm_smmu_priq priq;
--
2.34.1
On Fri, 21 Jul 2023 02:35:09 -0400
Tanmay Jagdale <[email protected]> wrote:
> Resending the patches by Zhen Lei <[email protected]> that add
> support for SMMU's ECMDQ feature.
>
> I have tested these patches on Marvell SoC that has the ECMDQ feature.
Hi Tammay,
If sending someone else's series you are 'handling' the patches, so should
add your own SoB after the original author's one. Also, given you've tested them, a
Tested-by tag would make sense.
Any perf numbers you can share would also help push this forwards by showing
why people care. I believe Leizhen's numbers were emulation based, so
not as good as real hardware for making the justification for the added
complexity!
Jonathan
>
> Zhen Lei (4):
> iommu/arm-smmu-v3: Add support for ECMDQ register mode
> iommu/arm-smmu-v3: Ensure that a set of associated commands are
> inserted in the same ECMDQ
> iommu/arm-smmu-v3: Add arm_smmu_ecmdq_issue_cmdlist() for non-shared
> ECMDQ
> iommu/arm-smmu-v3: Add support for less than one ECMDQ per core
>
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 419 +++++++++++++++++++-
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 37 ++
> 2 files changed, 448 insertions(+), 8 deletions(-)
>
On Fri, Jul 21, 2023 at 02:35:09AM -0400, Tanmay Jagdale wrote:
> Resending the patches by Zhen Lei <[email protected]> that add
> support for SMMU's ECMDQ feature.
>
> I have tested these patches on Marvell SoC that has the ECMDQ
> feature.
Last time this came up Robin was interested in real world performance
gains
Do you have data to share that is motivating you to do this work?
Jason
On 2023/7/27 19:13, kernel test robot wrote:
> Hi Tanmay,
>
> kernel test robot noticed the following build warnings:
>
> [auto build test WARNING on v6.5-rc2]
> [also build test WARNING on linus/master next-20230727]
> [cannot apply to joro-iommu/next]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>
> url: https://github.com/intel-lab-lkp/linux/commits/Tanmay-Jagdale/iommu-arm-smmu-v3-Add-support-for-ECMDQ-register-mode/20230721-143913
> base: v6.5-rc2
> patch link: https://lore.kernel.org/r/20230721063513.33431-2-tanmay%40marvell.com
> patch subject: [RESEND PATCH 1/4] iommu/arm-smmu-v3: Add support for ECMDQ register mode
> config: arm64-randconfig-r083-20230726 (https://download.01.org/0day-ci/archive/20230727/[email protected]/config)
> compiler: aarch64-linux-gcc (GCC) 12.3.0
> reproduce: (https://download.01.org/0day-ci/archive/20230727/[email protected]/reproduce)
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <[email protected]>
> | Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
>
> sparse warnings: (new ones prefixed by >>)
>>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct arm_smmu_ecmdq *ecmdq @@ got struct arm_smmu_ecmdq [noderef] __percpu * @@
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse: expected struct arm_smmu_ecmdq *ecmdq
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse: got struct arm_smmu_ecmdq [noderef] __percpu *
OK, thanks, the local variable 'ecmdq' need modifier '__percpu'.
>>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected void const [noderef] __percpu *__vpp_verify @@ got struct arm_smmu_ecmdq * @@
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse: expected void const [noderef] __percpu *__vpp_verify
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse: got struct arm_smmu_ecmdq *
>>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void const *addr @@ got void [noderef] __iomem *base @@
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse: expected void const *addr
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse: got void [noderef] __iomem *base
Perhaps it would be better to add a member to the structure arm_smmu_device
to record the start physical address of the smmu.
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c: note: in included file (through arch/arm64/include/asm/atomic.h, include/linux/atomic.h, include/asm-generic/bitops/atomic.h, ...):
> arch/arm64/include/asm/cmpxchg.h:168:1: sparse: sparse: cast truncates bits from constant value (ffffffff80000000 becomes 0)
> arch/arm64/include/asm/cmpxchg.h:168:1: sparse: sparse: cast truncates bits from constant value (ffffffff80000000 becomes 0)
>
> vim +3573 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>
> 3566
> 3567 static int arm_smmu_ecmdq_layout(struct arm_smmu_device *smmu)
> 3568 {
> 3569 int cpu;
> 3570 struct arm_smmu_ecmdq *ecmdq;
> 3571
> 3572 if (num_possible_cpus() <= smmu->nr_ecmdq) {
>> 3573 ecmdq = devm_alloc_percpu(smmu->dev, *ecmdq);
> 3574 if (!ecmdq)
> 3575 return -ENOMEM;
> 3576
> 3577 for_each_possible_cpu(cpu)
>> 3578 *per_cpu_ptr(smmu->ecmdq, cpu) = per_cpu_ptr(ecmdq, cpu);
> 3579
> 3580 /* A core requires at most one ECMDQ */
> 3581 smmu->nr_ecmdq = num_possible_cpus();
> 3582
> 3583 return 0;
> 3584 }
> 3585
> 3586 return -ENOSPC;
> 3587 }
> 3588
> 3589 static int arm_smmu_ecmdq_probe(struct arm_smmu_device *smmu)
> 3590 {
> 3591 int ret, cpu;
> 3592 u32 i, nump, numq, gap;
> 3593 u32 reg, shift_increment;
> 3594 u64 addr, smmu_dma_base;
> 3595 void __iomem *cp_regs, *cp_base;
> 3596
> 3597 /* IDR6 */
> 3598 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR6);
> 3599 nump = 1 << FIELD_GET(IDR6_LOG2NUMP, reg);
> 3600 numq = 1 << FIELD_GET(IDR6_LOG2NUMQ, reg);
> 3601 smmu->nr_ecmdq = nump * numq;
> 3602 gap = ECMDQ_CP_RRESET_SIZE >> FIELD_GET(IDR6_LOG2NUMQ, reg);
> 3603
>> 3604 smmu_dma_base = (vmalloc_to_pfn(smmu->base) << PAGE_SHIFT);
> 3605 cp_regs = ioremap(smmu_dma_base + ARM_SMMU_ECMDQ_CP_BASE, PAGE_SIZE);
> 3606 if (!cp_regs)
> 3607 return -ENOMEM;
> 3608
> 3609 for (i = 0; i < nump; i++) {
> 3610 u64 val, pre_addr;
> 3611
> 3612 val = readq_relaxed(cp_regs + 32 * i);
> 3613 if (!(val & ECMDQ_CP_PRESET)) {
> 3614 iounmap(cp_regs);
> 3615 dev_err(smmu->dev, "ecmdq control page %u is memory mode\n", i);
> 3616 return -EFAULT;
> 3617 }
> 3618
> 3619 if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
> 3620 iounmap(cp_regs);
> 3621 dev_err(smmu->dev, "ecmdq_cp memory region is not contiguous\n");
> 3622 return -EFAULT;
> 3623 }
> 3624
> 3625 pre_addr = val & ECMDQ_CP_ADDR;
> 3626 }
> 3627
> 3628 addr = readl_relaxed(cp_regs) & ECMDQ_CP_ADDR;
> 3629 iounmap(cp_regs);
> 3630
> 3631 cp_base = devm_ioremap(smmu->dev, smmu_dma_base + addr, ECMDQ_CP_RRESET_SIZE * nump);
> 3632 if (!cp_base)
> 3633 return -ENOMEM;
> 3634
> 3635 smmu->ecmdq = devm_alloc_percpu(smmu->dev, struct arm_smmu_ecmdq *);
> 3636 if (!smmu->ecmdq)
> 3637 return -ENOMEM;
> 3638
> 3639 ret = arm_smmu_ecmdq_layout(smmu);
> 3640 if (ret)
> 3641 return ret;
> 3642
> 3643 shift_increment = order_base_2(num_possible_cpus() / smmu->nr_ecmdq);
> 3644
> 3645 addr = 0;
> 3646 for_each_possible_cpu(cpu) {
> 3647 struct arm_smmu_ecmdq *ecmdq;
> 3648 struct arm_smmu_queue *q;
> 3649
> 3650 ecmdq = *per_cpu_ptr(smmu->ecmdq, cpu);
> 3651 ecmdq->base = cp_base + addr;
> 3652
> 3653 q = &ecmdq->cmdq.q;
> 3654
> 3655 q->llq.max_n_shift = ECMDQ_MAX_SZ_SHIFT + shift_increment;
> 3656 ret = arm_smmu_init_one_queue(smmu, q, ecmdq->base, ARM_SMMU_ECMDQ_PROD,
> 3657 ARM_SMMU_ECMDQ_CONS, CMDQ_ENT_DWORDS, "ecmdq");
> 3658 if (ret)
> 3659 return ret;
> 3660
> 3661 q->ecmdq_prod = ECMDQ_PROD_EN;
> 3662 rwlock_init(&q->ecmdq_lock);
> 3663
> 3664 ret = arm_smmu_ecmdq_init(&ecmdq->cmdq);
> 3665 if (ret) {
> 3666 dev_err(smmu->dev, "ecmdq[%d] init failed\n", i);
> 3667 return ret;
> 3668 }
> 3669
> 3670 addr += gap;
> 3671 }
> 3672
> 3673 return 0;
> 3674 }
> 3675
>
--
Regards,
Zhen Lei
Hi Tanmay,
kernel test robot noticed the following build warnings:
[auto build test WARNING on v6.5-rc2]
[also build test WARNING on linus/master next-20230727]
[cannot apply to joro-iommu/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Tanmay-Jagdale/iommu-arm-smmu-v3-Add-support-for-ECMDQ-register-mode/20230721-143913
base: v6.5-rc2
patch link: https://lore.kernel.org/r/20230721063513.33431-2-tanmay%40marvell.com
patch subject: [RESEND PATCH 1/4] iommu/arm-smmu-v3: Add support for ECMDQ register mode
config: arm64-randconfig-r083-20230726 (https://download.01.org/0day-ci/archive/20230727/[email protected]/config)
compiler: aarch64-linux-gcc (GCC) 12.3.0
reproduce: (https://download.01.org/0day-ci/archive/20230727/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
sparse warnings: (new ones prefixed by >>)
>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct arm_smmu_ecmdq *ecmdq @@ got struct arm_smmu_ecmdq [noderef] __percpu * @@
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse: expected struct arm_smmu_ecmdq *ecmdq
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse: got struct arm_smmu_ecmdq [noderef] __percpu *
>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected void const [noderef] __percpu *__vpp_verify @@ got struct arm_smmu_ecmdq * @@
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse: expected void const [noderef] __percpu *__vpp_verify
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse: got struct arm_smmu_ecmdq *
>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void const *addr @@ got void [noderef] __iomem *base @@
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse: expected void const *addr
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse: got void [noderef] __iomem *base
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c: note: in included file (through arch/arm64/include/asm/atomic.h, include/linux/atomic.h, include/asm-generic/bitops/atomic.h, ...):
arch/arm64/include/asm/cmpxchg.h:168:1: sparse: sparse: cast truncates bits from constant value (ffffffff80000000 becomes 0)
arch/arm64/include/asm/cmpxchg.h:168:1: sparse: sparse: cast truncates bits from constant value (ffffffff80000000 becomes 0)
vim +3573 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
3566
3567 static int arm_smmu_ecmdq_layout(struct arm_smmu_device *smmu)
3568 {
3569 int cpu;
3570 struct arm_smmu_ecmdq *ecmdq;
3571
3572 if (num_possible_cpus() <= smmu->nr_ecmdq) {
> 3573 ecmdq = devm_alloc_percpu(smmu->dev, *ecmdq);
3574 if (!ecmdq)
3575 return -ENOMEM;
3576
3577 for_each_possible_cpu(cpu)
> 3578 *per_cpu_ptr(smmu->ecmdq, cpu) = per_cpu_ptr(ecmdq, cpu);
3579
3580 /* A core requires at most one ECMDQ */
3581 smmu->nr_ecmdq = num_possible_cpus();
3582
3583 return 0;
3584 }
3585
3586 return -ENOSPC;
3587 }
3588
3589 static int arm_smmu_ecmdq_probe(struct arm_smmu_device *smmu)
3590 {
3591 int ret, cpu;
3592 u32 i, nump, numq, gap;
3593 u32 reg, shift_increment;
3594 u64 addr, smmu_dma_base;
3595 void __iomem *cp_regs, *cp_base;
3596
3597 /* IDR6 */
3598 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR6);
3599 nump = 1 << FIELD_GET(IDR6_LOG2NUMP, reg);
3600 numq = 1 << FIELD_GET(IDR6_LOG2NUMQ, reg);
3601 smmu->nr_ecmdq = nump * numq;
3602 gap = ECMDQ_CP_RRESET_SIZE >> FIELD_GET(IDR6_LOG2NUMQ, reg);
3603
> 3604 smmu_dma_base = (vmalloc_to_pfn(smmu->base) << PAGE_SHIFT);
3605 cp_regs = ioremap(smmu_dma_base + ARM_SMMU_ECMDQ_CP_BASE, PAGE_SIZE);
3606 if (!cp_regs)
3607 return -ENOMEM;
3608
3609 for (i = 0; i < nump; i++) {
3610 u64 val, pre_addr;
3611
3612 val = readq_relaxed(cp_regs + 32 * i);
3613 if (!(val & ECMDQ_CP_PRESET)) {
3614 iounmap(cp_regs);
3615 dev_err(smmu->dev, "ecmdq control page %u is memory mode\n", i);
3616 return -EFAULT;
3617 }
3618
3619 if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
3620 iounmap(cp_regs);
3621 dev_err(smmu->dev, "ecmdq_cp memory region is not contiguous\n");
3622 return -EFAULT;
3623 }
3624
3625 pre_addr = val & ECMDQ_CP_ADDR;
3626 }
3627
3628 addr = readl_relaxed(cp_regs) & ECMDQ_CP_ADDR;
3629 iounmap(cp_regs);
3630
3631 cp_base = devm_ioremap(smmu->dev, smmu_dma_base + addr, ECMDQ_CP_RRESET_SIZE * nump);
3632 if (!cp_base)
3633 return -ENOMEM;
3634
3635 smmu->ecmdq = devm_alloc_percpu(smmu->dev, struct arm_smmu_ecmdq *);
3636 if (!smmu->ecmdq)
3637 return -ENOMEM;
3638
3639 ret = arm_smmu_ecmdq_layout(smmu);
3640 if (ret)
3641 return ret;
3642
3643 shift_increment = order_base_2(num_possible_cpus() / smmu->nr_ecmdq);
3644
3645 addr = 0;
3646 for_each_possible_cpu(cpu) {
3647 struct arm_smmu_ecmdq *ecmdq;
3648 struct arm_smmu_queue *q;
3649
3650 ecmdq = *per_cpu_ptr(smmu->ecmdq, cpu);
3651 ecmdq->base = cp_base + addr;
3652
3653 q = &ecmdq->cmdq.q;
3654
3655 q->llq.max_n_shift = ECMDQ_MAX_SZ_SHIFT + shift_increment;
3656 ret = arm_smmu_init_one_queue(smmu, q, ecmdq->base, ARM_SMMU_ECMDQ_PROD,
3657 ARM_SMMU_ECMDQ_CONS, CMDQ_ENT_DWORDS, "ecmdq");
3658 if (ret)
3659 return ret;
3660
3661 q->ecmdq_prod = ECMDQ_PROD_EN;
3662 rwlock_init(&q->ecmdq_lock);
3663
3664 ret = arm_smmu_ecmdq_init(&ecmdq->cmdq);
3665 if (ret) {
3666 dev_err(smmu->dev, "ecmdq[%d] init failed\n", i);
3667 return ret;
3668 }
3669
3670 addr += gap;
3671 }
3672
3673 return 0;
3674 }
3675
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Tanmay,
kernel test robot noticed the following build warnings:
[auto build test WARNING on v6.5-rc2]
[also build test WARNING on linus/master next-20230728]
[cannot apply to joro-iommu/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Tanmay-Jagdale/iommu-arm-smmu-v3-Add-support-for-ECMDQ-register-mode/20230721-143913
base: v6.5-rc2
patch link: https://lore.kernel.org/r/20230721063513.33431-2-tanmay%40marvell.com
patch subject: [RESEND PATCH 1/4] iommu/arm-smmu-v3: Add support for ECMDQ register mode
config: arm64-randconfig-r032-20230727 (https://download.01.org/0day-ci/archive/20230728/[email protected]/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
reproduce: (https://download.01.org/0day-ci/archive/20230728/[email protected]/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/
All warnings (new ones prefixed by >>):
>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3619:39: warning: variable 'pre_addr' is uninitialized when used here [-Wuninitialized]
3619 | if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
| ^~~~~~~~
include/linux/compiler.h:55:47: note: expanded from macro 'if'
55 | #define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) )
| ^~~~
include/linux/compiler.h:57:52: note: expanded from macro '__trace_if_var'
57 | #define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond))
| ^~~~
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3610:20: note: initialize the variable 'pre_addr' to silence this warning
3610 | u64 val, pre_addr;
| ^
| = 0
1 warning generated.
vim +/pre_addr +3619 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
3588
3589 static int arm_smmu_ecmdq_probe(struct arm_smmu_device *smmu)
3590 {
3591 int ret, cpu;
3592 u32 i, nump, numq, gap;
3593 u32 reg, shift_increment;
3594 u64 addr, smmu_dma_base;
3595 void __iomem *cp_regs, *cp_base;
3596
3597 /* IDR6 */
3598 reg = readl_relaxed(smmu->base + ARM_SMMU_IDR6);
3599 nump = 1 << FIELD_GET(IDR6_LOG2NUMP, reg);
3600 numq = 1 << FIELD_GET(IDR6_LOG2NUMQ, reg);
3601 smmu->nr_ecmdq = nump * numq;
3602 gap = ECMDQ_CP_RRESET_SIZE >> FIELD_GET(IDR6_LOG2NUMQ, reg);
3603
3604 smmu_dma_base = (vmalloc_to_pfn(smmu->base) << PAGE_SHIFT);
3605 cp_regs = ioremap(smmu_dma_base + ARM_SMMU_ECMDQ_CP_BASE, PAGE_SIZE);
3606 if (!cp_regs)
3607 return -ENOMEM;
3608
3609 for (i = 0; i < nump; i++) {
3610 u64 val, pre_addr;
3611
3612 val = readq_relaxed(cp_regs + 32 * i);
3613 if (!(val & ECMDQ_CP_PRESET)) {
3614 iounmap(cp_regs);
3615 dev_err(smmu->dev, "ecmdq control page %u is memory mode\n", i);
3616 return -EFAULT;
3617 }
3618
> 3619 if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
3620 iounmap(cp_regs);
3621 dev_err(smmu->dev, "ecmdq_cp memory region is not contiguous\n");
3622 return -EFAULT;
3623 }
3624
3625 pre_addr = val & ECMDQ_CP_ADDR;
3626 }
3627
3628 addr = readl_relaxed(cp_regs) & ECMDQ_CP_ADDR;
3629 iounmap(cp_regs);
3630
3631 cp_base = devm_ioremap(smmu->dev, smmu_dma_base + addr, ECMDQ_CP_RRESET_SIZE * nump);
3632 if (!cp_base)
3633 return -ENOMEM;
3634
3635 smmu->ecmdq = devm_alloc_percpu(smmu->dev, struct arm_smmu_ecmdq *);
3636 if (!smmu->ecmdq)
3637 return -ENOMEM;
3638
3639 ret = arm_smmu_ecmdq_layout(smmu);
3640 if (ret)
3641 return ret;
3642
3643 shift_increment = order_base_2(num_possible_cpus() / smmu->nr_ecmdq);
3644
3645 addr = 0;
3646 for_each_possible_cpu(cpu) {
3647 struct arm_smmu_ecmdq *ecmdq;
3648 struct arm_smmu_queue *q;
3649
3650 ecmdq = *per_cpu_ptr(smmu->ecmdq, cpu);
3651 ecmdq->base = cp_base + addr;
3652
3653 q = &ecmdq->cmdq.q;
3654
3655 q->llq.max_n_shift = ECMDQ_MAX_SZ_SHIFT + shift_increment;
3656 ret = arm_smmu_init_one_queue(smmu, q, ecmdq->base, ARM_SMMU_ECMDQ_PROD,
3657 ARM_SMMU_ECMDQ_CONS, CMDQ_ENT_DWORDS, "ecmdq");
3658 if (ret)
3659 return ret;
3660
3661 q->ecmdq_prod = ECMDQ_PROD_EN;
3662 rwlock_init(&q->ecmdq_lock);
3663
3664 ret = arm_smmu_ecmdq_init(&ecmdq->cmdq);
3665 if (ret) {
3666 dev_err(smmu->dev, "ecmdq[%d] init failed\n", i);
3667 return ret;
3668 }
3669
3670 addr += gap;
3671 }
3672
3673 return 0;
3674 }
3675
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki