2022-07-19 07:35:38

by Can Guo

[permalink] [raw]
Subject: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

From: Asutosh Das <[email protected]>

Adds MCQ support to UFS driver.

Co-developed-by: Can Guo <[email protected]>
Signed-off-by: Asutosh Das <[email protected]>
Signed-off-by: Can Guo <[email protected]>
---
drivers/ufs/core/Makefile | 2 +-
drivers/ufs/core/ufs-mcq.c | 555 +++++++++++++++++++++++++++++++++++++++++++++
drivers/ufs/core/ufshcd.c | 362 ++++++++++++++++++++---------
include/ufs/ufs.h | 1 +
include/ufs/ufshcd.h | 231 ++++++++++++++++++-
include/ufs/ufshci.h | 89 ++++++++
6 files changed, 1133 insertions(+), 107 deletions(-)
create mode 100644 drivers/ufs/core/ufs-mcq.c

diff --git a/drivers/ufs/core/Makefile b/drivers/ufs/core/Makefile
index 62f38c5..4d02e0f 100644
--- a/drivers/ufs/core/Makefile
+++ b/drivers/ufs/core/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0

obj-$(CONFIG_SCSI_UFSHCD) += ufshcd-core.o
-ufshcd-core-y += ufshcd.o ufs-sysfs.o
+ufshcd-core-y += ufshcd.o ufs-sysfs.o ufs-mcq.o
ufshcd-core-$(CONFIG_DEBUG_FS) += ufs-debugfs.o
ufshcd-core-$(CONFIG_SCSI_UFS_BSG) += ufs_bsg.o
ufshcd-core-$(CONFIG_SCSI_UFS_CRYPTO) += ufshcd-crypto.o
diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
new file mode 100644
index 0000000..5bdee7d
--- /dev/null
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -0,0 +1,555 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Linux Foundation. All rights reserved.
+ *
+ * Authors:
+ * Asutosh Das <[email protected]>
+ * Can Guo <[email protected]>
+ */
+
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <ufs/ufshcd.h>
+#include <asm/unaligned.h>
+
+/* resources */
+static const struct ufshcd_res_info_t ufshcd_res_info[RES_MAX] = {
+ {"ufs_mem", NULL, NULL},
+ {"mcq", NULL, NULL},
+ {"mcq_sqd", NULL, NULL},
+ {"mcq_sqis", NULL, NULL},
+ {"mcq_cqd", NULL, NULL},
+ {"mcq_cqis", NULL, NULL},
+ {"mcq_vs", NULL, NULL},
+};
+
+void ufshcd_mcq_config_mac(struct ufs_hba *hba)
+{
+ u32 val = ufshcd_readl(hba, REG_UFS_MCQ_CFG);
+
+ val &= ~MCQ_CFG_MAC_MASK;
+ val |= hba->dev_info.bqueuedepth << MCQ_CFG_MAC_OFFSET;
+ ufshcd_writel(hba, val, REG_UFS_MCQ_CFG);
+}
+EXPORT_SYMBOL_GPL(ufshcd_mcq_config_mac);
+
+static void ufshcd_mcq_init_lrb(struct ufs_hba *hba, struct ufs_hw_queue *hwq,
+ struct ufshcd_lrb *lrb, int i)
+{
+ struct utp_transfer_cmd_desc *cmd_descp = (void *)hwq->ucdl_base_addr;
+ struct utp_transfer_req_desc *utrdlp = hwq->sqe_shadow_addr;
+ dma_addr_t cmd_desc_dma_addr = hwq->ucdl_dma_addr +
+ i * sizeof(struct utp_transfer_cmd_desc);
+ u16 response_offset = offsetof(struct utp_transfer_cmd_desc,
+ response_upiu);
+ u16 prdt_offset = offsetof(struct utp_transfer_cmd_desc, prd_table);
+
+ lrb->utr_descriptor_ptr = utrdlp + i;
+ lrb->utrd_dma_addr = hwq->sqe_dma_addr +
+ i * sizeof(struct utp_transfer_req_desc);
+ lrb->ucd_req_ptr = (struct utp_upiu_req *)(cmd_descp + i);
+ lrb->ucd_req_dma_addr = cmd_desc_dma_addr;
+ lrb->ucd_rsp_ptr = (struct utp_upiu_rsp *)cmd_descp[i].response_upiu;
+ lrb->ucd_rsp_dma_addr = cmd_desc_dma_addr + response_offset;
+ lrb->ucd_prdt_ptr = cmd_descp[i].prd_table;
+ lrb->ucd_prdt_dma_addr = cmd_desc_dma_addr + prdt_offset;
+}
+
+struct ufshcd_lrb *ufshcd_mcq_find_lrb(struct ufs_hba *hba,
+ struct request *req,
+ struct ufs_hw_queue **hq)
+{
+ struct ufs_hw_queue *h;
+ struct ufshcd_lrb *lrbp;
+ u32 utag, hwq;
+ u8 tag = req->tag;
+
+ utag = blk_mq_unique_tag(req);
+ hwq = blk_mq_unique_tag_to_hwq(utag);
+ h = &hba->uhq[hwq + UFSHCD_MCQ_IO_QUEUE_OFFSET];
+ *hq = h;
+ lrbp = &h->lrb[tag];
+ lrbp->hw_queue_id = h->id;
+
+ return &h->lrb[tag];
+}
+EXPORT_SYMBOL_GPL(ufshcd_mcq_find_lrb);
+
+int ufshcd_mcq_memory_alloc(struct ufs_hba *hba)
+{
+ struct ufs_hw_queue *hwq;
+ size_t ucdl_size, utrdl_size, cqe_size;
+ int i;
+
+ for_each_hw_queue(hba, i) {
+ hwq = &hba->uhq[i];
+ ucdl_size = (sizeof(struct utp_transfer_cmd_desc) *
+ hwq->max_entries);
+ hwq->ucdl_base_addr = dmam_alloc_coherent(hba->dev, ucdl_size,
+ &hwq->ucdl_dma_addr,
+ GFP_KERNEL);
+ if (!hwq->ucdl_base_addr ||
+ WARN_ON(hwq->ucdl_dma_addr & (PAGE_SIZE - 1))) {
+ dev_err(hba->dev, "MCQ Command Descriptor Memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ utrdl_size = sizeof(struct utp_transfer_req_desc) *
+ hwq->max_entries;
+ hwq->sqe_base_addr = dmam_alloc_coherent(hba->dev, utrdl_size,
+ &hwq->sqe_dma_addr,
+ GFP_KERNEL);
+ if (!hwq->sqe_dma_addr || WARN_ON(hwq->sqe_dma_addr &
+ (PAGE_SIZE - 1))) {
+ dev_err(hba->dev, "Alloc SQE failed\n");
+ return -ENOMEM;
+ }
+
+ hwq->sqe_shadow_addr = devm_kzalloc(hba->dev, utrdl_size,
+ GFP_KERNEL);
+ if (!hwq->sqe_shadow_addr) {
+ dev_err(hba->dev, "Alloc SQE shadow failed\n");
+ return -ENOMEM;
+ }
+
+ cqe_size = sizeof(struct cq_entry) * hwq->max_entries,
+ hwq->cqe_base_addr = dmam_alloc_coherent(hba->dev, cqe_size,
+ &hwq->cqe_dma_addr,
+ GFP_KERNEL);
+ if (!hwq->cqe_dma_addr || WARN_ON(hwq->cqe_dma_addr &
+ (PAGE_SIZE - 1))) {
+ dev_err(hba->dev, "Alloc CQE failed\n");
+ return -ENOMEM;
+ }
+
+ hwq->lrb = devm_kcalloc(hba->dev, hwq->max_entries,
+ sizeof(struct ufshcd_lrb), GFP_KERNEL);
+ if (!hwq->lrb) {
+ dev_err(hba->dev, "LRB Memory allocation failed\n");
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ufshcd_mcq_memory_alloc);
+
+int ufshcd_mcq_memory_configure(struct ufs_hba *hba)
+{
+ struct utp_transfer_req_desc *utrdlp;
+ dma_addr_t cmd_desc_dma_base_addr;
+ dma_addr_t cmd_desc_dma_addr;
+ u16 response_offset;
+ u16 prdt_offset;
+ int cmd_desc_size;
+ struct ufs_hw_queue *hwq;
+ int i, j;
+
+ response_offset = offsetof(struct utp_transfer_cmd_desc, response_upiu);
+ prdt_offset = offsetof(struct utp_transfer_cmd_desc, prd_table);
+
+ cmd_desc_size = sizeof(struct utp_transfer_cmd_desc);
+
+ for_each_hw_queue(hba, i) {
+ hwq = &hba->uhq[i];
+ utrdlp = hwq->sqe_shadow_addr;
+ cmd_desc_dma_base_addr = hwq->ucdl_dma_addr;
+
+ for (j = 0; j < hwq->max_entries; j++) {
+ /*
+ * Configure UTRD with command descriptor base address
+ */
+ cmd_desc_dma_addr =
+ (cmd_desc_dma_base_addr + (cmd_desc_size * j));
+ utrdlp[j].command_desc_base_addr_lo =
+ cpu_to_le32(lower_32_bits(cmd_desc_dma_addr));
+ utrdlp[j].command_desc_base_addr_hi =
+ cpu_to_le32(upper_32_bits(cmd_desc_dma_addr));
+
+ /*
+ * Response upiu and prdt offset should be in DWs
+ */
+ if (hba->quirks & UFSHCD_QUIRK_PRDT_BYTE_GRAN) {
+ utrdlp[j].response_upiu_offset =
+ cpu_to_le16(response_offset);
+ utrdlp[j].prd_table_offset =
+ cpu_to_le16(prdt_offset);
+ utrdlp[j].response_upiu_length =
+ cpu_to_le16(ALIGNED_UPIU_SIZE);
+ } else {
+ utrdlp[j].response_upiu_offset =
+ cpu_to_le16(response_offset >> 2);
+ utrdlp[j].prd_table_offset =
+ cpu_to_le16(prdt_offset >> 2);
+ utrdlp[j].response_upiu_length =
+ cpu_to_le16(ALIGNED_UPIU_SIZE >> 2);
+ }
+
+ ufshcd_mcq_init_lrb(hba, hwq, &hwq->lrb[j], j);
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ufshcd_mcq_memory_configure);
+
+#define MCQ_CFG_n(r, i) \
+ (r) + MCQ_QCFG_SIZE * (i)
+#define MCQ_ROP_OFFSET_n(p, i) \
+ hba->mcq_rop[(p)].offset + hba->mcq_rop[(p)].stride * (i)
+
+static inline void __iomem *mcq_rop_base(struct ufs_hba *hba,
+ enum ufshcd_mcq_rop n, int i)
+{
+ struct ufshcd_mcq_rop_info_t *rop = &hba->mcq_rop[n];
+
+ return rop->base + rop->stride * i;
+}
+
+void ufshcd_mcq_make_queues_operational(struct ufs_hba *hba)
+{
+ struct ufs_hw_queue *hwq;
+ u16 qsize;
+ int i;
+
+ for_each_hw_queue(hba, i) {
+ hwq = &hba->uhq[i];
+ hwq->id = i;
+ qsize = hwq->max_entries * MCQ_ENTRY_SIZE_IN_DWORD - 1;
+
+ /* SQLBA */
+ ufsmcq_writel(hba, lower_32_bits(hwq->sqe_dma_addr),
+ MCQ_CFG_n(REG_SQLBA, i));
+ /* SQUBA */
+ ufsmcq_writel(hba, upper_32_bits(hwq->sqe_dma_addr),
+ MCQ_CFG_n(REG_SQUBA, i));
+ /* SQDAO */
+ ufsmcq_writel(hba, MCQ_ROP_OFFSET_n(ROP_SQD, i),
+ MCQ_CFG_n(REG_SQDAO, i));
+ /* SQISAO */
+ ufsmcq_writel(hba, MCQ_ROP_OFFSET_n(ROP_SQIS, i),
+ MCQ_CFG_n(REG_SQISAO, i));
+
+ /* CQLBA */
+ ufsmcq_writel(hba, lower_32_bits(hwq->cqe_dma_addr),
+ MCQ_CFG_n(REG_CQLBA, i));
+ /* CQUBA */
+ ufsmcq_writel(hba, upper_32_bits(hwq->cqe_dma_addr),
+ MCQ_CFG_n(REG_CQUBA, i));
+ /* CQDAO */
+ ufsmcq_writel(hba, MCQ_ROP_OFFSET_n(ROP_CQD, i),
+ MCQ_CFG_n(REG_CQDAO, i));
+ /* CQISAO */
+ ufsmcq_writel(hba, MCQ_ROP_OFFSET_n(ROP_CQIS, i),
+ MCQ_CFG_n(REG_CQISAO, i));
+
+ /* Save the base addresses for quicker access */
+ hwq->mcq_sq_hp = mcq_rop_base(hba, ROP_SQD, i) + REG_SQHP;
+ hwq->mcq_sq_tp = mcq_rop_base(hba, ROP_SQD, i) + REG_SQTP;
+ hwq->mcq_cq_hp = mcq_rop_base(hba, ROP_CQD, i) + REG_CQHP;
+ hwq->mcq_cq_tp = mcq_rop_base(hba, ROP_CQD, i) + REG_CQTP;
+
+ /* enable CQIE.TEPS interrupt only for non-poll queues */
+ if (i < hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL])
+ writel(1, mcq_rop_base(hba, ROP_CQIS, i) + REG_CQIE);
+
+ /* cqen|size */
+ ufsmcq_writel(hba, (1 << 31) | qsize, MCQ_CFG_n(REG_CQATTR, i));
+
+ /* sqen|size|cqid */
+ ufsmcq_writel(hba, (1 << 31) | qsize | (i << 16),
+ MCQ_CFG_n(REG_SQATTR, i));
+ }
+}
+EXPORT_SYMBOL_GPL(ufshcd_mcq_make_queues_operational);
+
+static void ufshcd_mcq_release_resource(struct ufs_hba *hba)
+{
+ struct ufshcd_res_info_t *res;
+ int i;
+
+ for (i = RES_MCQ; i < RES_MAX; i++) {
+ res = &hba->res[i];
+
+ if(res->base) {
+ devm_iounmap(hba->dev, res->base);
+ res->base = NULL;
+ }
+
+ if (res->is_alloc)
+ devm_kfree(hba->dev, res->resource);
+ }
+}
+
+static int ufshcd_mcq_config_resource(struct ufs_hba *hba)
+{
+ struct platform_device *pdev = to_platform_device(hba->dev);
+ struct ufshcd_res_info_t *res;
+ struct resource *res_mem, *res_mcq;
+ int i, ret = 0;
+
+ memcpy(hba->res, ufshcd_res_info, sizeof(ufshcd_res_info));
+
+ for (i = 0; i < RES_MAX; i++) {
+ res = &hba->res[i];
+
+ res->resource = platform_get_resource_byname(pdev,
+ IORESOURCE_MEM,
+ res->name);
+ if (!res->resource) {
+ dev_info(hba->dev, "Resource %s not provided\n", res->name);
+ if (i == RES_MEM)
+ return -ENOMEM;
+ continue;
+ } else if (i == RES_MEM) {
+ res_mem = res->resource;
+ res->base = hba->mmio_base;
+ continue;
+ }
+
+ res->base = devm_ioremap_resource(hba->dev, res->resource);
+ if (IS_ERR(res->base)) {
+ dev_err(hba->dev, "Failed to map res %s, err = %d\n",
+ res->name, (int)PTR_ERR(res->base));
+ res->base = NULL;
+ ret = PTR_ERR(res->base);
+ goto out_err;
+ }
+ }
+
+ res = &hba->res[RES_MCQ];
+ /* MCQ resource provided */
+ if (res->base)
+ goto out;
+
+ /* Manually allocate MCQ resource */
+ res_mcq = res->resource;
+ res_mcq = devm_kzalloc(hba->dev, sizeof(*res_mcq), GFP_KERNEL);
+ if (!res_mcq) {
+ dev_err(hba->dev, "Failed to alloate MCQ resource\n");
+ goto out_err;
+ }
+ res->is_alloc = true;
+
+ res_mcq->start = res_mem->start +
+ mcq_sqattr_offset(hba->mcq_capabilities);
+ res_mcq->end = res_mcq->start + 32 * MCQ_QCFG_SIZE - 1;
+ res_mcq->flags = res_mem->flags;
+ res_mcq->name = "mcq";
+
+ ret = insert_resource(&iomem_resource, res_mcq);
+ if (ret) {
+ dev_err(hba->dev, "Failed to insert MCQ resource %d\n", ret);
+ goto out_err;
+ }
+
+ res->base = devm_ioremap_resource(hba->dev, res_mcq);
+ if (IS_ERR(res->base)) {
+ dev_err(hba->dev, "Map MCQ registers failed, err = %d\n",
+ (int)PTR_ERR(res->base));
+ ret = PTR_ERR(res->base);
+ res->base = NULL;
+ goto out_err;
+ }
+
+out:
+ hba->mcq_base = res->base;
+ return 0;
+
+out_err:
+ ufshcd_mcq_release_resource(hba);
+ return ret;
+}
+
+int ufshcd_mcq_init(struct ufs_hba *hba)
+{
+ struct Scsi_Host *host = hba->host;
+ struct ufs_hw_queue *hwq;
+ int i, ret = 0;
+
+ if (!is_mcq_supported(hba))
+ return 0;
+
+ ret = ufshcd_mcq_config_resource(hba);
+ if (ret) {
+ dev_err(hba->dev, "Failed to config MCQ resource\n");
+ return ret;
+ }
+
+ ret = ufshcd_vops_config_mcq_rop(hba);
+ if (ret) {
+ dev_err(hba->dev, "MCQ Runtime Operation Pointers not configured\n");
+ goto out_err;
+ }
+
+ hba->nr_queues[HCTX_TYPE_DEFAULT] = num_possible_cpus();
+ hba->nr_queues[HCTX_TYPE_READ] = 0;
+ hba->nr_queues[HCTX_TYPE_POLL] = 1;
+
+ for (i = 0; i < HCTX_MAX_TYPES; i++)
+ host->nr_hw_queues += hba->nr_queues[i];
+
+ host->can_queue = hba->nutrs;
+ host->cmd_per_lun = hba->nutrs;
+
+ /* One more reserved for dev_cmd_queue */
+ hba->nr_hw_queues = host->nr_hw_queues + 1;
+
+ hba->uhq = devm_kmalloc(hba->dev,
+ hba->nr_hw_queues * sizeof(struct ufs_hw_queue),
+ GFP_KERNEL);
+ if (!hba->uhq) {
+ dev_err(hba->dev, "Alloc ufs hw queue failed\n");
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ for_each_hw_queue(hba, i) {
+ hwq = &hba->uhq[i];
+ hwq->max_entries = hba->nutrs;
+ spin_lock_init(&hwq->sq_lock);
+ spin_lock_init(&hwq->cq_lock);
+ }
+ /* The very first HW queue is to serve dev_cmd */
+ hba->dev_cmd_queue = &hba->uhq[0];
+ /* Give dev_cmd_queue the minimal num of entries */
+ hba->dev_cmd_queue->max_entries = 2;
+
+ /* Selct MCQ mode */
+ ufshcd_writel(hba, ufshcd_readl(hba, REG_UFS_MEM_CFG) | 0x1,
+ REG_UFS_MEM_CFG);
+ hba->use_mcq = true;
+
+ return 0;
+
+out_err:
+ ufshcd_mcq_release_resource(hba);
+ devm_kfree(hba->dev, hba->uhq);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ufshcd_mcq_init);
+
+u32 ufshcd_mcq_read_cqis(struct ufs_hba *hba, int i)
+{
+ return readl(mcq_rop_base(hba, ROP_CQIS, i) + REG_CQIS);
+}
+EXPORT_SYMBOL_GPL(ufshcd_mcq_read_cqis);
+
+void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i)
+{
+ writel(val, mcq_rop_base(hba, ROP_CQIS, i) + REG_CQIS);
+}
+EXPORT_SYMBOL_GPL(ufshcd_mcq_write_cqis);
+
+/**
+ * ufshcd_mcq_enable_cq_intr - Enable MCQ completion queue interrupts
+ * @hba: per adapter instance
+ * @intrs: interrupt bits
+ */
+void ufshcd_mcq_enable_cq_intr(struct ufs_hba *hba, u32 intrs)
+{
+ int i;
+ u32 set;
+
+ for_each_hw_queue(hba, i) {
+ /* enable CQIS.TEPS interrupt only for non-poll queues */
+ if (i >= hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL])
+ continue;
+
+ set = readl(mcq_rop_base(hba, ROP_CQIS, i) + REG_CQIE);
+ set |= intrs;
+ writel(set, mcq_rop_base(hba, ROP_CQIS, i) + REG_CQIE);
+ }
+}
+EXPORT_SYMBOL_GPL(ufshcd_mcq_enable_cq_intr);
+
+/**
+ * ufshcd_mcq_disable_cq_intr - Disable MCQ completion queue interrupts
+ * @hba: per adapter instance
+ * @intrs: interrupt bits
+ */
+void ufshcd_mcq_disable_cq_intr(struct ufs_hba *hba, u32 intrs)
+{
+ int i;
+ u32 set;
+
+ for_each_hw_queue(hba, i) {
+ set = readl(mcq_rop_base(hba, ROP_CQIS, i) + REG_CQIE);
+ set &= ~intrs;
+ writel(set, mcq_rop_base(hba, ROP_CQIS, i) + REG_CQIE);
+ }
+}
+EXPORT_SYMBOL_GPL(ufshcd_mcq_disable_cq_intr);
+
+void ufshcd_mcq_enable_esi(struct ufs_hba *hba)
+{
+ ufshcd_writel(hba, ufshcd_readl(hba, REG_UFS_MEM_CFG) | 0x2,
+ REG_UFS_MEM_CFG);
+}
+EXPORT_SYMBOL_GPL(ufshcd_mcq_enable_esi);
+
+void ufshcd_mcq_config_esi(struct ufs_hba *hba, struct msi_msg *msg)
+{
+ ufshcd_writel(hba, msg->address_lo, REG_UFS_ESILBA);
+ ufshcd_writel(hba, msg->address_hi, REG_UFS_ESIUBA);
+}
+EXPORT_SYMBOL_GPL(ufshcd_mcq_config_esi);
+
+static inline u32 ufshcd_mcq_get_tag(struct ufs_hba *hba,
+ struct ufs_hw_queue *hwq,
+ struct cq_entry *cqe)
+{
+ dma_addr_t dma_addr;
+
+ dma_addr = ((u64)le32_to_cpu(cqe->command_desc_base_addr_hi) << 32) |
+ (le32_to_cpu(cqe->command_desc_base_addr_lo) & 0xffffff80);
+
+ return (dma_addr - hwq->ucdl_dma_addr) /
+ sizeof(struct utp_transfer_cmd_desc);
+}
+
+static inline void ufshcd_mcq_process_event(struct ufs_hba *hba,
+ struct ufs_hw_queue *hwq)
+{
+ struct cq_entry *cqe = ufshcd_mcq_cur_cqe(hwq);
+ struct ufshcd_lrb *lrbp;
+ u32 tag;
+
+ tag = ufshcd_mcq_get_tag(hba, hwq, cqe);
+
+ lrbp = &hwq->lrb[tag];
+
+ ufshcd_compl_one_lrb(hba, lrbp, cqe);
+}
+
+unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba,
+ struct ufs_hw_queue *hwq)
+{
+ unsigned long completed_reqs = 0;
+
+ ufshcd_mcq_update_cq_tp_slot(hwq);
+ while (!ufshcd_mcq_is_cq_empty(hwq)) {
+ ufshcd_mcq_process_event(hba, hwq);
+ ufshcd_mcq_inc_cq_hp_slot(hwq);
+ completed_reqs++;
+ }
+
+ if (completed_reqs)
+ ufshcd_mcq_update_cq_hp(hwq);
+
+ return completed_reqs;
+}
+EXPORT_SYMBOL_GPL(ufshcd_mcq_poll_cqe_nolock);
+
+unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
+ struct ufs_hw_queue *hwq)
+{
+ unsigned long completed_reqs, flags;
+
+ spin_lock_irqsave(&hwq->cq_lock, flags);
+ completed_reqs = ufshcd_mcq_poll_cqe_nolock(hba, hwq);
+ spin_unlock_irqrestore(&hwq->cq_lock, flags);
+
+ return completed_reqs;
+}
+EXPORT_SYMBOL_GPL(ufshcd_mcq_poll_cqe_lock);
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 2cdc146..429b3e2 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -42,6 +42,11 @@
#define UFSHCD_ENABLE_INTRS (UTP_TRANSFER_REQ_COMPL |\
UTP_TASK_REQ_COMPL |\
UFSHCD_ERROR_MASK)
+
+#define UFSHCD_ENABLE_MCQ_INTRS (UTP_TASK_REQ_COMPL |\
+ UFSHCD_ERROR_MASK |\
+ MCQ_CQ_EVENT_STATUS)
+
/* UIC command timeout, unit: ms */
#define UIC_CMD_TIMEOUT 500

@@ -101,11 +106,13 @@
_ret; \
})

-#define ufshcd_hex_dump(prefix_str, buf, len) do { \
- size_t __len = (len); \
- print_hex_dump(KERN_ERR, prefix_str, \
- __len > 4 ? DUMP_PREFIX_OFFSET : DUMP_PREFIX_NONE,\
- 16, 4, buf, __len, false); \
+#define ufshcd_hex_dump(prefix_str, buf, len) do { \
+ size_t __len = (len); \
+ \
+ print_hex_dump(KERN_ERR, prefix_str, \
+ __len > 4 ? DUMP_PREFIX_OFFSET : DUMP_PREFIX_NONE, \
+ 16, 4, buf, __len, false); \
+ \
} while (0)

int ufshcd_dump_regs(struct ufs_hba *hba, size_t offset, size_t len,
@@ -313,10 +320,11 @@ static void ufshcd_scsi_block_requests(struct ufs_hba *hba)
scsi_block_requests(hba->host);
}

-static void ufshcd_add_cmd_upiu_trace(struct ufs_hba *hba, unsigned int tag,
+static void ufshcd_add_cmd_upiu_trace(struct ufs_hba *hba,
+ struct ufshcd_lrb *lrbp,
enum ufs_trace_str_t str_t)
{
- struct utp_upiu_req *rq = hba->lrb[tag].ucd_req_ptr;
+ struct utp_upiu_req *rq = lrbp->ucd_req_ptr;
struct utp_upiu_header *header;

if (!trace_ufshcd_upiu_enabled())
@@ -325,7 +333,7 @@ static void ufshcd_add_cmd_upiu_trace(struct ufs_hba *hba, unsigned int tag,
if (str_t == UFS_CMD_SEND)
header = &rq->header;
else
- header = &hba->lrb[tag].ucd_rsp_ptr->header;
+ header = &lrbp->ucd_rsp_ptr->header;

trace_ufshcd_upiu(dev_name(hba->dev), str_t, header, &rq->sc.cdb,
UFS_TSF_CDB);
@@ -382,13 +390,13 @@ static void ufshcd_add_uic_command_trace(struct ufs_hba *hba,
ufshcd_readl(hba, REG_UIC_COMMAND_ARG_3));
}

-static void ufshcd_add_command_trace(struct ufs_hba *hba, unsigned int tag,
+static void ufshcd_add_command_trace(struct ufs_hba *hba,
+ struct ufshcd_lrb *lrbp,
enum ufs_trace_str_t str_t)
{
u64 lba = 0;
u8 opcode = 0, group_id = 0;
u32 intr, doorbell;
- struct ufshcd_lrb *lrbp = &hba->lrb[tag];
struct scsi_cmnd *cmd = lrbp->cmd;
struct request *rq = scsi_cmd_to_rq(cmd);
int transfer_len = -1;
@@ -397,7 +405,7 @@ static void ufshcd_add_command_trace(struct ufs_hba *hba, unsigned int tag,
return;

/* trace UPIU also */
- ufshcd_add_cmd_upiu_trace(hba, tag, str_t);
+ ufshcd_add_cmd_upiu_trace(hba, lrbp, str_t);
if (!trace_ufshcd_command_enabled())
return;

@@ -422,7 +430,7 @@ static void ufshcd_add_command_trace(struct ufs_hba *hba, unsigned int tag,

intr = ufshcd_readl(hba, REG_INTERRUPT_STATUS);
doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
- trace_ufshcd_command(dev_name(hba->dev), str_t, tag,
+ trace_ufshcd_command(dev_name(hba->dev), str_t, lrbp->task_tag,
doorbell, transfer_len, intr, lba, opcode, group_id);
}

@@ -742,8 +750,11 @@ static inline bool ufshcd_is_device_present(struct ufs_hba *hba)
* This function is used to get the OCS field from UTRD
* Returns the OCS field in the UTRD
*/
-static enum utp_ocs ufshcd_get_tr_ocs(struct ufshcd_lrb *lrbp)
+static enum utp_ocs ufshcd_get_tr_ocs(struct ufshcd_lrb *lrbp,
+ struct cq_entry *cqe)
{
+ if (cqe)
+ return le32_to_cpu(cqe->status) & MASK_OCS;
return le32_to_cpu(lrbp->utr_descriptor_ptr->header.dword_2) & MASK_OCS;
}

@@ -2134,27 +2145,39 @@ static void ufshcd_update_monitor(struct ufs_hba *hba, const struct ufshcd_lrb *
/**
* ufshcd_send_command - Send SCSI or device management commands
* @hba: per adapter instance
- * @task_tag: Task tag of the command
+ * @lrbp: pointer to lrb
+ * @hwq: hardware queue
*/
static inline
-void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
+void ufshcd_send_command(struct ufs_hba *hba, struct ufshcd_lrb *lrbp,
+ struct ufs_hw_queue *hwq)
{
- struct ufshcd_lrb *lrbp = &hba->lrb[task_tag];
unsigned long flags;

lrbp->issue_time_stamp = ktime_get();
lrbp->compl_time_stamp = ktime_set(0, 0);
- ufshcd_add_command_trace(hba, task_tag, UFS_CMD_SEND);
+ ufshcd_add_command_trace(hba, lrbp, UFS_CMD_SEND);
ufshcd_clk_scaling_start_busy(hba);
if (unlikely(ufshcd_should_inform_monitor(hba, lrbp)))
ufshcd_start_monitor(hba, lrbp);

- spin_lock_irqsave(&hba->outstanding_lock, flags);
- if (hba->vops && hba->vops->setup_xfer_req)
- hba->vops->setup_xfer_req(hba, task_tag, !!lrbp->cmd);
- __set_bit(task_tag, &hba->outstanding_reqs);
- ufshcd_writel(hba, 1 << task_tag, REG_UTP_TRANSFER_REQ_DOOR_BELL);
- spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+ if (is_mcq_enabled(hba)) {
+ int utrd_size = sizeof(struct utp_transfer_req_desc);
+
+ spin_lock(&hwq->sq_lock);
+ memcpy(hwq->sqe_base_addr + (hwq->sq_tp_slot * utrd_size),
+ lrbp->utr_descriptor_ptr, utrd_size);
+ ufshcd_inc_tp(hwq);
+ spin_unlock(&hwq->sq_lock);
+ } else {
+ spin_lock_irqsave(&hba->outstanding_lock, flags);
+ if (hba->vops && hba->vops->setup_xfer_req)
+ hba->vops->setup_xfer_req(hba, lrbp->task_tag, !!lrbp->cmd);
+ __set_bit(lrbp->task_tag, &hba->outstanding_reqs);
+ ufshcd_writel(hba, 1 << lrbp->task_tag,
+ REG_UTP_TRANSFER_REQ_DOOR_BELL);
+ spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+ }
}

/**
@@ -2242,6 +2265,12 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba)
if (err)
dev_err(hba->dev, "crypto setup failed\n");

+ hba->mcq_sup = (hba->capabilities & MASK_MCQ_SUPPORT) >> 30;
+ if (hba->mcq_sup) {
+ hba->mcq_capabilities = ufshcd_readl(hba, REG_MCQCAP);
+ hba->ext_iid_sup = (hba->mcq_capabilities & MASK_EXT_IID_SUPPORT)
+ >> 10;
+ }
return err;
}

@@ -2558,7 +2587,8 @@ void ufshcd_prepare_utp_scsi_cmd_upiu(struct ufshcd_lrb *lrbp, u8 upiu_flags)
UPIU_TRANSACTION_COMMAND, upiu_flags,
lrbp->lun, lrbp->task_tag);
ucd_req_ptr->header.dword_1 = UPIU_HEADER_DWORD(
- UPIU_COMMAND_SET_TYPE_SCSI, 0, 0, 0);
+ UPIU_COMMAND_SET_TYPE_SCSI |
+ (lrbp->hw_queue_id << 4), 0, 0, 0);

/* Total EHS length and Data segment length will be zero */
ucd_req_ptr->header.dword_2 = 0;
@@ -2706,25 +2736,26 @@ static inline bool is_device_wlun(struct scsi_device *sdev)
*/
static int ufshcd_map_queues(struct Scsi_Host *shost)
{
- int i, ret;
+ int i, queue_offset = 0, ret;
+ struct ufs_hba *hba = shost_priv(shost);

for (i = 0; i < shost->nr_maps; i++) {
struct blk_mq_queue_map *map = &shost->tag_set.map[i];

- switch (i) {
- case HCTX_TYPE_DEFAULT:
- case HCTX_TYPE_POLL:
- map->nr_queues = 1;
- break;
- case HCTX_TYPE_READ:
- map->nr_queues = 0;
+ map->nr_queues = hba->nr_queues[i];
+ if (!map->nr_queues)
continue;
- default:
- WARN_ON_ONCE(true);
- }
- map->queue_offset = 0;
+
+ map->queue_offset = queue_offset;
+ if (i == HCTX_TYPE_POLL && !is_mcq_enabled(hba))
+ map->queue_offset = 0;
+
ret = blk_mq_map_queues(map);
- WARN_ON_ONCE(ret);
+
+ if (ret)
+ return ret;
+
+ queue_offset += map->nr_queues;
}

return 0;
@@ -2764,6 +2795,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
int tag = scsi_cmd_to_rq(cmd)->tag;
struct ufshcd_lrb *lrbp;
int err = 0;
+ struct ufs_hw_queue *hwq;

WARN_ONCE(tag < 0 || tag >= hba->nutrs, "Invalid tag %d\n", tag);

@@ -2826,7 +2858,11 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
WARN_ON(ufshcd_is_clkgating_allowed(hba) &&
(hba->clk_gating.state != CLKS_ON));

- lrbp = &hba->lrb[tag];
+ if (is_mcq_enabled(hba))
+ lrbp = ufshcd_mcq_find_lrb(hba, scsi_cmd_to_rq(cmd), &hwq);
+ else
+ lrbp = &hba->lrb[tag];
+
WARN_ON(lrbp->cmd);
lrbp->cmd = cmd;
lrbp->task_tag = tag;
@@ -2848,8 +2884,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
goto out;
}

- ufshcd_send_command(hba, tag);
-
+ ufshcd_send_command(hba, lrbp, hwq);
out:
rcu_read_unlock();

@@ -2966,7 +3001,7 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
spin_lock_irqsave(hba->host->host_lock, flags);
hba->dev_cmd.complete = NULL;
if (likely(time_left)) {
- err = ufshcd_get_tr_ocs(lrbp);
+ err = ufshcd_get_tr_ocs(lrbp, hba->dev_cmd.cqe);
if (!err)
err = ufshcd_dev_cmd_completion(hba, lrbp);
}
@@ -2974,7 +3009,7 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,

if (!time_left) {
err = -ETIMEDOUT;
- dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
+ dev_err(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
__func__, lrbp->task_tag);
if (!ufshcd_clear_cmds(hba, 1U << lrbp->task_tag))
/* successfully cleared the command, retry if needed */
@@ -3005,7 +3040,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
enum dev_cmd_type cmd_type, int timeout)
{
DECLARE_COMPLETION_ONSTACK(wait);
- const u32 tag = hba->reserved_slot;
+ u32 tag = hba->reserved_slot;
struct ufshcd_lrb *lrbp;
int err;

@@ -3014,17 +3049,22 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,

down_read(&hba->clk_scaling_lock);

- lrbp = &hba->lrb[tag];
+ if (is_mcq_enabled(hba)) {
+ tag = hba->dev_cmd_queue->sq_tp_slot;
+ lrbp = &hba->dev_cmd_queue->lrb[tag];
+ } else {
+ lrbp = &hba->lrb[tag];
+ }
WARN_ON(lrbp->cmd);
err = ufshcd_compose_dev_cmd(hba, lrbp, cmd_type, tag);
if (unlikely(err))
goto out;

hba->dev_cmd.complete = &wait;
+ hba->dev_cmd.cqe = NULL;

ufshcd_add_query_upiu_trace(hba, UFS_QUERY_SEND, lrbp->ucd_req_ptr);
-
- ufshcd_send_command(hba, tag);
+ ufshcd_send_command(hba, lrbp, hba->dev_cmd_queue);
err = ufshcd_wait_for_dev_cmd(hba, lrbp, timeout);
ufshcd_add_query_upiu_trace(hba, err ? UFS_QUERY_ERR : UFS_QUERY_COMP,
(struct utp_upiu_req *)lrbp->ucd_rsp_ptr);
@@ -3620,11 +3660,11 @@ static int ufshcd_get_ref_clk_gating_wait(struct ufs_hba *hba)
* ufshcd_memory_alloc - allocate memory for host memory space data structures
* @hba: per adapter instance
*
- * 1. Allocate DMA memory for Command Descriptor array
- * Each command descriptor consist of Command UPIU, Response UPIU and PRDT
- * 2. Allocate DMA memory for UTP Transfer Request Descriptor List (UTRDL).
- * 3. Allocate DMA memory for UTP Task Management Request Descriptor List
+ * 1. Allocate DMA memory for UTP Task Management Request Descriptor List
* (UTMRDL)
+ * 2. Allocate DMA memory for Command Descriptor array
+ * Each command descriptor consist of Command UPIU, Response UPIU and PRDT
+ * 3. Allocate DMA memory for UTP Transfer Request Descriptor List (UTRDL).
* 4. Allocate memory for local reference block(lrb).
*
* Returns 0 for success, non-zero in case of failure
@@ -3633,6 +3673,25 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba)
{
size_t utmrdl_size, utrdl_size, ucdl_size;

+ /*
+ * Allocate memory for UTP Task Management descriptors
+ * UFSHCI requires 1024 byte alignment of UTMRD
+ */
+ utmrdl_size = sizeof(struct utp_task_req_desc) * hba->nutmrs;
+ hba->utmrdl_base_addr = dmam_alloc_coherent(hba->dev,
+ utmrdl_size,
+ &hba->utmrdl_dma_addr,
+ GFP_KERNEL);
+ if (!hba->utmrdl_base_addr ||
+ WARN_ON(hba->utmrdl_dma_addr & (PAGE_SIZE - 1))) {
+ dev_err(hba->dev,
+ "Task Management Descriptor Memory allocation failed\n");
+ goto out;
+ }
+
+ if (is_mcq_enabled(hba))
+ return ufshcd_mcq_memory_alloc(hba);
+
/* Allocate memory for UTP command descriptors */
ucdl_size = (sizeof(struct utp_transfer_cmd_desc) * hba->nutrs);
hba->ucdl_base_addr = dmam_alloc_coherent(hba->dev,
@@ -3669,22 +3728,6 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba)
goto out;
}

- /*
- * Allocate memory for UTP Task Management descriptors
- * UFSHCI requires 1024 byte alignment of UTMRD
- */
- utmrdl_size = sizeof(struct utp_task_req_desc) * hba->nutmrs;
- hba->utmrdl_base_addr = dmam_alloc_coherent(hba->dev,
- utmrdl_size,
- &hba->utmrdl_dma_addr,
- GFP_KERNEL);
- if (!hba->utmrdl_base_addr ||
- WARN_ON(hba->utmrdl_dma_addr & (PAGE_SIZE - 1))) {
- dev_err(hba->dev,
- "Task Management Descriptor Memory allocation failed\n");
- goto out;
- }
-
/* Allocate memory for local reference block */
hba->lrb = devm_kcalloc(hba->dev,
hba->nutrs, sizeof(struct ufshcd_lrb),
@@ -3711,7 +3754,7 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba)
* 3. Save the corresponding addresses of UTRD, UCD.CMD, UCD.RSP and UCD.PRDT
* into local reference block.
*/
-static void ufshcd_host_memory_configure(struct ufs_hba *hba)
+static int ufshcd_host_memory_configure(struct ufs_hba *hba)
{
struct utp_transfer_req_desc *utrdlp;
dma_addr_t cmd_desc_dma_addr;
@@ -3721,6 +3764,9 @@ static void ufshcd_host_memory_configure(struct ufs_hba *hba)
int cmd_desc_size;
int i;

+ if (is_mcq_enabled(hba))
+ return ufshcd_mcq_memory_configure(hba);
+
utrdlp = hba->utrdl_base_addr;

response_offset =
@@ -3759,6 +3805,8 @@ static void ufshcd_host_memory_configure(struct ufs_hba *hba)

ufshcd_init_lrb(hba, &hba->lrb[i], i);
}
+
+ return 0;
}

/**
@@ -4503,7 +4551,10 @@ int ufshcd_make_hba_operational(struct ufs_hba *hba)
u32 reg;

/* Enable required interrupts */
- ufshcd_enable_intr(hba, UFSHCD_ENABLE_INTRS);
+ if (is_mcq_enabled(hba))
+ ufshcd_enable_intr(hba, UFSHCD_ENABLE_MCQ_INTRS);
+ else
+ ufshcd_enable_intr(hba, UFSHCD_ENABLE_INTRS);

/* Configure interrupt aggregation */
if (ufshcd_is_intr_aggr_allowed(hba))
@@ -4511,11 +4562,17 @@ int ufshcd_make_hba_operational(struct ufs_hba *hba)
else
ufshcd_disable_intr_aggr(hba);

- /* Configure UTRL and UTMRL base address registers */
- ufshcd_writel(hba, lower_32_bits(hba->utrdl_dma_addr),
- REG_UTP_TRANSFER_REQ_LIST_BASE_L);
- ufshcd_writel(hba, upper_32_bits(hba->utrdl_dma_addr),
- REG_UTP_TRANSFER_REQ_LIST_BASE_H);
+ if (is_mcq_enabled(hba)) {
+ ufshcd_mcq_make_queues_operational(hba);
+ } else {
+ /* Configure UTRL base address registers */
+ ufshcd_writel(hba, lower_32_bits(hba->utrdl_dma_addr),
+ REG_UTP_TRANSFER_REQ_LIST_BASE_L);
+ ufshcd_writel(hba, upper_32_bits(hba->utrdl_dma_addr),
+ REG_UTP_TRANSFER_REQ_LIST_BASE_H);
+ }
+
+ /* Configure UTMRL base address registers */
ufshcd_writel(hba, lower_32_bits(hba->utmrdl_dma_addr),
REG_UTP_TASK_REQ_LIST_BASE_L);
ufshcd_writel(hba, upper_32_bits(hba->utmrdl_dma_addr),
@@ -5152,14 +5209,15 @@ ufshcd_scsi_cmd_status(struct ufshcd_lrb *lrbp, int scsi_status)
* Returns result of the command to notify SCSI midlayer
*/
static inline int
-ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
+ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp,
+ struct cq_entry *cqe)
{
int result = 0;
int scsi_status;
enum utp_ocs ocs;

/* overall command status of utrd */
- ocs = ufshcd_get_tr_ocs(lrbp);
+ ocs = ufshcd_get_tr_ocs(lrbp, cqe);

if (hba->quirks & UFSHCD_QUIRK_BROKEN_OCS_FATAL_ERROR) {
if (be32_to_cpu(lrbp->ucd_rsp_ptr->header.dword_1) &
@@ -5323,6 +5381,31 @@ static void ufshcd_release_scsi_cmd(struct ufs_hba *hba,
ufshcd_clk_scaling_update_busy(hba);
}

+void ufshcd_compl_one_lrb(struct ufs_hba *hba, struct ufshcd_lrb *lrbp,
+ struct cq_entry *cqe)
+{
+ struct scsi_cmnd *cmd;
+
+ lrbp->compl_time_stamp = ktime_get();
+ cmd = lrbp->cmd;
+ if (cmd) {
+ if (unlikely(ufshcd_should_inform_monitor(hba, lrbp)))
+ ufshcd_update_monitor(hba, lrbp);
+ ufshcd_add_command_trace(hba, lrbp, UFS_CMD_COMP);
+ cmd->result = ufshcd_transfer_rsp_status(hba, lrbp, cqe);
+ ufshcd_release_scsi_cmd(hba, lrbp);
+ /* Do not touch lrbp after scsi done */
+ scsi_done(cmd);
+ } else if ((lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE ||
+ lrbp->command_type == UTP_CMD_TYPE_UFS_STORAGE) &&
+ hba->dev_cmd.complete) {
+ hba->dev_cmd.cqe = cqe;
+ ufshcd_add_command_trace(hba, lrbp, UFS_DEV_COMP);
+ complete(hba->dev_cmd.complete);
+ ufshcd_clk_scaling_update_busy(hba);
+ }
+}
+
/**
* __ufshcd_transfer_req_compl - handle SCSI and query command completion
* @hba: per adapter instance
@@ -5332,30 +5415,11 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
unsigned long completed_reqs)
{
struct ufshcd_lrb *lrbp;
- struct scsi_cmnd *cmd;
int index;

for_each_set_bit(index, &completed_reqs, hba->nutrs) {
lrbp = &hba->lrb[index];
- lrbp->compl_time_stamp = ktime_get();
- cmd = lrbp->cmd;
- if (cmd) {
- if (unlikely(ufshcd_should_inform_monitor(hba, lrbp)))
- ufshcd_update_monitor(hba, lrbp);
- ufshcd_add_command_trace(hba, index, UFS_CMD_COMP);
- cmd->result = ufshcd_transfer_rsp_status(hba, lrbp);
- ufshcd_release_scsi_cmd(hba, lrbp);
- /* Do not touch lrbp after scsi done */
- scsi_done(cmd);
- } else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE ||
- lrbp->command_type == UTP_CMD_TYPE_UFS_STORAGE) {
- if (hba->dev_cmd.complete) {
- ufshcd_add_command_trace(hba, index,
- UFS_DEV_COMP);
- complete(hba->dev_cmd.complete);
- ufshcd_clk_scaling_update_busy(hba);
- }
- }
+ ufshcd_compl_one_lrb(hba, lrbp, NULL);
}
}

@@ -5366,9 +5430,15 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
{
struct ufs_hba *hba = shost_priv(shost);
- unsigned long completed_reqs, flags;
+ struct ufs_hw_queue *hwq;
+ unsigned long completed_reqs = 0, flags;
u32 tr_doorbell;

+ if (is_mcq_enabled(hba)) {
+ hwq = &hba->uhq[queue_num + UFSHCD_MCQ_IO_QUEUE_OFFSET];
+ return ufshcd_mcq_poll_cqe_lock(hba, hwq);
+ }
+
spin_lock_irqsave(&hba->outstanding_lock, flags);
tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
completed_reqs = ~tr_doorbell & hba->outstanding_reqs;
@@ -5392,7 +5462,7 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
* IRQ_HANDLED - If interrupt is valid
* IRQ_NONE - If invalid interrupt
*/
-static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
+irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
{
/* Resetting interrupt aggregation counters first and reading the
* DOOR_BELL afterward allows us to handle all the completed requests.
@@ -5416,6 +5486,43 @@ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)

return IRQ_HANDLED;
}
+EXPORT_SYMBOL_GPL(ufshcd_transfer_req_compl);
+
+/**
+ * ufshcd_handle_mcq_cq_events - handle MCQ completion queue events
+ * @hba: per adapter instance
+ *
+ * Returns
+ * IRQ_HANDLED - If interrupt is valid
+ * IRQ_NONE - If invalid interrupt
+ */
+static irqreturn_t ufshcd_handle_mcq_cq_events(struct ufs_hba *hba)
+{
+ struct ufs_hw_queue *hwq;
+ unsigned long outstanding_cqs;
+ unsigned int nr_queues;
+ int i, ret;
+ u32 events;
+
+ ret = ufshcd_vops_get_outstanding_cqs(hba, &outstanding_cqs);
+ if (ret)
+ outstanding_cqs = (1U << hba->nr_hw_queues) - 1;
+
+ /* Exclude the poll queues */
+ nr_queues = hba->nr_hw_queues - hba->nr_queues[HCTX_TYPE_POLL];
+ for_each_set_bit(i, &outstanding_cqs, nr_queues) {
+ hwq = &hba->uhq[i];
+
+ events = ufshcd_mcq_read_cqis(hba, i);
+ if (events)
+ ufshcd_mcq_write_cqis(hba, events, i);
+
+ if (events & UFSHCD_MCQ_CQIS_TEPS)
+ ufshcd_mcq_poll_cqe_nolock(hba, hwq);
+ }
+
+ return IRQ_HANDLED;
+}

int __ufshcd_write_ee_control(struct ufs_hba *hba, u32 ee_ctrl_mask)
{
@@ -5947,10 +6054,23 @@ static void ufshcd_exception_event_handler(struct work_struct *work)
ufshcd_scsi_unblock_requests(hba);
}

-/* Complete requests that have door-bell cleared */
+/*
+ * Complete requests that have door-bell cleared and/or pending completion
+ * entries on completion queues if MCQ is enabled
+ */
static void ufshcd_complete_requests(struct ufs_hba *hba)
{
- ufshcd_transfer_req_compl(hba);
+ struct ufs_hw_queue *hwq;
+ int i;
+
+ if (is_mcq_enabled(hba)) {
+ for_each_hw_queue(hba, i) {
+ hwq = &hba->uhq[i];
+ ufshcd_mcq_poll_cqe_lock(hba, hwq);
+ }
+ } else {
+ ufshcd_transfer_req_compl(hba);
+ }
ufshcd_tmc_handler(hba);
}

@@ -6213,7 +6333,6 @@ static void ufshcd_err_handler(struct work_struct *work)
ufshcd_set_eh_in_progress(hba);
spin_unlock_irqrestore(hba->host->host_lock, flags);
ufshcd_err_handling_prepare(hba);
- /* Complete requests that have door-bell cleared by h/w */
ufshcd_complete_requests(hba);
spin_lock_irqsave(hba->host->host_lock, flags);
again:
@@ -6605,6 +6724,9 @@ static irqreturn_t ufshcd_sl_intr(struct ufs_hba *hba, u32 intr_status)
if (intr_status & UTP_TRANSFER_REQ_COMPL)
retval |= ufshcd_transfer_req_compl(hba);

+ if (intr_status & MCQ_CQ_EVENT_STATUS)
+ retval |= ufshcd_handle_mcq_cq_events(hba);
+
return retval;
}

@@ -6826,7 +6948,7 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
enum query_opcode desc_op)
{
DECLARE_COMPLETION_ONSTACK(wait);
- const u32 tag = hba->reserved_slot;
+ u32 tag = hba->reserved_slot;
struct ufshcd_lrb *lrbp;
int err = 0;
u8 upiu_flags;
@@ -6836,7 +6958,12 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,

down_read(&hba->clk_scaling_lock);

- lrbp = &hba->lrb[tag];
+ if (is_mcq_enabled(hba)) {
+ tag = hba->dev_cmd_queue->sq_tp_slot;
+ lrbp = &hba->dev_cmd_queue->lrb[tag];
+ } else {
+ lrbp = &hba->lrb[tag];
+ }
WARN_ON(lrbp->cmd);
lrbp->cmd = NULL;
lrbp->task_tag = tag;
@@ -6869,10 +6996,11 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
memset(lrbp->ucd_rsp_ptr, 0, sizeof(struct utp_upiu_rsp));

hba->dev_cmd.complete = &wait;
+ hba->dev_cmd.cqe = NULL;

ufshcd_add_query_upiu_trace(hba, UFS_QUERY_SEND, lrbp->ucd_req_ptr);

- ufshcd_send_command(hba, tag);
+ ufshcd_send_command(hba, lrbp, hba->dev_cmd_queue);
/*
* ignore the returning value here - ufshcd_check_query_response is
* bound to fail since dev_cmd.query and dev_cmd.type were left empty.
@@ -7728,6 +7856,9 @@ static int ufs_get_device_desc(struct ufs_hba *hba)
/* getting Specification Version in big endian format */
dev_info->wspecversion = desc_buf[DEVICE_DESC_PARAM_SPEC_VER] << 8 |
desc_buf[DEVICE_DESC_PARAM_SPEC_VER + 1];
+
+ dev_info->bqueuedepth = desc_buf[DEVICE_DESC_PARAM_Q_DPTH];
+
b_ufs_feature_sup = desc_buf[DEVICE_DESC_PARAM_UFS_FEAT];

model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME];
@@ -8184,6 +8315,9 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params)
ret = ufshcd_device_params_init(hba);
if (ret)
goto out;
+
+ if (is_mcq_enabled(hba) && hba->dev_info.bqueuedepth)
+ ufshcd_mcq_config_mac(hba);
}

ufshcd_tune_unipro_params(hba);
@@ -9641,6 +9775,10 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
goto out_disable;
}

+ err = ufshcd_mcq_init(hba);
+ if (err)
+ dev_err(hba->dev, "MCQ init failed\n");
+
/* Allocate memory for host memory space */
err = ufshcd_memory_alloc(hba);
if (err) {
@@ -9651,8 +9789,14 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
/* Configure LRB */
ufshcd_host_memory_configure(hba);

- host->can_queue = hba->nutrs - UFSHCD_NUM_RESERVED;
- host->cmd_per_lun = hba->nutrs - UFSHCD_NUM_RESERVED;
+ if (!is_mcq_enabled(hba)) {
+ hba->nr_queues[HCTX_TYPE_DEFAULT] = 1;
+ hba->nr_queues[HCTX_TYPE_POLL] = 1;
+ hba->nr_hw_queues = 1;
+
+ host->can_queue = hba->nutrs - UFSHCD_NUM_RESERVED;
+ host->cmd_per_lun = hba->nutrs - UFSHCD_NUM_RESERVED;
+ }
host->max_id = UFSHCD_MAX_ID;
host->max_lun = UFS_MAX_LUNS;
host->max_channel = UFSHCD_MAX_CHANNEL;
@@ -9714,6 +9858,14 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
hba->is_irq_enabled = true;
}

+ if (is_mcq_enabled(hba)) {
+ err = ufshcd_vops_config_mcq_esi(hba);
+ if (err)
+ dev_err(hba->dev, "ESI is not available %d\n", err);
+ else
+ hba->use_mcq_esi = true;
+ }
+
err = scsi_add_host(host, hba->dev);
if (err) {
dev_err(hba->dev, "scsi_add_host failed\n");
diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h
index 1bba3fe..b137aae 100644
--- a/include/ufs/ufs.h
+++ b/include/ufs/ufs.h
@@ -589,6 +589,7 @@ struct ufs_dev_info {
u8 *model;
u16 wspecversion;
u32 clk_gating_wait_us;
+ u8 bqueuedepth;

/* UFS HPB related flag */
bool hpb_enabled;
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 7fe1a92..8e90835 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -17,6 +17,7 @@
#include <linux/blk-mq.h>
#include <linux/devfreq.h>
#include <linux/pm_runtime.h>
+#include <linux/msi.h>
#include <scsi/scsi_device.h>
#include <ufs/unipro.h>
#include <ufs/ufs.h>
@@ -192,6 +193,7 @@ struct ufshcd_lrb {
#endif

bool req_abort_skip;
+ u32 hw_queue_id;
};

/**
@@ -218,6 +220,7 @@ struct ufs_dev_cmd {
struct mutex lock;
struct completion *complete;
struct ufs_query query;
+ struct cq_entry *cqe;
};

/**
@@ -293,6 +296,9 @@ struct ufs_pwr_mode_info {
* @config_scaling_param: called to configure clock scaling parameters
* @program_key: program or evict an inline encryption key
* @event_notify: called to notify important events
+ * @get_outstanding_cqs: called to get oustanding completion queues
+ * @config_mcq_rop: called to config Runtime Operation Pointers
+ * @config_mcq_esi: called to config MCQ ESI handlers
*/
struct ufs_hba_variant_ops {
const char *name;
@@ -331,6 +337,10 @@ struct ufs_hba_variant_ops {
const union ufs_crypto_cfg_entry *cfg, int slot);
void (*event_notify)(struct ufs_hba *hba,
enum ufs_event_type evt, void *data);
+ int (*get_outstanding_cqs)(struct ufs_hba *hba,
+ unsigned long *ocqs);
+ int (*config_mcq_rop)(struct ufs_hba *hba);
+ int (*config_mcq_esi)(struct ufs_hba *hba);
};

/* clock gating state */
@@ -713,6 +723,39 @@ struct ufs_hba_monitor {
bool enabled;
};

+struct ufshcd_res_info_t {
+ char *name;
+ struct resource *resource;
+ bool is_alloc;
+ void __iomem *base;
+};
+
+enum ufshcd_res {
+ RES_MEM,
+ RES_MCQ,
+ RES_MCQ_SQD,
+ RES_MCQ_SQIS,
+ RES_MCQ_CQD,
+ RES_MCQ_CQIS,
+ RES_MCQ_VS,
+ RES_MAX,
+};
+
+/* MCQ Runtime Operation Pointer info structure */
+struct ufshcd_mcq_rop_info_t {
+ unsigned long offset;
+ unsigned long stride;
+ void __iomem *base;
+};
+
+enum ufshcd_mcq_rop {
+ ROP_SQD,
+ ROP_SQIS,
+ ROP_CQD,
+ ROP_CQIS,
+ ROP_MAX,
+};
+
/**
* struct ufs_hba - per adapter private structure
* @mmio_base: UFSHCI base register address
@@ -737,6 +780,7 @@ struct ufs_hba_monitor {
* @outstanding_lock: Protects @outstanding_reqs.
* @outstanding_reqs: Bits representing outstanding transfer requests
* @capabilities: UFS Controller Capabilities
+ * @mcq_capabilities: UFS Multi Command Queue capabilities
* @nutrs: Transfer Request Queue depth supported by controller
* @nutmrs: Task Management Queue depth supported by controller
* @reserved_slot: Used to submit device commands. Protected by @dev_cmd.lock.
@@ -818,6 +862,14 @@ struct ufs_hba_monitor {
* device
* @complete_put: whether or not to call ufshcd_rpm_put() from inside
* ufshcd_resume_complete()
+ * @uhq: array of supported hardware queues
+ * @mcq_base: Multi command queue registers base address
+ * @ucd_pool: dma pool of UCD descriptors
+ * @dao_offset: value used to calculate the SQ and CQ DAO
+ * @use_mcq: track if MCQ is enabled
+ * @mcq_sup: track if MCQ is supported by UFSHC
+ * @ext_iid_sup: EXT_IID support by UFS device
+ * @use_mcq_esi: track if MCQ ESI is used
*/
struct ufs_hba {
void __iomem *mmio_base;
@@ -858,6 +910,7 @@ struct ufs_hba {
unsigned long outstanding_reqs;

u32 capabilities;
+ u32 mcq_capabilities;
int nutrs;
int nutmrs;
u32 reserved_slot;
@@ -965,8 +1018,133 @@ struct ufs_hba {
#endif
u32 luns_avail;
bool complete_put;
+ struct ufs_hw_queue *uhq;
+ struct ufs_hw_queue *dev_cmd_queue;
+ unsigned int nr_queues[HCTX_MAX_TYPES];
+ unsigned int nr_hw_queues;
+ void __iomem *mcq_base;
+ struct ufshcd_res_info_t res[RES_MAX];
+ struct ufshcd_mcq_rop_info_t mcq_rop[ROP_MAX];
+ struct dma_pool *ucd_pool;
+ u32 dao_offset;
+ bool use_mcq;
+ bool mcq_sup;
+ bool ext_iid_sup;
+ bool use_mcq_esi;
+};
+
+enum {
+ ESIVEC_CQ,
+ ESIVEC_SQ,
+ ESIVEC_MAX,
+};
+
+/**
+ * @ucdl_base_addr: UFS Command Descriptor base address
+ * @sqe_base_addr: submission queue entry base address
+ * @sqe_shadow_addr: submission queue entry shadow address
+ * @ucdl_dma_addr: UFS Command Descriptor DMA address
+ * @sqe_dma_addr: submission queue dma address
+ * @cqe_base_addr: completion queue base address
+ * @cqe_dma_addr: completion queue dma address
+ * @lrb: array of lrb for this hardware queue
+ * @max_entries: max number of slots in this hardware queue
+ * @sq_tp_slot: current slot to which SQ tail pointer is pointing
+ * @sq_hp_slot: current slot to which SQ head pointer is pointing
+ * @cq_tp_slot: current slot to which CQ tail pointer is pointing
+ * @cq_hp_slot: current slot to which CQ head pointer is pointing
+ */
+struct ufs_hw_queue {
+ struct utp_transfer_cmd_desc *ucdl_base_addr;
+ void *sqe_base_addr;
+ struct utp_transfer_req_desc *sqe_shadow_addr;
+ dma_addr_t ucdl_dma_addr;
+ dma_addr_t sqe_dma_addr;
+ struct cq_entry *cqe_base_addr;
+ dma_addr_t cqe_dma_addr;
+ struct ufshcd_lrb *lrb;
+ u32 max_entries;
+ u32 id;
+
+ void __iomem *mcq_sq_hp;
+ void __iomem *mcq_sq_tp;
+ void __iomem *mcq_cq_hp;
+ void __iomem *mcq_cq_tp;
+
+ spinlock_t sq_lock;
+ u32 sq_tp_slot;
+ u32 sq_hp_slot;
+ spinlock_t cq_lock;
+ u32 cq_tp_slot;
+ u32 cq_hp_slot;
};

+#define for_each_hw_queue(hba, i) \
+ for ((i) = 0; (i) < (hba)->nr_hw_queues; (i) ++)
+
+#define UFSHCD_MCQ_IO_QUEUE_OFFSET 1
+
+static inline bool is_mcq_enabled(struct ufs_hba *hba)
+{
+ return hba->use_mcq;
+}
+
+static inline bool is_mcq_supported(struct ufs_hba *hba)
+{
+ return hba->mcq_sup;
+}
+
+static inline bool ufshcd_is_hwq_full(struct ufs_hw_queue *q)
+{
+ return (q->sq_hp_slot == ((q->sq_tp_slot + 1) %
+ q->max_entries));
+}
+
+static inline bool ufshcd_is_hwq_empty(struct ufs_hw_queue *q)
+{
+ return (q->sq_tp_slot == q->sq_hp_slot);
+}
+
+static inline void ufshcd_inc_tp(struct ufs_hw_queue *q)
+{
+ u32 next_slot = ((q->sq_tp_slot + 1) % q->max_entries);
+ u32 val = next_slot * sizeof(struct utp_transfer_req_desc);
+
+ q->sq_tp_slot = next_slot;
+
+ writel(val, q->mcq_sq_tp);
+}
+
+static inline struct cq_entry *ufshcd_mcq_cur_cqe(struct ufs_hw_queue *q)
+{
+ struct cq_entry *cqe = q->cqe_base_addr;
+
+ return cqe + q->cq_hp_slot;
+}
+
+static inline bool ufshcd_mcq_is_cq_empty(struct ufs_hw_queue *q)
+{
+ return q->cq_hp_slot == q->cq_tp_slot;
+}
+
+static inline void ufshcd_mcq_inc_cq_hp_slot(struct ufs_hw_queue *q)
+{
+ q->cq_hp_slot ++;
+ if (q->cq_hp_slot == q->max_entries)
+ q->cq_hp_slot = 0;
+}
+
+static inline void ufshcd_mcq_update_cq_hp(struct ufs_hw_queue *q)
+{
+ writel(q->cq_hp_slot * sizeof(struct cq_entry), q->mcq_cq_hp);
+}
+
+static inline void ufshcd_mcq_update_cq_tp_slot(struct ufs_hw_queue *q)
+{
+ u32 val = readl(q->mcq_cq_tp);
+ q->cq_tp_slot = val / sizeof(struct cq_entry);
+}
+
/* Returns true if clocks can be gated. Otherwise false */
static inline bool ufshcd_is_clkgating_allowed(struct ufs_hba *hba)
{
@@ -1017,11 +1195,16 @@ static inline bool ufshcd_is_wb_allowed(struct ufs_hba *hba)
return hba->caps & UFSHCD_CAP_WB_EN;
}

-#define ufshcd_writel(hba, val, reg) \
+#define ufshcd_writel(hba, val, reg) \
writel((val), (hba)->mmio_base + (reg))
#define ufshcd_readl(hba, reg) \
readl((hba)->mmio_base + (reg))

+#define ufsmcq_writel(hba, val, reg) \
+ writel((val), (hba)->mcq_base + (reg))
+#define ufsmcq_readl(hba, reg) \
+ readl((hba)->mcq_base + (reg))
+
/**
* ufshcd_rmwl - perform read/modify/write for a controller register
* @hba: per adapter instance
@@ -1100,6 +1283,27 @@ extern int ufshcd_dme_get_attr(struct ufs_hba *hba, u32 attr_sel,
extern int ufshcd_config_pwr_mode(struct ufs_hba *hba,
struct ufs_pa_layer_attr *desired_pwr_mode);
extern int ufshcd_uic_change_pwr_mode(struct ufs_hba *hba, u8 mode);
+extern void ufshcd_compl_one_lrb(struct ufs_hba *hba, struct ufshcd_lrb *lrbp,
+ struct cq_entry *cqe);
+extern int ufshcd_mcq_init(struct ufs_hba *hba);
+extern void ufshcd_mcq_enable(struct ufs_hba *hba);
+extern int ufshcd_mcq_memory_alloc(struct ufs_hba *hba);
+extern int ufshcd_mcq_memory_configure(struct ufs_hba *hba);
+extern void ufshcd_mcq_config_mac(struct ufs_hba *hba);
+extern u32 ufshcd_mcq_read_cqis(struct ufs_hba *hba, int i);
+extern void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i);
+extern void ufshcd_mcq_enable_cq_intr(struct ufs_hba *hba, u32 intrs);
+extern void ufshcd_mcq_disable_cq_intr(struct ufs_hba *hba, u32 intrs);
+extern void ufshcd_mcq_enable_esi(struct ufs_hba *hba);
+extern void ufshcd_mcq_config_esi(struct ufs_hba *hba, struct msi_msg *msg);
+extern unsigned long ufshcd_mcq_poll_cqe_nolock(struct ufs_hba *hba,
+ struct ufs_hw_queue *hwq);
+extern unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba,
+ struct ufs_hw_queue *hwq);
+extern void ufshcd_mcq_make_queues_operational(struct ufs_hba *hba);
+extern struct ufshcd_lrb * ufshcd_mcq_find_lrb(struct ufs_hba *hba,
+ struct request *req,
+ struct ufs_hw_queue **hq);

/* UIC command interfaces for DME primitives */
#define DME_LOCAL 0
@@ -1232,6 +1436,31 @@ static inline int ufshcd_vops_phy_initialization(struct ufs_hba *hba)
return 0;
}

+static inline int ufshcd_vops_get_outstanding_cqs(struct ufs_hba *hba,
+ unsigned long *ocqs)
+{
+ if (hba->vops && hba->vops->get_outstanding_cqs)
+ return hba->vops->get_outstanding_cqs(hba, ocqs);
+
+ return -EOPNOTSUPP;
+}
+
+static inline int ufshcd_vops_config_mcq_rop(struct ufs_hba *hba)
+{
+ if (hba->vops && hba->vops->config_mcq_rop)
+ return hba->vops->config_mcq_rop(hba);
+
+ return -EOPNOTSUPP;
+}
+
+static inline int ufshcd_vops_config_mcq_esi(struct ufs_hba *hba)
+{
+ if (hba->vops && hba->vops->config_mcq_esi)
+ return hba->vops->config_mcq_esi(hba);
+
+ return -EOPNOTSUPP;
+}
+
extern const struct ufs_pm_lvl_states ufs_pm_lvl_states[];

int ufshcd_dump_regs(struct ufs_hba *hba, size_t offset, size_t len,
diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h
index f81aa95..2643efd 100644
--- a/include/ufs/ufshci.h
+++ b/include/ufs/ufshci.h
@@ -22,6 +22,7 @@ enum {
/* UFSHCI Registers */
enum {
REG_CONTROLLER_CAPABILITIES = 0x00,
+ REG_MCQCAP = 0x04,
REG_UFS_VERSION = 0x08,
REG_CONTROLLER_DEV_ID = 0x10,
REG_CONTROLLER_PROD_ID = 0x14,
@@ -56,9 +57,24 @@ enum {
REG_UFS_CCAP = 0x100,
REG_UFS_CRYPTOCAP = 0x104,

+ REG_UFS_MEM_CFG = 0x300,
+ REG_UFS_MCQ_CFG = 0x380,
+ REG_UFS_ESILBA = 0x384,
+ REG_UFS_ESIUBA = 0x388,
UFSHCI_CRYPTO_REG_SPACE_SIZE = 0x400,
};

+#define MCQ_CFG_MAC_OFFSET 8
+#define MCQ_CFG_MAC_MASK UFS_MASK(0x1FF, MCQ_CFG_MAC_OFFSET)
+
+#define MCQ_QCFGPTR_MASK 0xff
+#define MCQ_QCFGPTR_UNIT 0x200
+#define mcq_sqattr_offset(c) \
+ (((c) >> 16) & MCQ_QCFGPTR_MASK) * MCQ_QCFGPTR_UNIT
+
+#define MCQ_ENTRY_SIZE_IN_DWORD 8
+#define MCQ_QCFG_SIZE 0x40
+
/* Controller capability masks */
enum {
MASK_TRANSFER_REQUESTS_SLOTS = 0x0000001F,
@@ -68,6 +84,53 @@ enum {
MASK_OUT_OF_ORDER_DATA_DELIVERY_SUPPORT = 0x02000000,
MASK_UIC_DME_TEST_MODE_SUPPORT = 0x04000000,
MASK_CRYPTO_SUPPORT = 0x10000000,
+ MASK_MCQ_SUPPORT = 0x40000000,
+};
+
+/* MCQ capability mask */
+enum {
+ MASK_EXT_IID_SUPPORT = 0x00000400,
+ MASK_ROUND_ROBIN_PRI_SUPP = 0x00000200,
+};
+
+enum {
+ REG_SQATTR = 0x0,
+ REG_SQLBA = 0x4,
+ REG_SQUBA = 0x8,
+ REG_SQDAO = 0xC,
+ REG_SQISAO = 0x10,
+ REG_SQCFG = 0x14,
+
+ REG_CQATTR = 0x20,
+ REG_CQLBA = 0x24,
+ REG_CQUBA = 0x28,
+ REG_CQDAO = 0x2C,
+ REG_CQISAO = 0x30,
+ REG_CQCFG = 0x34,
+};
+
+enum {
+ REG_SQHP = 0x0,
+ REG_SQTP = 0x4,
+ REG_SQRTC = 0x8,
+ REG_SQCTI = 0xC,
+ REG_SQRTS = 0x10,
+};
+
+enum {
+ REG_SQIS = 0x0,
+ REG_SQIE = 0x4,
+};
+
+enum {
+ REG_CQHP = 0x0,
+ REG_CQTP = 0x4,
+};
+
+enum {
+ REG_CQIS = 0x0,
+ REG_CQIE = 0x4,
+ REG_MCQIACR = 0x8,
};

#define UFS_MASK(mask, offset) ((mask) << (offset))
@@ -126,6 +189,8 @@ static inline u32 ufshci_version(u32 major, u32 minor)
#define CONTROLLER_FATAL_ERROR 0x10000
#define SYSTEM_BUS_FATAL_ERROR 0x20000
#define CRYPTO_ENGINE_FATAL_ERROR 0x40000
+#define MCQ_SQ_EVENT_STATUS 0x80000
+#define MCQ_CQ_EVENT_STATUS 0x100000

#define UFSHCD_UIC_HIBERN8_MASK (UIC_HIBERNATE_ENTER |\
UIC_HIBERNATE_EXIT)
@@ -227,6 +292,9 @@ enum {
/* UTMRLRSR - UTP Task Management Request Run-Stop Register 80h */
#define UTP_TASK_REQ_LIST_RUN_STOP_BIT 0x1

+/* CQISy - CQ y Interrupt Status Register */
+#define UFSHCD_MCQ_CQIS_TEPS 0x1
+
/* UICCMD - UIC Command */
#define COMMAND_OPCODE_MASK 0xFF
#define GEN_SELECTOR_INDEX_MASK 0xFFFF
@@ -482,6 +550,27 @@ struct utp_transfer_req_desc {
__le16 prd_table_offset;
};

+struct cq_entry {
+
+ /* DW 0-1 */
+ __le32 command_desc_base_addr_lo;
+ __le32 command_desc_base_addr_hi;
+
+ /* DW 2 */
+ __le16 response_upiu_length;
+ __le16 response_upiu_offset;
+
+ /* DW 3 */
+ __le16 prd_table_length;
+ __le16 prd_table_offset;
+
+ /* DW 4 */
+ __le32 status;
+
+ /* DW 5-7 */
+ u32 reserved[3];
+};
+
/*
* UTMRD structure.
*/
--
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project.


2022-07-19 23:05:56

by Bart Van Assche

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/19/22 00:01, Can Guo wrote:
> +/**
> + * @ucdl_base_addr: UFS Command Descriptor base address
> + * @sqe_base_addr: submission queue entry base address
> + * @sqe_shadow_addr: submission queue entry shadow address
> + * @ucdl_dma_addr: UFS Command Descriptor DMA address
> + * @sqe_dma_addr: submission queue dma address
> + * @cqe_base_addr: completion queue base address
> + * @cqe_dma_addr: completion queue dma address
> + * @lrb: array of lrb for this hardware queue
> + * @max_entries: max number of slots in this hardware queue
> + * @sq_tp_slot: current slot to which SQ tail pointer is pointing
> + * @sq_hp_slot: current slot to which SQ head pointer is pointing
> + * @cq_tp_slot: current slot to which CQ tail pointer is pointing
> + * @cq_hp_slot: current slot to which CQ head pointer is pointing
> + */
> +struct ufs_hw_queue {
> + struct utp_transfer_cmd_desc *ucdl_base_addr;
> + void *sqe_base_addr;
> + struct utp_transfer_req_desc *sqe_shadow_addr;
> + dma_addr_t ucdl_dma_addr;
> + dma_addr_t sqe_dma_addr;
> + struct cq_entry *cqe_base_addr;
> + dma_addr_t cqe_dma_addr;
> + struct ufshcd_lrb *lrb;
> + u32 max_entries;
> + u32 id;
> +
> + void __iomem *mcq_sq_hp;
> + void __iomem *mcq_sq_tp;
> + void __iomem *mcq_cq_hp;
> + void __iomem *mcq_cq_tp;
> +
> + spinlock_t sq_lock;
> + u32 sq_tp_slot;
> + u32 sq_hp_slot;
> + spinlock_t cq_lock;
> + u32 cq_tp_slot;
> + u32 cq_hp_slot;
> };
>

Please move all new data structures into a private header that can be
moved into a private header. I think the above data structure can be
moved from a public into a private header (a header that is not shared
with the host drivers).

> +#define for_each_hw_queue(hba, i) \
> + for ((i) = 0; (i) < (hba)->nr_hw_queues; (i) ++)

A macro like the above reduces code readability. Please remove this
macro definition.

> +static inline bool is_mcq_enabled(struct ufs_hba *hba)
> +{
> + return hba->use_mcq;
> +}
> +
> +static inline bool is_mcq_supported(struct ufs_hba *hba)
> +{
> + return hba->mcq_sup;
> +}

The names of the two above functions are longer than their
implementation so it's probably better to remove these function definitions.

> -#define ufshcd_writel(hba, val, reg) \
> +#define ufshcd_writel(hba, val, reg) \
> writel((val), (hba)->mmio_base + (reg))

Is this a whitespace-only change? If so, should that change be in this
patch?

Thanks,

Bart.

2022-07-19 23:10:12

by Bart Van Assche

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/19/22 00:01, Can Guo wrote:
> Adds MCQ support to UFS driver.

The description of this patch is too short. It should be explained how
the UFSHCI queues are made visible to the block layer. It should also be
explained which roles are assigned to queues and how (HCTX_TYPE_*). How
the MAXQ configuration register is handled should also be explained.

The host lock is obtained in multiple UFSHCI 3.0 code paths. Information
about the role of the host lock in MCQ code should be provided.

Thanks,

Bart.

2022-07-20 08:09:32

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

Hi Can,

I love your patch! Perhaps something to improve:

[auto build test WARNING on jejb-scsi/for-next]
[also build test WARNING on mkp-scsi/for-next next-20220719]
[cannot apply to linus/master v5.19-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Can-Guo/UFS-Multi-Circular-Queue-MCQ/20220719-150436
base: https://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git for-next
config: x86_64-randconfig-r036-20220718 (https://download.01.org/0day-ci/archive/20220720/[email protected]/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project dd5635541cd7bbd62cd59b6694dfb759b6e9a0d8)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/2b7356bcd24efd2d6b69f04dd9fd010c4256cc7e
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Can-Guo/UFS-Multi-Circular-Queue-MCQ/20220719-150436
git checkout 2b7356bcd24efd2d6b69f04dd9fd010c4256cc7e
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash drivers/ufs/core/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <[email protected]>

All warnings (new ones prefixed by >>):

>> drivers/ufs/core/ufshcd.c:5465:13: warning: no previous prototype for function 'ufshcd_transfer_req_compl' [-Wmissing-prototypes]
irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
^
drivers/ufs/core/ufshcd.c:5465:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
^
static
1 warning generated.


vim +/ufshcd_transfer_req_compl +5465 drivers/ufs/core/ufshcd.c

5456
5457 /**
5458 * ufshcd_transfer_req_compl - handle SCSI and query command completion
5459 * @hba: per adapter instance
5460 *
5461 * Returns
5462 * IRQ_HANDLED - If interrupt is valid
5463 * IRQ_NONE - If invalid interrupt
5464 */
> 5465 irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
5466 {
5467 /* Resetting interrupt aggregation counters first and reading the
5468 * DOOR_BELL afterward allows us to handle all the completed requests.
5469 * In order to prevent other interrupts starvation the DB is read once
5470 * after reset. The down side of this solution is the possibility of
5471 * false interrupt if device completes another request after resetting
5472 * aggregation and before reading the DB.
5473 */
5474 if (ufshcd_is_intr_aggr_allowed(hba) &&
5475 !(hba->quirks & UFSHCI_QUIRK_SKIP_RESET_INTR_AGGR))
5476 ufshcd_reset_intr_aggr(hba);
5477
5478 if (ufs_fail_completion())
5479 return IRQ_HANDLED;
5480
5481 /*
5482 * Ignore the ufshcd_poll() return value and return IRQ_HANDLED since we
5483 * do not want polling to trigger spurious interrupt complaints.
5484 */
5485 ufshcd_poll(hba->host, 0);
5486
5487 return IRQ_HANDLED;
5488 }
5489 EXPORT_SYMBOL_GPL(ufshcd_transfer_req_compl);
5490

--
0-DAY CI Kernel Test Service
https://01.org/lkp

2022-07-20 11:50:46

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

Hi Can,

I love your patch! Yet something to improve:

[auto build test ERROR on jejb-scsi/for-next]
[also build test ERROR on mkp-scsi/for-next next-20220719]
[cannot apply to linus/master v5.19-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Can-Guo/UFS-Multi-Circular-Queue-MCQ/20220719-150436
base: https://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git for-next
config: i386-randconfig-a005 (https://download.01.org/0day-ci/archive/20220720/[email protected]/config)
compiler: gcc-11 (Debian 11.3.0-3) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/2b7356bcd24efd2d6b69f04dd9fd010c4256cc7e
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Can-Guo/UFS-Multi-Circular-Queue-MCQ/20220719-150436
git checkout 2b7356bcd24efd2d6b69f04dd9fd010c4256cc7e
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/ufs/core/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

drivers/ufs/core/ufs-mcq.c: In function 'ufshcd_mcq_release_resource':
>> drivers/ufs/core/ufs-mcq.c:275:25: error: implicit declaration of function 'devm_iounmap'; did you mean 'pci_iounmap'? [-Werror=implicit-function-declaration]
275 | devm_iounmap(hba->dev, res->base);
| ^~~~~~~~~~~~
| pci_iounmap
cc1: some warnings being treated as errors


vim +275 drivers/ufs/core/ufs-mcq.c

265
266 static void ufshcd_mcq_release_resource(struct ufs_hba *hba)
267 {
268 struct ufshcd_res_info_t *res;
269 int i;
270
271 for (i = RES_MCQ; i < RES_MAX; i++) {
272 res = &hba->res[i];
273
274 if(res->base) {
> 275 devm_iounmap(hba->dev, res->base);
276 res->base = NULL;
277 }
278
279 if (res->is_alloc)
280 devm_kfree(hba->dev, res->resource);
281 }
282 }
283

--
0-DAY CI Kernel Test Service
https://01.org/lkp

2022-07-20 17:07:24

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

Hi Can,

I love your patch! Yet something to improve:

[auto build test ERROR on jejb-scsi/for-next]
[also build test ERROR on mkp-scsi/for-next next-20220720]
[cannot apply to linus/master v5.19-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Can-Guo/UFS-Multi-Circular-Queue-MCQ/20220719-150436
base: https://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git for-next
config: i386-randconfig-a005-20220718 (https://download.01.org/0day-ci/archive/20220721/[email protected]/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project dd5635541cd7bbd62cd59b6694dfb759b6e9a0d8)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/2b7356bcd24efd2d6b69f04dd9fd010c4256cc7e
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Can-Guo/UFS-Multi-Circular-Queue-MCQ/20220719-150436
git checkout 2b7356bcd24efd2d6b69f04dd9fd010c4256cc7e
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash drivers/ufs/

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

>> drivers/ufs/core/ufs-mcq.c:275:4: error: call to undeclared function 'devm_iounmap'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
devm_iounmap(hba->dev, res->base);
^
1 error generated.


vim +/devm_iounmap +275 drivers/ufs/core/ufs-mcq.c

265
266 static void ufshcd_mcq_release_resource(struct ufs_hba *hba)
267 {
268 struct ufshcd_res_info_t *res;
269 int i;
270
271 for (i = RES_MCQ; i < RES_MAX; i++) {
272 res = &hba->res[i];
273
274 if(res->base) {
> 275 devm_iounmap(hba->dev, res->base);
276 res->base = NULL;
277 }
278
279 if (res->is_alloc)
280 devm_kfree(hba->dev, res->resource);
281 }
282 }
283

--
0-DAY CI Kernel Test Service
https://01.org/lkp

2022-07-20 18:04:27

by Asutosh Das

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

Hi Bart,

On 7/19/2022 4:07 PM, Bart Van Assche wrote:
> On 7/19/22 00:01, Can Guo wrote:
>> Adds MCQ support to UFS driver.
>
> The description of this patch is too short. It should be explained how
> the UFSHCI queues are made visible to the block layer. It should also be
> explained which roles are assigned to queues and how (HCTX_TYPE_*). How
> the MAXQ configuration register is handled should also be explained.
>
> The host lock is obtained in multiple UFSHCI 3.0 code paths. Information
> about the role of the host lock in MCQ code should be provided.
>
> Thanks,
>
> Bart.
Thanks for having taken a look.
I'll check the comments and upload a next version.

-asd

2022-07-22 07:41:22

by Avri Altman

[permalink] [raw]
Subject: RE: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

> +static int ufshcd_mcq_config_resource(struct ufs_hba *hba)
> +{
> + struct platform_device *pdev = to_platform_device(hba->dev);
> + struct ufshcd_res_info_t *res;
> + struct resource *res_mem, *res_mcq;
> + int i, ret = 0;
> +
> + memcpy(hba->res, ufshcd_res_info, sizeof(ufshcd_res_info));
> +
> + for (i = 0; i < RES_MAX; i++) {
> + res = &hba->res[i];
> +
> + res->resource = platform_get_resource_byname(pdev,
> + IORESOURCE_MEM,
> + res->name);
> + if (!res->resource) {
> + dev_info(hba->dev, "Resource %s not provided\n", res-
> >name);
> + if (i == RES_MEM)
> + return -ENOMEM;
> + continue;
> + } else if (i == RES_MEM) {
> + res_mem = res->resource;
> + res->base = hba->mmio_base;
> + continue;
> + }
> +
> + res->base = devm_ioremap_resource(hba->dev, res->resource);
> + if (IS_ERR(res->base)) {
> + dev_err(hba->dev, "Failed to map res %s, err = %d\n",
> + res->name, (int)PTR_ERR(res->base));
> + res->base = NULL;
> + ret = PTR_ERR(res->base);
> + goto out_err;
> + }
> + }
> +
> + res = &hba->res[RES_MCQ];
> + /* MCQ resource provided */
> + if (res->base)
> + goto out;
> +
> + /* Manually allocate MCQ resource */
Did you consider to force providing the MCQ configuration?

> + res_mcq = res->resource;
> + res_mcq = devm_kzalloc(hba->dev, sizeof(*res_mcq), GFP_KERNEL);
> + if (!res_mcq) {
> + dev_err(hba->dev, "Failed to alloate MCQ resource\n");
> + goto out_err;
> + }
> + res->is_alloc = true;
> +
> + res_mcq->start = res_mem->start +
> + mcq_sqattr_offset(hba->mcq_capabilities);
> + res_mcq->end = res_mcq->start + 32 * MCQ_QCFG_SIZE - 1;
Shouldn't there can be MCQCap.MAXQ queues and no more than 32?


> +int ufshcd_mcq_init(struct ufs_hba *hba)
> +{
> + struct Scsi_Host *host = hba->host;
> + struct ufs_hw_queue *hwq;
> + int i, ret = 0;
> +
> + if (!is_mcq_supported(hba))
> + return 0;
> +
> + ret = ufshcd_mcq_config_resource(hba);
> + if (ret) {
> + dev_err(hba->dev, "Failed to config MCQ resource\n");
> + return ret;
> + }
> +
> + ret = ufshcd_vops_config_mcq_rop(hba);
> + if (ret) {
> + dev_err(hba->dev, "MCQ Runtime Operation Pointers not
> configured\n");
> + goto out_err;
> + }
> +
> + hba->nr_queues[HCTX_TYPE_DEFAULT] = num_possible_cpus();
> + hba->nr_queues[HCTX_TYPE_READ] = 0;
> + hba->nr_queues[HCTX_TYPE_POLL] = 1;
> +
> + for (i = 0; i < HCTX_MAX_TYPES; i++)
> + host->nr_hw_queues += hba->nr_queues[i];
> +
> + host->can_queue = hba->nutrs;
> + host->cmd_per_lun = hba->nutrs;
> +
> + /* One more reserved for dev_cmd_queue */
> + hba->nr_hw_queues = host->nr_hw_queues + 1;
Is it possible, since MCQ memory space is *added* to the UTR & UTMR lists,
That we'll keep using the legacy doorbell for query commands?
Wouldn't it will simplify the hw_queue bookkeeping


> -#define ufshcd_hex_dump(prefix_str, buf, len) do { \
> - size_t __len = (len); \
> - print_hex_dump(KERN_ERR, prefix_str, \
> - __len > 4 ? DUMP_PREFIX_OFFSET : DUMP_PREFIX_NONE,\
> - 16, 4, buf, __len, false); \
> +#define ufshcd_hex_dump(prefix_str, buf, len) do { \
> + size_t __len = (len); \
> + \
> + print_hex_dump(KERN_ERR, prefix_str, \
> + __len > 4 ? DUMP_PREFIX_OFFSET : DUMP_PREFIX_NONE, \
> + 16, 4, buf, __len, false); \
> + \
> } while (0)
Should this be part of this patch?

> +#define UFSHCD_MCQ_IO_QUEUE_OFFSET 1
Maybe add a comment above: "queue 0 is reserved for query commands" or something
That is if the query commands don't use the legacy doorbell

> +static inline bool ufshcd_is_hwq_full(struct ufs_hw_queue *q)
> +{
> + return (q->sq_hp_slot == ((q->sq_tp_slot + 1) %
> + q->max_entries));
> +}
Isn't sq_tp_slot is already % q->max_entries ?


Thanks,
Avri

2022-07-22 14:54:41

by Avri Altman

[permalink] [raw]
Subject: RE: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

> desc_buf[DEVICE_DESC_PARAM_SPEC_VER + 1];
> +
> + dev_info->bqueuedepth = desc_buf[DEVICE_DESC_PARAM_Q_DPTH];
> +
> b_ufs_feature_sup = desc_buf[DEVICE_DESC_PARAM_UFS_FEAT];
>
> model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME];
> @@ -8184,6 +8315,9 @@ static int ufshcd_probe_hba(struct ufs_hba *hba,
> bool init_dev_params)
> ret = ufshcd_device_params_init(hba);
> if (ret)
> goto out;
> +
> + if (is_mcq_enabled(hba) && hba->dev_info.bqueuedepth)
> + ufshcd_mcq_config_mac(hba);
> }
So what happens if the device does not implements the shared queueing architecture.
MCQ is still enabled?

Thanks,
Avri

2022-07-22 18:19:57

by Asutosh Das

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

Hi Avri
Thanks for taking a look.

On 7/22/2022 12:31 AM, Avri Altman wrote:
>> +static int ufshcd_mcq_config_resource(struct ufs_hba *hba)
>> +{
>> + struct platform_device *pdev = to_platform_device(hba->dev);
>> + struct ufshcd_res_info_t *res;
>> + struct resource *res_mem, *res_mcq;
>> + int i, ret = 0;
>> +
>> + memcpy(hba->res, ufshcd_res_info, sizeof(ufshcd_res_info));
>> +
>> + for (i = 0; i < RES_MAX; i++) {
>> + res = &hba->res[i];
>> +
>> + res->resource = platform_get_resource_byname(pdev,
>> + IORESOURCE_MEM,
>> + res->name);
>> + if (!res->resource) {
>> + dev_info(hba->dev, "Resource %s not provided\n", res-
>>> name);
>> + if (i == RES_MEM)
>> + return -ENOMEM;
>> + continue;
>> + } else if (i == RES_MEM) {
>> + res_mem = res->resource;
>> + res->base = hba->mmio_base;
>> + continue;
>> + }
>> +
>> + res->base = devm_ioremap_resource(hba->dev, res->resource);
>> + if (IS_ERR(res->base)) {
>> + dev_err(hba->dev, "Failed to map res %s, err = %d\n",
>> + res->name, (int)PTR_ERR(res->base));
>> + res->base = NULL;
>> + ret = PTR_ERR(res->base);
>> + goto out_err;
>> + }
>> + }
>> +
>> + res = &hba->res[RES_MCQ];
>> + /* MCQ resource provided */
>> + if (res->base)
>> + goto out;
>> +
>> + /* Manually allocate MCQ resource */
> Did you consider to force providing the MCQ configuration?
>
>> + res_mcq = res->resource;
>> + res_mcq = devm_kzalloc(hba->dev, sizeof(*res_mcq), GFP_KERNEL);
>> + if (!res_mcq) {
>> + dev_err(hba->dev, "Failed to alloate MCQ resource\n");
>> + goto out_err;
>> + }
>> + res->is_alloc = true;
>> +
>> + res_mcq->start = res_mem->start +
>> + mcq_sqattr_offset(hba->mcq_capabilities);
>> + res_mcq->end = res_mcq->start + 32 * MCQ_QCFG_SIZE - 1;
> Shouldn't there can be MCQCap.MAXQ queues and no more than 32?
>
Yes correct. Will change it in the next version.
>
>> +int ufshcd_mcq_init(struct ufs_hba *hba)
>> +{
>> + struct Scsi_Host *host = hba->host;
>> + struct ufs_hw_queue *hwq;
>> + int i, ret = 0;
>> +
>> + if (!is_mcq_supported(hba))
>> + return 0;
>> +
>> + ret = ufshcd_mcq_config_resource(hba);
>> + if (ret) {
>> + dev_err(hba->dev, "Failed to config MCQ resource\n");
>> + return ret;
>> + }
>> +
>> + ret = ufshcd_vops_config_mcq_rop(hba);
>> + if (ret) {
>> + dev_err(hba->dev, "MCQ Runtime Operation Pointers not
>> configured\n");
>> + goto out_err;
>> + }
>> +
>> + hba->nr_queues[HCTX_TYPE_DEFAULT] = num_possible_cpus();
>> + hba->nr_queues[HCTX_TYPE_READ] = 0;
>> + hba->nr_queues[HCTX_TYPE_POLL] = 1;
>> +
>> + for (i = 0; i < HCTX_MAX_TYPES; i++)
>> + host->nr_hw_queues += hba->nr_queues[i];
>> +
>> + host->can_queue = hba->nutrs;
>> + host->cmd_per_lun = hba->nutrs;
>> +
>> + /* One more reserved for dev_cmd_queue */
>> + hba->nr_hw_queues = host->nr_hw_queues + 1;
> Is it possible, since MCQ memory space is *added* to the UTR & UTMR lists,
> That we'll keep using the legacy doorbell for query commands?
> Wouldn't it will simplify the hw_queue bookkeeping
>
Umm, I didn't understand this suggestion. Please can you elaborate a bit.
When MCQ mode is selected the Config.QT is set to 1.
So how would we keep using the legacy doorbell for query commands?

>
>> -#define ufshcd_hex_dump(prefix_str, buf, len) do { \
>> - size_t __len = (len); \
>> - print_hex_dump(KERN_ERR, prefix_str, \
>> - __len > 4 ? DUMP_PREFIX_OFFSET : DUMP_PREFIX_NONE,\
>> - 16, 4, buf, __len, false); \
>> +#define ufshcd_hex_dump(prefix_str, buf, len) do { \
>> + size_t __len = (len); \
>> + \
>> + print_hex_dump(KERN_ERR, prefix_str, \
>> + __len > 4 ? DUMP_PREFIX_OFFSET : DUMP_PREFIX_NONE, \
>> + 16, 4, buf, __len, false); \
>> + \
>> } while (0)
> Should this be part of this patch?
>
No it shouldn't. Will remove this.
>> +#define UFSHCD_MCQ_IO_QUEUE_OFFSET 1
> Maybe add a comment above: "queue 0 is reserved for query commands" or something
> That is if the query commands don't use the legacy doorbell
>
>> +static inline bool ufshcd_is_hwq_full(struct ufs_hw_queue *q)
>> +{
>> + return (q->sq_hp_slot == ((q->sq_tp_slot + 1) %
>> + q->max_entries));
>> +}
> Isn't sq_tp_slot is already % q->max_entries ?
>
This function is unused in this patchset and I will remove it in the
next version.

>
> Thanks,
> Avri

2022-07-22 18:38:43

by Bart Van Assche

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/22/22 00:31, Avri Altman wrote:
>> +#define UFSHCD_MCQ_IO_QUEUE_OFFSET 1
> Maybe add a comment above: "queue 0 is reserved for query commands" or something
> That is if the query commands don't use the legacy doorbell

Is it essential to reserve a queue for device management commands?
Wouldn't it be better to have one additional queue for submitting I/O
commands rather than reserving a queue for device management commands?

Thanks,

Bart.

2022-07-22 19:38:41

by Avri Altman

[permalink] [raw]
Subject: RE: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

> >> +
> >> + /* Manually allocate MCQ resource */
> > Did you consider to force providing the MCQ configuration?
How about this one?

> > Is it possible, since MCQ memory space is *added* to the UTR & UTMR
> > lists, That we'll keep using the legacy doorbell for query commands?
> > Wouldn't it will simplify the hw_queue bookkeeping
> >
> Umm, I didn't understand this suggestion. Please can you elaborate a bit.
> When MCQ mode is selected the Config.QT is set to 1.
> So how would we keep using the legacy doorbell for query commands?
Sorry - my bad.
Thanks for clarifying.

Thanks,
Avri

2022-07-22 20:39:05

by Asutosh Das

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/22/2022 12:37 PM, Avri Altman wrote:
>>>> +
>>>> + /* Manually allocate MCQ resource */
>>> Did you consider to force providing the MCQ configuration?
> How about this one?
>
Sorry missed this comment.
Do you mean to return error if MCQ configuration is undefined?

-asd

2022-07-22 20:39:49

by Avri Altman

[permalink] [raw]
Subject: RE: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support


> On 7/22/2022 12:37 PM, Avri Altman wrote:
> >>>> +
> >>>> + /* Manually allocate MCQ resource */
> >>> Did you consider to force providing the MCQ configuration?
> > How about this one?
> >
> Sorry missed this comment.
> Do you mean to return error if MCQ configuration is undefined?
Yes. It's like an unset platform capability.

Thanks,
Avri
>
> -asd

2022-07-22 21:14:32

by Asutosh Das

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/22/2022 1:22 PM, Avri Altman wrote:
>
>> On 7/22/2022 12:37 PM, Avri Altman wrote:
>>>>>> +
>>>>>> + /* Manually allocate MCQ resource */
>>>>> Did you consider to force providing the MCQ configuration?
>>> How about this one?
>>>
>> Sorry missed this comment.
>> Do you mean to return error if MCQ configuration is undefined?
> Yes. It's like an unset platform capability.
>
Thanks, let me consider this.

> Thanks,
> Avri
>>
>> -asd

-asd

2022-07-23 15:17:39

by Avri Altman

[permalink] [raw]
Subject: RE: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

> +#define MCQ_ROP_OFFSET_n(p, i) \
> + hba->mcq_rop[(p)].offset + hba->mcq_rop[(p)].stride * (i)
Can you please explain the 0x100 stride thing?
Theoretically, each rop set is 48Bytes long, or did I get it wrong?

Thanks,
Avri

2022-07-23 16:06:40

by Avri Altman

[permalink] [raw]
Subject: RE: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

> +
> + /* sqen|size|cqid */
> + ufsmcq_writel(hba, (1 << 31) | qsize | (i << 16),
> + MCQ_CFG_n(REG_SQATTR, i));
So there is a 1X1 SQ-CQ topology.
Isn't that should be configurable?

Thanks,
Avri

2022-07-23 20:43:41

by Avri Altman

[permalink] [raw]
Subject: RE: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

> -static void ufshcd_host_memory_configure(struct ufs_hba *hba)
> +static int ufshcd_host_memory_configure(struct ufs_hba *hba)
> {
> struct utp_transfer_req_desc *utrdlp;
> dma_addr_t cmd_desc_dma_addr;
> @@ -3721,6 +3764,9 @@ static void ufshcd_host_memory_configure(struct
> ufs_hba *hba)
> int cmd_desc_size;
> int i;
>
> + if (is_mcq_enabled(hba))
> + return ufshcd_mcq_memory_configure(hba);
> +
> utrdlp = hba->utrdl_base_addr;
>
> response_offset =
> @@ -3759,6 +3805,8 @@ static void ufshcd_host_memory_configure(struct
> ufs_hba *hba)
>
> ufshcd_init_lrb(hba, &hba->lrb[i], i);
If is_mcq_enabled, do you still call ufshcd_init_lrb?

Thanks,
Avri

> }

2022-07-23 21:28:26

by Avri Altman

[permalink] [raw]
Subject: RE: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

> static inline
> -void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
> +void ufshcd_send_command(struct ufs_hba *hba, struct ufshcd_lrb *lrbp,
> + struct ufs_hw_queue *hwq)
> {
> - struct ufshcd_lrb *lrbp = &hba->lrb[task_tag];
> unsigned long flags;
>
> lrbp->issue_time_stamp = ktime_get();
> lrbp->compl_time_stamp = ktime_set(0, 0);
> - ufshcd_add_command_trace(hba, task_tag, UFS_CMD_SEND);
> + ufshcd_add_command_trace(hba, lrbp, UFS_CMD_SEND);
> ufshcd_clk_scaling_start_busy(hba);
> if (unlikely(ufshcd_should_inform_monitor(hba, lrbp)))
> ufshcd_start_monitor(hba, lrbp);
>
> - spin_lock_irqsave(&hba->outstanding_lock, flags);
> - if (hba->vops && hba->vops->setup_xfer_req)
> - hba->vops->setup_xfer_req(hba, task_tag, !!lrbp->cmd);
> - __set_bit(task_tag, &hba->outstanding_reqs);
> - ufshcd_writel(hba, 1 << task_tag,
> REG_UTP_TRANSFER_REQ_DOOR_BELL);
> - spin_unlock_irqrestore(&hba->outstanding_lock, flags);
> + if (is_mcq_enabled(hba)) {
> + int utrd_size = sizeof(struct utp_transfer_req_desc);
Maybe we can map some designated ops, so all those if (is_mcq) can be avoided in the data-path.
Also maybe we can constify sizeof(struct utp_transfer_req_desc) which is used now few times.

Thanks,
Avri

2022-07-24 03:29:12

by Bart Van Assche

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/23/22 14:23, Avri Altman wrote:
>> static inline
>> -void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
>> +void ufshcd_send_command(struct ufs_hba *hba, struct ufshcd_lrb *lrbp,
>> + struct ufs_hw_queue *hwq)
>> {
>> - struct ufshcd_lrb *lrbp = &hba->lrb[task_tag];
>> unsigned long flags;
>>
>> lrbp->issue_time_stamp = ktime_get();
>> lrbp->compl_time_stamp = ktime_set(0, 0);
>> - ufshcd_add_command_trace(hba, task_tag, UFS_CMD_SEND);
>> + ufshcd_add_command_trace(hba, lrbp, UFS_CMD_SEND);
>> ufshcd_clk_scaling_start_busy(hba);
>> if (unlikely(ufshcd_should_inform_monitor(hba, lrbp)))
>> ufshcd_start_monitor(hba, lrbp);
>>
>> - spin_lock_irqsave(&hba->outstanding_lock, flags);
>> - if (hba->vops && hba->vops->setup_xfer_req)
>> - hba->vops->setup_xfer_req(hba, task_tag, !!lrbp->cmd);
>> - __set_bit(task_tag, &hba->outstanding_reqs);
>> - ufshcd_writel(hba, 1 << task_tag,
>> REG_UTP_TRANSFER_REQ_DOOR_BELL);
>> - spin_unlock_irqrestore(&hba->outstanding_lock, flags);
>> + if (is_mcq_enabled(hba)) {
>> + int utrd_size = sizeof(struct utp_transfer_req_desc);
>
> Maybe we can map some designated ops, so all those if (is_mcq) can be avoided in the data-path.
> Also maybe we can constify sizeof(struct utp_transfer_req_desc) which is used now few times.

Hi Avri,

Since conditional branches are significantly faster than indirect
function calls I'm fine with keeping the if (is_mcq) conditionals.

Thanks,

Bart.

2022-07-24 03:32:10

by Bart Van Assche

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/23/22 08:26, Avri Altman wrote:
>> +
>> + /* sqen|size|cqid */
>> + ufsmcq_writel(hba, (1 << 31) | qsize | (i << 16),
>> + MCQ_CFG_n(REG_SQATTR, i));
> So there is a 1X1 SQ-CQ topology.
> Isn't that should be configurable?

Hi Avri,

I'm in favor of starting with a 1:1 SQ:CQ mapping and only adding
support for other approaches if there is a very good reason.

Thanks,

Bart.

2022-07-24 04:09:21

by Avri Altman

[permalink] [raw]
Subject: RE: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

> @@ -2558,7 +2587,8 @@ void ufshcd_prepare_utp_scsi_cmd_upiu(struct
> ufshcd_lrb *lrbp, u8 upiu_flags)
> UPIU_TRANSACTION_COMMAND, upiu_flags,
> lrbp->lun, lrbp->task_tag);
> ucd_req_ptr->header.dword_1 = UPIU_HEADER_DWORD(
> - UPIU_COMMAND_SET_TYPE_SCSI, 0, 0, 0);
> + UPIU_COMMAND_SET_TYPE_SCSI |
> + (lrbp->hw_queue_id << 4), 0, 0, 0);
This is fine, as long as we have 16 queues or less.
Otherwise, we need to fill the EXT_IID as well (only if bEXTIIDEn = 1).

Also, don't we need to do this for query commands as well?
Or at least add a comment that the queue id for query command is 0.

Thanks,
Avri

2022-07-24 04:39:01

by Avri Altman

[permalink] [raw]
Subject: RE: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

> +
> +/**
> + * @ucdl_base_addr: UFS Command Descriptor base address
> + * @sqe_base_addr: submission queue entry base address
> + * @sqe_shadow_addr: submission queue entry shadow address
When you are editing your commit log, could you please also say something about the shadow queues concept?
And why it is a good idea to maintain 2 sets of addresses, which basically points to the same place?

Thanks,
Avri

2022-07-24 07:31:42

by Avri Altman

[permalink] [raw]
Subject: RE: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

> +void ufshcd_mcq_config_mac(struct ufs_hba *hba)
> +{
> + u32 val = ufshcd_readl(hba, REG_UFS_MCQ_CFG);
> +
> + val &= ~MCQ_CFG_MAC_MASK;
> + val |= hba->dev_info.bqueuedepth << MCQ_CFG_MAC_OFFSET;
> + ufshcd_writel(hba, val, REG_UFS_MCQ_CFG);
> +}
> +EXPORT_SYMBOL_GPL(ufshcd_mcq_config_mac);
Arbitration scheme is something that I would expect to be configurable.
Or at least, add a comment explicitly saying that you choose a round-robin arbitration scheme.

Thanks,
Avri

2022-07-24 22:21:47

by Bean Huo

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support


Hi Can/Asutosh

A few questions about MCQ configuration:


On Tue, 2022-07-19 at 00:01 -0700, Can Guo wrote:
> From: Asutosh Das <[email protected]>
>
> Adds MCQ support to UFS driver.
>
> Co-developed-by: Can Guo <[email protected]>
> Signed-off-by: Asutosh Das <[email protected]>
> Signed-off-by: Can Guo <[email protected]>
> ---
>
> +void ufshcd_mcq_config_mac(struct ufs_hba *hba)
> +{
> +       u32 val = ufshcd_readl(hba, REG_UFS_MCQ_CFG);
> +
> +       val &= ~MCQ_CFG_MAC_MASK;
> +       val |= hba->dev_info.bqueuedepth << MCQ_CFG_MAC_OFFSET;
> +       ufshcd_writel(hba, val, REG_UFS_MCQ_CFG);

Here you set MaxActiveCommand to dev_info.bqueuedepth (this limit comes
from UFS devices). I see in the qsize configuration that you want to
set the queue depth in each HW queue to be hba->nutrs (this limit comes
from UFSHCI), should not it be min(device limit, ufshci limit)?

> +}
> +EXPORT_SYMBOL_GPL(ufshcd_mcq_config_mac);
> +
>
...
> +
> +       for_each_hw_queue(hba, i) {
> +               hwq = &hba->uhq[i];
> +               hwq->id = i;
> +               qsize = hwq->max_entries * MCQ_ENTRY_SIZE_IN_DWORD -
> 1;

qsize is hba->nutrs, 32*8-1 = 255 =256DW, per draft spec , should not
be 8DW in 4.0?

> +
> +               /* SQLBA */
> +               ufsmcq_writel(hba, lower_32_bits(hwq->sqe_dma_addr),
> +                             MCQ_CFG_n(REG_SQLBA, i));
> +               /* SQUBA */
> +               ufsmcq_writel(hba, upper_32_bits(hwq->sqe_dma_addr),
> +                             MCQ_CFG_n(REG_SQUBA, i));
> +               /* SQDAO */
> +               ufsmcq_writel(hba, MCQ_ROP_OFFSET_n(ROP_SQD, i),
> +                             MCQ_CFG_n(REG_SQDAO, i));
>

...

>        }
> +
> +out:
> +       hba->mcq_base = res->base;
> +       return 0;
> +
> +out_err:
> +       ufshcd_mcq_release_resource(hba);
> +       return ret;
> +}
> +
> +int ufshcd_mcq_init(struct ufs_hba *hba)
> +{
> +       struct Scsi_Host *host = hba->host;
> +       struct ufs_hw_queue *hwq;
> +       int i, ret = 0;
> +
> +       if (!is_mcq_supported(hba))
> +               return 0;
> +
> +       ret = ufshcd_mcq_config_resource(hba);
> +       if (ret) {
> +               dev_err(hba->dev, "Failed to config MCQ resource\n");
> +               return ret;
> +       }
> +
> +       ret = ufshcd_vops_config_mcq_rop(hba);
> +       if (ret) {
> +               dev_err(hba->dev, "MCQ Runtime Operation Pointers not
> configured\n");
> +               goto out_err;
> +       }
> +
> +       hba->nr_queues[HCTX_TYPE_DEFAULT] = num_possible_cpus();

4.0 supports maximum number of queues is 32. for cpus < 32, cpu to
queue will be 1x1, how about cpus > 32?

> +       hba->nr_queues[HCTX_TYPE_READ] = 0;
> +       hba->nr_queues[HCTX_TYPE_POLL] = 1;
> +
> +       for (i = 0; i < HCTX_MAX_TYPES; i++)
> +               host->nr_hw_queues += hba->nr_queues[i];
> +
> +       host->can_queue = hba->nutrs;

Also here, can_queue is inlined with ufshci limitation, not the UFS
device limit.

> +       host->cmd_per_lun = hba->nutrs;
> +
> +       /* One more reserved for dev_cmd_queue */
> +       hba->nr_hw_queues = host->nr_hw_queues + 1;
> +
> +       hba->uhq = devm_kmalloc(hba->dev,
...
>  
>         ufshcd_tune_unipro_params(hba);
> @@ -9641,6 +9775,10 @@ int ufshcd_init(struct ufs_hba *hba, void
> __iomem *mmio_base, unsigned int irq)
>                 goto out_disable;
>         }
>  
> +       err = ufshcd_mcq_init(hba);

The driver will force the customer to use MCQ, how about adding a
configuration option for the customer to choose (like eMMC CMDQ does)?

Kind regards,
Bean


2022-07-25 09:19:40

by Dan Carpenter

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

Hi Can,

https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Can-Guo/UFS-Multi-Circular-Queue-MCQ/20220719-150436
base: https://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git for-next
config: arc-randconfig-m041-20220718 (https://download.01.org/0day-ci/archive/20220723/2022072319[email protected]/config)
compiler: arceb-elf-gcc (GCC) 12.1.0

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <[email protected]>
Reported-by: Dan Carpenter <[email protected]>

New smatch warnings:
drivers/ufs/core/ufshcd.c:2887 ufshcd_queuecommand() error: uninitialized symbol 'hwq'.
drivers/ufs/core/ufs-mcq.c:315 ufshcd_mcq_config_resource() warn: passing zero to 'PTR_ERR'
drivers/ufs/core/ufs-mcq.c:334 ufshcd_mcq_config_resource() error: potentially dereferencing uninitialized 'res_mem'.
drivers/ufs/core/ufs-mcq.c:330 ufshcd_mcq_config_resource() warn: missing error code 'ret'

Old smatch warnings:
drivers/ufs/core/ufshcd.c:5360 ufshcd_uic_cmd_compl() error: we previously assumed 'hba->active_uic_cmd' could be null (see line 5348)

vim +/hwq +2887 drivers/ufs/core/ufshcd.c

7a3e97b0dc4bba drivers/scsi/ufs/ufshcd.c Santosh Yaraganavi 2012-02-29 2792 static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
7a3e97b0dc4bba drivers/scsi/ufs/ufshcd.c Santosh Yaraganavi 2012-02-29 2793 {
4728ab4a8e6490 drivers/scsi/ufs/ufshcd.c Bart Van Assche 2021-07-21 2794 struct ufs_hba *hba = shost_priv(host);
3f2c1002e0fcb6 drivers/scsi/ufs/ufshcd.c Bart Van Assche 2021-08-09 2795 int tag = scsi_cmd_to_rq(cmd)->tag;
7a3e97b0dc4bba drivers/scsi/ufs/ufshcd.c Santosh Yaraganavi 2012-02-29 2796 struct ufshcd_lrb *lrbp;
7a3e97b0dc4bba drivers/scsi/ufs/ufshcd.c Santosh Yaraganavi 2012-02-29 2797 int err = 0;
2b7356bcd24efd drivers/ufs/core/ufshcd.c Asutosh Das 2022-07-19 2798 struct ufs_hw_queue *hwq;
7a3e97b0dc4bba drivers/scsi/ufs/ufshcd.c Santosh Yaraganavi 2012-02-29 2799
eaab9b57305496 drivers/scsi/ufs/ufshcd.c Bart Van Assche 2021-12-03 2800 WARN_ONCE(tag < 0 || tag >= hba->nutrs, "Invalid tag %d\n", tag);
7a3e97b0dc4bba drivers/scsi/ufs/ufshcd.c Santosh Yaraganavi 2012-02-29 2801
5675c381ea5136 drivers/scsi/ufs/ufshcd.c Bart Van Assche 2021-12-03 2802 /*
5675c381ea5136 drivers/scsi/ufs/ufshcd.c Bart Van Assche 2021-12-03 2803 * Allows the UFS error handler to wait for prior ufshcd_queuecommand()
5675c381ea5136 drivers/scsi/ufs/ufshcd.c Bart Van Assche 2021-12-03 2804 * calls.
5675c381ea5136 drivers/scsi/ufs/ufshcd.c Bart Van Assche 2021-12-03 2805 */
5675c381ea5136 drivers/scsi/ufs/ufshcd.c Bart Van Assche 2021-12-03 2806 rcu_read_lock();
5675c381ea5136 drivers/scsi/ufs/ufshcd.c Bart Van Assche 2021-12-03 2807
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2808 switch (hba->ufshcd_state) {
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2809 case UFSHCD_STATE_OPERATIONAL:
d489f18ad1fc33 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-10-08 2810 break;
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2811 case UFSHCD_STATE_EH_SCHEDULED_NON_FATAL:
d489f18ad1fc33 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-10-08 2812 /*
d489f18ad1fc33 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-10-08 2813 * SCSI error handler can call ->queuecommand() while UFS error
d489f18ad1fc33 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-10-08 2814 * handler is in progress. Error interrupts could change the
d489f18ad1fc33 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-10-08 2815 * state from UFSHCD_STATE_RESET to
d489f18ad1fc33 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-10-08 2816 * UFSHCD_STATE_EH_SCHEDULED_NON_FATAL. Prevent requests
d489f18ad1fc33 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-10-08 2817 * being issued in that case.
d489f18ad1fc33 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-10-08 2818 */
d489f18ad1fc33 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-10-08 2819 if (ufshcd_eh_in_progress(hba)) {
d489f18ad1fc33 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-10-08 2820 err = SCSI_MLQUEUE_HOST_BUSY;
d489f18ad1fc33 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-10-08 2821 goto out;
d489f18ad1fc33 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-10-08 2822 }
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2823 break;
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2824 case UFSHCD_STATE_EH_SCHEDULED_FATAL:
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2825 /*
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2826 * pm_runtime_get_sync() is used at error handling preparation
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2827 * stage. If a scsi cmd, e.g. the SSU cmd, is sent from hba's
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2828 * PM ops, it can never be finished if we let SCSI layer keep
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2829 * retrying it, which gets err handler stuck forever. Neither
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2830 * can we let the scsi cmd pass through, because UFS is in bad
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2831 * state, the scsi cmd may eventually time out, which will get
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2832 * err handler blocked for too long. So, just fail the scsi cmd
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2833 * sent from PM ops, err handler can recover PM error anyways.
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2834 */
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2835 if (hba->pm_op_in_progress) {
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2836 hba->force_reset = true;
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2837 set_host_byte(cmd, DID_BAD_TARGET);
35c3730a965722 drivers/scsi/ufs/ufshcd.c Bart Van Assche 2021-10-07 2838 scsi_done(cmd);
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2839 goto out;
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2840 }
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2841 fallthrough;
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2842 case UFSHCD_STATE_RESET:
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2843 err = SCSI_MLQUEUE_HOST_BUSY;
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2844 goto out;
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2845 case UFSHCD_STATE_ERROR:
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2846 set_host_byte(cmd, DID_ERROR);
35c3730a965722 drivers/scsi/ufs/ufshcd.c Bart Van Assche 2021-10-07 2847 scsi_done(cmd);
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2848 goto out;
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2849 }
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2850
7fabb77b3aa016 drivers/scsi/ufs/ufshcd.c Gilad Broner 2017-02-03 2851 hba->req_abort_count = 0;
7fabb77b3aa016 drivers/scsi/ufs/ufshcd.c Gilad Broner 2017-02-03 2852
1ab27c9cf8b63d drivers/scsi/ufs/ufshcd.c Sahitya Tummala 2014-09-25 2853 err = ufshcd_hold(hba, true);
1ab27c9cf8b63d drivers/scsi/ufs/ufshcd.c Sahitya Tummala 2014-09-25 2854 if (err) {
1ab27c9cf8b63d drivers/scsi/ufs/ufshcd.c Sahitya Tummala 2014-09-25 2855 err = SCSI_MLQUEUE_HOST_BUSY;
1ab27c9cf8b63d drivers/scsi/ufs/ufshcd.c Sahitya Tummala 2014-09-25 2856 goto out;
1ab27c9cf8b63d drivers/scsi/ufs/ufshcd.c Sahitya Tummala 2014-09-25 2857 }
2dec9475a4028b drivers/scsi/ufs/ufshcd.c Can Guo 2020-08-09 2858 WARN_ON(ufshcd_is_clkgating_allowed(hba) &&
2dec9475a4028b drivers/scsi/ufs/ufshcd.c Can Guo 2020-08-09 2859 (hba->clk_gating.state != CLKS_ON));
1ab27c9cf8b63d drivers/scsi/ufs/ufshcd.c Sahitya Tummala 2014-09-25 2860
2b7356bcd24efd drivers/ufs/core/ufshcd.c Asutosh Das 2022-07-19 2861 if (is_mcq_enabled(hba))
2b7356bcd24efd drivers/ufs/core/ufshcd.c Asutosh Das 2022-07-19 2862 lrbp = ufshcd_mcq_find_lrb(hba, scsi_cmd_to_rq(cmd), &hwq);
2b7356bcd24efd drivers/ufs/core/ufshcd.c Asutosh Das 2022-07-19 2863 else
a45f937110fa6b drivers/scsi/ufs/ufshcd.c Can Guo 2021-05-24 2864 lrbp = &hba->lrb[tag];

hwq not initialized on else path

2b7356bcd24efd drivers/ufs/core/ufshcd.c Asutosh Das 2022-07-19 2865
5a0b0cb9bee767 drivers/scsi/ufs/ufshcd.c Sujit Reddy Thumma 2013-07-30 2866 WARN_ON(lrbp->cmd);
7a3e97b0dc4bba drivers/scsi/ufs/ufshcd.c Santosh Yaraganavi 2012-02-29 2867 lrbp->cmd = cmd;
7a3e97b0dc4bba drivers/scsi/ufs/ufshcd.c Santosh Yaraganavi 2012-02-29 2868 lrbp->task_tag = tag;
0ce147d48a3e33 drivers/scsi/ufs/ufshcd.c Subhash Jadavani 2014-09-25 2869 lrbp->lun = ufshcd_scsi_to_upiu_lun(cmd->device->lun);
51d1628fc45727 drivers/scsi/ufs/ufshcd.c Bart Van Assche 2022-04-19 2870 lrbp->intr_cmd = !ufshcd_is_intr_aggr_allowed(hba);
df043c745ea149 drivers/scsi/ufs/ufshcd.c Satya Tangirala 2020-07-06 2871
3f2c1002e0fcb6 drivers/scsi/ufs/ufshcd.c Bart Van Assche 2021-08-09 2872 ufshcd_prepare_lrbp_crypto(scsi_cmd_to_rq(cmd), lrbp);
df043c745ea149 drivers/scsi/ufs/ufshcd.c Satya Tangirala 2020-07-06 2873
e0b299e36004f5 drivers/scsi/ufs/ufshcd.c Gilad Broner 2017-02-03 2874 lrbp->req_abort_skip = false;
7a3e97b0dc4bba drivers/scsi/ufs/ufshcd.c Santosh Yaraganavi 2012-02-29 2875
09d9e4d0418766 drivers/scsi/ufs/ufshcd.c Avri Altman 2021-10-30 2876 ufshpb_prep(hba, lrbp);
2fff76f87542fa drivers/scsi/ufs/ufshcd.c Daejun Park 2021-07-12 2877
300bb13f5c7b1d drivers/scsi/ufs/ufshcd.c Joao Pinto 2016-05-11 2878 ufshcd_comp_scsi_upiu(hba, lrbp);
300bb13f5c7b1d drivers/scsi/ufs/ufshcd.c Joao Pinto 2016-05-11 2879
75b1cc4ad63afa drivers/scsi/ufs/ufshcd.c Kiwoong Kim 2016-11-22 2880 err = ufshcd_map_sg(hba, lrbp);
5a0b0cb9bee767 drivers/scsi/ufs/ufshcd.c Sujit Reddy Thumma 2013-07-30 2881 if (err) {
5a0b0cb9bee767 drivers/scsi/ufs/ufshcd.c Sujit Reddy Thumma 2013-07-30 2882 lrbp->cmd = NULL;
17c7d35f141ef6 drivers/scsi/ufs/ufshcd.c Can Guo 2019-12-05 2883 ufshcd_release(hba);
7a3e97b0dc4bba drivers/scsi/ufs/ufshcd.c Santosh Yaraganavi 2012-02-29 2884 goto out;
5a0b0cb9bee767 drivers/scsi/ufs/ufshcd.c Sujit Reddy Thumma 2013-07-30 2885 }
7a3e97b0dc4bba drivers/scsi/ufs/ufshcd.c Santosh Yaraganavi 2012-02-29 2886
2b7356bcd24efd drivers/ufs/core/ufshcd.c Asutosh Das 2022-07-19 @2887 ufshcd_send_command(hba, lrbp, hwq);
^^^
Sent


7a3e97b0dc4bba drivers/scsi/ufs/ufshcd.c Santosh Yaraganavi 2012-02-29 2888 out:
5675c381ea5136 drivers/scsi/ufs/ufshcd.c Bart Van Assche 2021-12-03 2889 rcu_read_unlock();
5675c381ea5136 drivers/scsi/ufs/ufshcd.c Bart Van Assche 2021-12-03 2890
88b099006d83b0 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-09-17 2891 if (ufs_trigger_eh()) {
88b099006d83b0 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-09-17 2892 unsigned long flags;
88b099006d83b0 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-09-17 2893
88b099006d83b0 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-09-17 2894 spin_lock_irqsave(hba->host->host_lock, flags);
88b099006d83b0 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-09-17 2895 ufshcd_schedule_eh_work(hba);
88b099006d83b0 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-09-17 2896 spin_unlock_irqrestore(hba->host->host_lock, flags);
88b099006d83b0 drivers/scsi/ufs/ufshcd.c Adrian Hunter 2021-09-17 2897 }
c11a1ae9b8f65e drivers/scsi/ufs/ufshcd.c Bart Van Assche 2021-07-21 2898
7a3e97b0dc4bba drivers/scsi/ufs/ufshcd.c Santosh Yaraganavi 2012-02-29 2899 return err;
7a3e97b0dc4bba drivers/scsi/ufs/ufshcd.c Santosh Yaraganavi 2012-02-29 2900 }

--
0-DAY CI Kernel Test Service
https://01.org/lkp

2022-07-25 16:57:15

by Asutosh Das

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/23/2022 1:22 PM, Avri Altman wrote:
>> -static void ufshcd_host_memory_configure(struct ufs_hba *hba)
>> +static int ufshcd_host_memory_configure(struct ufs_hba *hba)
>> {
>> struct utp_transfer_req_desc *utrdlp;
>> dma_addr_t cmd_desc_dma_addr;
>> @@ -3721,6 +3764,9 @@ static void ufshcd_host_memory_configure(struct
>> ufs_hba *hba)
>> int cmd_desc_size;
>> int i;
>>
>> + if (is_mcq_enabled(hba))
>> + return ufshcd_mcq_memory_configure(hba);
>> +
>> utrdlp = hba->utrdl_base_addr;
>>
>> response_offset =
>> @@ -3759,6 +3805,8 @@ static void ufshcd_host_memory_configure(struct
>> ufs_hba *hba)
>>
>> ufshcd_init_lrb(hba, &hba->lrb[i], i);
> If is_mcq_enabled, do you still call ufshcd_init_lrb?
>
> Thanks,
> Avri
>
>> }
Another function ufshcd_mcq_init_lrb() is called.

-asd

2022-07-25 17:24:38

by Asutosh Das

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/23/2022 9:07 PM, Avri Altman wrote:
>> @@ -2558,7 +2587,8 @@ void ufshcd_prepare_utp_scsi_cmd_upiu(struct
>> ufshcd_lrb *lrbp, u8 upiu_flags)
>> UPIU_TRANSACTION_COMMAND, upiu_flags,
>> lrbp->lun, lrbp->task_tag);
>> ucd_req_ptr->header.dword_1 = UPIU_HEADER_DWORD(
>> - UPIU_COMMAND_SET_TYPE_SCSI, 0, 0, 0);
>> + UPIU_COMMAND_SET_TYPE_SCSI |
>> + (lrbp->hw_queue_id << 4), 0, 0, 0);
> This is fine, as long as we have 16 queues or less.
> Otherwise, we need to fill the EXT_IID as well (only if bEXTIIDEn = 1).
>
Yes, correct. Will add the code for EXT_IID in the next version.

> Also, don't we need to do this for query commands as well?
> Or at least add a comment that the queue id for query command is 0.
>
Query commands would use the reserved queue whose id = 0.
I agree a comment should be added detailing this.
Will add in the next version.

> Thanks,
> Avri

2022-07-25 17:41:36

by Asutosh Das

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/23/2022 2:23 PM, Avri Altman wrote:
>> static inline
>> -void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
>> +void ufshcd_send_command(struct ufs_hba *hba, struct ufshcd_lrb *lrbp,
>> + struct ufs_hw_queue *hwq)
>> {
>> - struct ufshcd_lrb *lrbp = &hba->lrb[task_tag];
>> unsigned long flags;
>>
>> lrbp->issue_time_stamp = ktime_get();
>> lrbp->compl_time_stamp = ktime_set(0, 0);
>> - ufshcd_add_command_trace(hba, task_tag, UFS_CMD_SEND);
>> + ufshcd_add_command_trace(hba, lrbp, UFS_CMD_SEND);
>> ufshcd_clk_scaling_start_busy(hba);
>> if (unlikely(ufshcd_should_inform_monitor(hba, lrbp)))
>> ufshcd_start_monitor(hba, lrbp);
>>
>> - spin_lock_irqsave(&hba->outstanding_lock, flags);
>> - if (hba->vops && hba->vops->setup_xfer_req)
>> - hba->vops->setup_xfer_req(hba, task_tag, !!lrbp->cmd);
>> - __set_bit(task_tag, &hba->outstanding_reqs);
>> - ufshcd_writel(hba, 1 << task_tag,
>> REG_UTP_TRANSFER_REQ_DOOR_BELL);
>> - spin_unlock_irqrestore(&hba->outstanding_lock, flags);
>> + if (is_mcq_enabled(hba)) {
>> + int utrd_size = sizeof(struct utp_transfer_req_desc);
> Maybe we can map some designated ops, so all those if (is_mcq) can be avoided in the data-path.
Umm, I couldn't find any ops in scsi_host_template {...}. Do you have
any further insight into how this check can be avoided?
> Also maybe we can constify sizeof(struct utp_transfer_req_desc) which is used now few times.
>
Ok, agree to make sizeof(struct utp_transfer_req_desc) a constant in the
next version.

-asd

2022-07-25 17:46:54

by Asutosh Das

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/23/2022 8:26 AM, Avri Altman wrote:
>> +
>> + /* sqen|size|cqid */
>> + ufsmcq_writel(hba, (1 << 31) | qsize | (i << 16),
>> + MCQ_CFG_n(REG_SQATTR, i));
> So there is a 1X1 SQ-CQ topology.
> Isn't that should be configurable?
>
> Thanks,
> Avri
The popular configuration appeared to be 1:1.
Other configurations may be added as needed later, perhaps.

-asd

2022-07-25 17:52:01

by Asutosh Das

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/24/2022 2:54 PM, Bean Huo wrote:
>
> Hi Can/Asutosh
>
> A few questions about MCQ configuration:
>
>

Hello Bean,
Thanks for the review.

> On Tue, 2022-07-19 at 00:01 -0700, Can Guo wrote:
>> From: Asutosh Das <[email protected]>
>>
>> Adds MCQ support to UFS driver.
>>
>> Co-developed-by: Can Guo <[email protected]>
>> Signed-off-by: Asutosh Das <[email protected]>
>> Signed-off-by: Can Guo <[email protected]>
>> ---
>>
>> +void ufshcd_mcq_config_mac(struct ufs_hba *hba)
>> +{
>> +       u32 val = ufshcd_readl(hba, REG_UFS_MCQ_CFG);
>> +
>> +       val &= ~MCQ_CFG_MAC_MASK;
>> +       val |= hba->dev_info.bqueuedepth << MCQ_CFG_MAC_OFFSET;
>> +       ufshcd_writel(hba, val, REG_UFS_MCQ_CFG);
>
> Here you set MaxActiveCommand to dev_info.bqueuedepth (this limit comes
> from UFS devices). I see in the qsize configuration that you want to
> set the queue depth in each HW queue to be hba->nutrs (this limit comes
> from UFSHCI), should not it be min(device limit, ufshci limit)?
>
Yes, looks like it should be. Let me relook this logic.
>> +}
>> +EXPORT_SYMBOL_GPL(ufshcd_mcq_config_mac);
>> +
>>
> ...
>> +
>> +       for_each_hw_queue(hba, i) {
>> +               hwq = &hba->uhq[i];
>> +               hwq->id = i;
>> +               qsize = hwq->max_entries * MCQ_ENTRY_SIZE_IN_DWORD -
>> 1;
>
> qsize is hba->nutrs, 32*8-1 = 255 =256DW, per draft spec , should not
> be 8DW in 4.0?
>
>> +
>> +               /* SQLBA */
>> +               ufsmcq_writel(hba, lower_32_bits(hwq->sqe_dma_addr),
>> +                             MCQ_CFG_n(REG_SQLBA, i));
>> +               /* SQUBA */
>> +               ufsmcq_writel(hba, upper_32_bits(hwq->sqe_dma_addr),
>> +                             MCQ_CFG_n(REG_SQUBA, i));
>> +               /* SQDAO */
>> +               ufsmcq_writel(hba, MCQ_ROP_OFFSET_n(ROP_SQD, i),
>> +                             MCQ_CFG_n(REG_SQDAO, i));
>>
>
> ...
>
>>        }
>> +
>> +out:
>> +       hba->mcq_base = res->base;
>> +       return 0;
>> +
>> +out_err:
>> +       ufshcd_mcq_release_resource(hba);
>> +       return ret;
>> +}
>> +
>> +int ufshcd_mcq_init(struct ufs_hba *hba)
>> +{
>> +       struct Scsi_Host *host = hba->host;
>> +       struct ufs_hw_queue *hwq;
>> +       int i, ret = 0;
>> +
>> +       if (!is_mcq_supported(hba))
>> +               return 0;
>> +
>> +       ret = ufshcd_mcq_config_resource(hba);
>> +       if (ret) {
>> +               dev_err(hba->dev, "Failed to config MCQ resource\n");
>> +               return ret;
>> +       }
>> +
>> +       ret = ufshcd_vops_config_mcq_rop(hba);
>> +       if (ret) {
>> +               dev_err(hba->dev, "MCQ Runtime Operation Pointers not
>> configured\n");
>> +               goto out_err;
>> +       }
>> +
>> +       hba->nr_queues[HCTX_TYPE_DEFAULT] = num_possible_cpus();
>
> 4.0 supports maximum number of queues is 32. for cpus < 32, cpu to
> queue will be 1x1, how about cpus > 32?
>
Good point. Will check and fix it.
>> +       hba->nr_queues[HCTX_TYPE_READ] = 0;
>> +       hba->nr_queues[HCTX_TYPE_POLL] = 1;
>> +
>> +       for (i = 0; i < HCTX_MAX_TYPES; i++)
>> +               host->nr_hw_queues += hba->nr_queues[i];
>> +
>> +       host->can_queue = hba->nutrs;
>
> Also here, can_queue is inlined with ufshci limitation, not the UFS
> device limit.
>
Yes, will take a look.
>> +       host->cmd_per_lun = hba->nutrs;
>> +
>> +       /* One more reserved for dev_cmd_queue */
>> +       hba->nr_hw_queues = host->nr_hw_queues + 1;
>> +
>> +       hba->uhq = devm_kmalloc(hba->dev,
> ...
>>
>>         ufshcd_tune_unipro_params(hba);
>> @@ -9641,6 +9775,10 @@ int ufshcd_init(struct ufs_hba *hba, void
>> __iomem *mmio_base, unsigned int irq)
>>                 goto out_disable;
>>         }
>>
>> +       err = ufshcd_mcq_init(hba);
>
> The driver will force the customer to use MCQ, how about adding a
> configuration option for the customer to choose (like eMMC CMDQ does)?
>
Let me check what eMMC does and see if that can be adopted here.

> Kind regards,
> Bean
>
>

-asd

2022-07-25 20:25:10

by Avri Altman

[permalink] [raw]
Subject: RE: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support


> On 7/23/2022 1:22 PM, Avri Altman wrote:
> >> -static void ufshcd_host_memory_configure(struct ufs_hba *hba)
> >> +static int ufshcd_host_memory_configure(struct ufs_hba *hba)
> >> {
> >> struct utp_transfer_req_desc *utrdlp;
> >> dma_addr_t cmd_desc_dma_addr; @@ -3721,6 +3764,9 @@ static
> >> void ufshcd_host_memory_configure(struct
> >> ufs_hba *hba)
> >> int cmd_desc_size;
> >> int i;
> >>
> >> + if (is_mcq_enabled(hba))
> >> + return ufshcd_mcq_memory_configure(hba);
> >> +
> >> utrdlp = hba->utrdl_base_addr;
> >>
> >> response_offset =
> >> @@ -3759,6 +3805,8 @@ static void
> ufshcd_host_memory_configure(struct
> >> ufs_hba *hba)
> >>
> >> ufshcd_init_lrb(hba, &hba->lrb[i], i);
> > If is_mcq_enabled, do you still call ufshcd_init_lrb?
> >
> > Thanks,
> > Avri
> >
> >> }
> Another function ufshcd_mcq_init_lrb() is called.
But you are still calling ufshcd_init_lrb anyway.

Thanks,
Avri

>
> -asd

2022-07-25 20:36:39

by Asutosh Das

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/25/2022 12:50 PM, Avri Altman wrote:
>
>> On 7/23/2022 1:22 PM, Avri Altman wrote:
>>>> -static void ufshcd_host_memory_configure(struct ufs_hba *hba)
>>>> +static int ufshcd_host_memory_configure(struct ufs_hba *hba)
>>>> {
>>>> struct utp_transfer_req_desc *utrdlp;
>>>> dma_addr_t cmd_desc_dma_addr; @@ -3721,6 +3764,9 @@ static
>>>> void ufshcd_host_memory_configure(struct
>>>> ufs_hba *hba)
>>>> int cmd_desc_size;
>>>> int i;
>>>>
>>>> + if (is_mcq_enabled(hba))
>>>> + return ufshcd_mcq_memory_configure(hba);
>>>> +
>>>> utrdlp = hba->utrdl_base_addr;
>>>>
>>>> response_offset =
>>>> @@ -3759,6 +3805,8 @@ static void
>> ufshcd_host_memory_configure(struct
>>>> ufs_hba *hba)
>>>>
>>>> ufshcd_init_lrb(hba, &hba->lrb[i], i);
>>> If is_mcq_enabled, do you still call ufshcd_init_lrb?
>>>
>>> Thanks,
>>> Avri
>>>
>>>> }
>> Another function ufshcd_mcq_init_lrb() is called.
> But you are still calling ufshcd_init_lrb anyway.
>
I checked the patch, but couldn't find where ufshcd_init_lrb() is being
invoked if mcq is enabled.
Please can you point it to me?

In ufshcd_host_memory_configure(), it goes as:
if (is_mcq_enabled(hba))
return ufshcd_mcq_memory_configure(hba);

And ufshcd_init_lrb() is only invoked from ufshcd_host_memory_configure().

Am I missing something?

> Thanks,
> Avri
>
>>

-asd

2022-07-26 03:21:56

by Can Guo

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

Hi Avri,

On 7/23/2022 10:59 PM, Avri Altman wrote:
>> +#define MCQ_ROP_OFFSET_n(p, i) \
>> + hba->mcq_rop[(p)].offset + hba->mcq_rop[(p)].stride * (i)
> Can you please explain the 0x100 stride thing?
> Theoretically, each rop set is 48Bytes long, or did I get it wrong?

In the draft, there are 4 sets of MCQ Operation & Runtime Registers,
i.e. SQD_n, SQIS_n, CQD_n and CQIS_n.

They may be interleaved or segregated, depends on realistic HW designs.

For example, either #1 or #2 can be the case, and QCOM uses case #1, in
which 'stride' means the size (in bytes)

that one register has to hop over to reach the same register on next
round (for example, addr of SQHP_n == (addr of SQHP_0) + (stride * n)).

To allow flexibility, SoC vendors, depends on realistic HW designs, (by
using vops_config_mcq_rop) can manipulate

the 'offset' and 'stride' of each MCQ Operation & Runtime Register such
that SQDAO_n, SQISAO_n, CQDAO_n and

CQISAO_n are programmed with wanted offsets/values.

#1 -

SQHP_0
SQTP_0
SQRTC_0
SQCTI_0
SQRTS_0
SQIS_0
SQIE_0
CQHP_0
CQTP_0
CQIS_0
CQIE_0
CQIACR_0
SQHP_1
SQTP_1
SQRTC_1
SQCTI_1
SQRTS_1
SQIS_1
SQIE_1
CQHP_1
CQTP_1
CQIS_1
CQIE_1
CQIACR_1
...
SQHP_n
SQTP_n
SQRTC_n
SQCTI_n
SQRTS_n
SQIS_n
SQIE_n
CQHP_n
CQTP_n
CQIS_n
CQIE_n
CQIACR_n


#2 -

SQHP_0
SQTP_0
SQRTC_0
SQCTI_0
SQRTS_0
SQHP_1
SQTP_1
SQRTC_1
SQCTI_1
SQRTS_1
...
SQHP_n
SQTP_n
SQRTC_n
SQCTI_n
SQRTS_n


SQIS_0
SQIE_0
SQIS_1
SQIE_1
...
SQIS_n
SQIE_n


CQHP_0
CQTP_0
CQHP_1
CQTP_1
...
CQHP_n
CQTP_n


CQIS_0
CQIE_0
CQIACR_0
CQIS_1
CQIE_1
CQIACR_1
...
CQIS_n
CQIE_n
CQIACR_n


Thanks,

Can Guo.

> Thanks,
> Avri

2022-07-26 06:41:57

by Can Guo

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

Hi Avri,

On 7/24/2022 12:07 PM, Avri Altman wrote:
>> @@ -2558,7 +2587,8 @@ void ufshcd_prepare_utp_scsi_cmd_upiu(struct
>> ufshcd_lrb *lrbp, u8 upiu_flags)
>> UPIU_TRANSACTION_COMMAND, upiu_flags,
>> lrbp->lun, lrbp->task_tag);
>> ucd_req_ptr->header.dword_1 = UPIU_HEADER_DWORD(
>> - UPIU_COMMAND_SET_TYPE_SCSI, 0, 0, 0);
>> + UPIU_COMMAND_SET_TYPE_SCSI |
>> + (lrbp->hw_queue_id << 4), 0, 0, 0);
> This is fine, as long as we have 16 queues or less.
> Otherwise, we need to fill the EXT_IID as well (only if bEXTIIDEn = 1).
>
> Also, don't we need to do this for query commands as well?
> Or at least add a comment that the queue id for query command is 0.

As per UFS4.0 JEDEC draft, EXT_IID or IID is not required in QUERY
REQUEST/RESPONSE UPIU,

unless my doc is out dated, please let me know.


Thanks,

Can Guo.

>
> Thanks,
> Avri

2022-07-26 06:57:15

by Can Guo

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

Hi Bart,

On 7/23/2022 1:58 AM, Bart Van Assche wrote:
> On 7/22/22 00:31, Avri Altman wrote:
>>> +#define UFSHCD_MCQ_IO_QUEUE_OFFSET 1
>> Maybe add a comment above: "queue 0 is reserved for query commands"
>> or something
>> That is if the query commands don't use the  legacy doorbell
>
> Is it essential to reserve a queue for device management commands?
> Wouldn't it be better to have one additional queue for submitting I/O
> commands rather than reserving a queue for device management commands?


Since this is just RFC change, we are trying to make the whole thing
work with minimal efforts.

So we found that having a reserved queue (with only one active command)
for device management

requires much less changes in ufshcd.c, because current device
management commands anyways come

one by one (we have a mutex lock dev_cmd.lock held in
exec_dev_command()) and it is easy to handle/assign

the task tag for device management command by just reading sq_tp_slot.
If you think this needs to be improved,

can you please elaborate your idea? Thanks.


Regards,

Can Guo.

>
> Thanks,
>
> Bart.
>

2022-07-26 07:00:28

by Can Guo

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

Hi Avri,

On 7/24/2022 12:32 PM, Avri Altman wrote:
>> +
>> +/**
>> + * @ucdl_base_addr: UFS Command Descriptor base address
>> + * @sqe_base_addr: submission queue entry base address
>> + * @sqe_shadow_addr: submission queue entry shadow address
> When you are editing your commit log, could you please also say something about the shadow queues concept?

Sure, we will add comments in next version.

> And why it is a good idea to maintain 2 sets of addresses, which basically points to the same place?

When block layer chooses one task tag for one command, that tag will be
used to link these pre-allocated data structs -

ucdl[tag]<->lrpb[tag]<->utrd[tag], and the tag chosen by block layer is
random (it does not increase from 0 to n and goes

back to 0 in a circular way). But, in MCQ mode, when we submit the
command to UFSHCI, we need to make sure the SQTP

get increased one slot by one slot (we cannot skip slots). Hence by
keeping shadow utrds (or shadow SQEs), the data struct

linkage ucdl[tag]<->lrpb[tag]<->shadow_sqe[tag] remains same, and we
copy the shadow sqe to the sqe[sq_tp_slot] only

when we finally decide the very sq_tp_slot used to submit this command
in SQTP.

The benefit is that we can 100% leverage the existing initialization
logic of lrbp in ufshcd_queuecommand path without changing a line.

Otherwise, considerable changes would be required to implement the idea
of dynamical SQE assignment (to lrbp).


Thanks,

Can Guo.

>
> Thanks,
> Avri

2022-07-26 09:49:08

by Avri Altman

[permalink] [raw]
Subject: RE: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

> Hi Avri,
>
> On 7/24/2022 12:07 PM, Avri Altman wrote:
> >> @@ -2558,7 +2587,8 @@ void ufshcd_prepare_utp_scsi_cmd_upiu(struct
> >> ufshcd_lrb *lrbp, u8 upiu_flags)
> >> UPIU_TRANSACTION_COMMAND, upiu_flags,
> >> lrbp->lun, lrbp->task_tag);
> >> ucd_req_ptr->header.dword_1 = UPIU_HEADER_DWORD(
> >> - UPIU_COMMAND_SET_TYPE_SCSI, 0, 0, 0);
> >> + UPIU_COMMAND_SET_TYPE_SCSI |
> >> + (lrbp->hw_queue_id << 4), 0, 0, 0);
> > This is fine, as long as we have 16 queues or less.
> > Otherwise, we need to fill the EXT_IID as well (only if bEXTIIDEn = 1).
> >
> > Also, don't we need to do this for query commands as well?
> > Or at least add a comment that the queue id for query command is 0.
>
> As per UFS4.0 JEDEC draft, EXT_IID or IID is not required in QUERY
> REQUEST/RESPONSE UPIU,
Correct.

Thanks,
Avri

>
> unless my doc is out dated, please let me know.
>
>
> Thanks,
>
> Can Guo.
>
> >
> > Thanks,
> > Avri

2022-07-26 23:04:56

by Bart Van Assche

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/25/22 09:35, Asutosh Das (asd) wrote:
> On 7/23/2022 2:23 PM, Avri Altman wrote:
>> Also maybe we can constify sizeof(struct utp_transfer_req_desc) which
>> is used now few times.
>
> Ok, agree to make sizeof(struct utp_transfer_req_desc) a constant in the
> next version.

Please don't. I'm concerned that introducing a symbolic name for that
sizeof() expression will make code harder to read instead of easier.

Thanks,

Bart.

2022-07-28 19:56:08

by Bart Van Assche

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/19/22 00:01, Can Guo wrote:
> static int ufshcd_map_queues(struct Scsi_Host *shost)
> {
> - int i, ret;
> + int i, queue_offset = 0, ret;
> + struct ufs_hba *hba = shost_priv(shost);
>
> for (i = 0; i < shost->nr_maps; i++) {
> struct blk_mq_queue_map *map = &shost->tag_set.map[i];
>
> - switch (i) {
> - case HCTX_TYPE_DEFAULT:
> - case HCTX_TYPE_POLL:
> - map->nr_queues = 1;
> - break;
> - case HCTX_TYPE_READ:
> - map->nr_queues = 0;
> + map->nr_queues = hba->nr_queues[i];
> + if (!map->nr_queues)
> continue;
> - default:
> - WARN_ON_ONCE(true);
> - }
> - map->queue_offset = 0;
> +
> + map->queue_offset = queue_offset;
> + if (i == HCTX_TYPE_POLL && !is_mcq_enabled(hba))
> + map->queue_offset = 0;
> +
> ret = blk_mq_map_queues(map);
> - WARN_ON_ONCE(ret);
> +
> + if (ret)
> + return ret;
> +
> + queue_offset += map->nr_queues;
> }

It is not clear to me why the WARN_ON_ONCE(ret) statement has been
changed into "if (ret) return ret;"?

Thanks,

Bart.

2022-07-28 20:02:30

by John Garry

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 19/07/2022 08:01, Can Guo wrote:
> +
> + hba->nr_queues[HCTX_TYPE_DEFAULT] = num_possible_cpus();
> + hba->nr_queues[HCTX_TYPE_READ] = 0;
> + hba->nr_queues[HCTX_TYPE_POLL] = 1;
> +
> + for (i = 0; i < HCTX_MAX_TYPES; i++)
> + host->nr_hw_queues += hba->nr_queues[i];
> +
> + host->can_queue = hba->nutrs;
> + host->cmd_per_lun = hba->nutrs;
> +
> + /* One more reserved for dev_cmd_queue */
> + hba->nr_hw_queues = host->nr_hw_queues + 1;
> +

So this would mean that the host can accept .can_queue * .nr_hw_queues
numbers of requests simultaneously - is that true?

thanks,
John

2022-07-28 20:07:48

by Asutosh Das

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

Hello John,

On 7/28/2022 12:10 PM, John Garry wrote:
> On 19/07/2022 08:01, Can Guo wrote:
>> +
>> +    hba->nr_queues[HCTX_TYPE_DEFAULT] = num_possible_cpus();
>> +    hba->nr_queues[HCTX_TYPE_READ] = 0;
>> +    hba->nr_queues[HCTX_TYPE_POLL] = 1;
>> +
>> +    for (i = 0; i < HCTX_MAX_TYPES; i++)
>> +        host->nr_hw_queues += hba->nr_queues[i];
>> +
>> +    host->can_queue = hba->nutrs;
>> +    host->cmd_per_lun = hba->nutrs;
>> +
>> +    /* One more reserved for dev_cmd_queue */
>> +    hba->nr_hw_queues = host->nr_hw_queues + 1;
>> +
>
> So this would mean that the host can accept .can_queue * .nr_hw_queues
> numbers of requests simultaneously - is that true?
>
That would mean that .can_queue * .nr_hw_queues numbers of request may
be queued to the host.
Please can you elaborate if you see an issue.

> thanks,
> John

-asd

2022-07-28 20:13:06

by Asutosh Das

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/28/2022 12:38 PM, Bart Van Assche wrote:
> On 7/19/22 00:01, Can Guo wrote:
>>   static int ufshcd_map_queues(struct Scsi_Host *shost)
>>   {
>> -    int i, ret;
>> +    int i, queue_offset = 0, ret;
>> +    struct ufs_hba *hba = shost_priv(shost);
>>       for (i = 0; i < shost->nr_maps; i++) {
>>           struct blk_mq_queue_map *map = &shost->tag_set.map[i];
>> -        switch (i) {
>> -        case HCTX_TYPE_DEFAULT:
>> -        case HCTX_TYPE_POLL:
>> -            map->nr_queues = 1;
>> -            break;
>> -        case HCTX_TYPE_READ:
>> -            map->nr_queues = 0;
>> +        map->nr_queues = hba->nr_queues[i];
>> +        if (!map->nr_queues)
>>               continue;
>> -        default:
>> -            WARN_ON_ONCE(true);
>> -        }
>> -        map->queue_offset = 0;
>> +
>> +        map->queue_offset = queue_offset;
>> +        if (i == HCTX_TYPE_POLL && !is_mcq_enabled(hba))
>> +            map->queue_offset = 0;
>> +
>>           ret = blk_mq_map_queues(map);
>> -        WARN_ON_ONCE(ret);
>> +
>> +        if (ret)
>> +            return ret;
>> +
>> +        queue_offset += map->nr_queues;
>>       }
>
> It is not clear to me why the WARN_ON_ONCE(ret) statement has been
> changed into "if (ret) return ret;"?
>
Looks like blk_mq_map_queues() only returns 0.
Will remove the if (ret) return ret statement in the next version.

> Thanks,
>
> Bart.

2022-07-28 20:34:36

by Bart Van Assche

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/28/22 12:15, Asutosh Das (asd) wrote:
> Hello John,
>
> On 7/28/2022 12:10 PM, John Garry wrote:
>> On 19/07/2022 08:01, Can Guo wrote:
>>> +
>>> +    hba->nr_queues[HCTX_TYPE_DEFAULT] = num_possible_cpus();
>>> +    hba->nr_queues[HCTX_TYPE_READ] = 0;
>>> +    hba->nr_queues[HCTX_TYPE_POLL] = 1;
>>> +
>>> +    for (i = 0; i < HCTX_MAX_TYPES; i++)
>>> +        host->nr_hw_queues += hba->nr_queues[i];
>>> +
>>> +    host->can_queue = hba->nutrs;
>>> +    host->cmd_per_lun = hba->nutrs;
>>> +
>>> +    /* One more reserved for dev_cmd_queue */
>>> +    hba->nr_hw_queues = host->nr_hw_queues + 1;
>>> +
>>
>> So this would mean that the host can accept .can_queue * .nr_hw_queues
>> numbers of requests simultaneously - is that true?
>
> That would mean that .can_queue * .nr_hw_queues numbers of request may
> be queued to the host.
> Please can you elaborate if you see an issue.

Hi Asutosh,

The `host_tagset` flag has been introduced by John and Hannes some time
ago. See also commit bdb01301f3ea ("scsi: Add host and host template
flag 'host_tagset'"). This flag supports sharing tags across hardware
queues and could be used to support UFSHCI 4.0 controllers that do not
support the EXT_IID feature.

In order not to complicate the implementation further, I propose to fall
back to the UFSHCI 3.0 compatibility mode for UFSHCI 4.0 controllers
that do not support the EXT_IID feature.

To answer John's question: the maximum number of outstanding commands is
16 * hba->nutrs if EXT_IID is supported (EXT_IID is a four bits field).
If the hardware queue index is encoded in the EXT_IID field, hba->nutrs
is the number of commands per hardware queue.

Thanks,

Bart.

2022-07-28 21:23:26

by Asutosh Das

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/28/2022 1:29 PM, Bart Van Assche wrote:
> On 7/28/22 12:15, Asutosh Das (asd) wrote:
>> Hello John,
>>
>> On 7/28/2022 12:10 PM, John Garry wrote:
>>> On 19/07/2022 08:01, Can Guo wrote:
>>>> +
>>>> +    hba->nr_queues[HCTX_TYPE_DEFAULT] = num_possible_cpus();
>>>> +    hba->nr_queues[HCTX_TYPE_READ] = 0;
>>>> +    hba->nr_queues[HCTX_TYPE_POLL] = 1;
>>>> +
>>>> +    for (i = 0; i < HCTX_MAX_TYPES; i++)
>>>> +        host->nr_hw_queues += hba->nr_queues[i];
>>>> +
>>>> +    host->can_queue = hba->nutrs;
>>>> +    host->cmd_per_lun = hba->nutrs;
>>>> +
>>>> +    /* One more reserved for dev_cmd_queue */
>>>> +    hba->nr_hw_queues = host->nr_hw_queues + 1;
>>>> +
>>>
>>> So this would mean that the host can accept .can_queue *
>>> .nr_hw_queues numbers of requests simultaneously - is that true?
>>
>> That would mean that .can_queue * .nr_hw_queues numbers of request may
>> be queued to the host.
>> Please can you elaborate if you see an issue.
>
> Hi Asutosh,
>
> The `host_tagset` flag has been introduced by John and Hannes some time
> ago. See also commit bdb01301f3ea ("scsi: Add host and host template
> flag 'host_tagset'"). This flag supports sharing tags across hardware
> queues and could be used to support UFSHCI 4.0 controllers that do not
> support the EXT_IID feature.
>
> In order not to complicate the implementation further, I propose to fall
> back to the UFSHCI 3.0 compatibility mode for UFSHCI 4.0 controllers
> that do not support the EXT_IID feature.
>
> To answer John's question: the maximum number of outstanding commands is
> 16 * hba->nutrs if EXT_IID is supported (EXT_IID is a four bits field).
> If the hardware queue index is encoded in the EXT_IID field, hba->nutrs
> is the number of commands per hardware queue.
>
> Thanks,
>
> Bart.

Thanks Bart, I wasn't aware of the background of John's Q. I will
consider your proposal and get back.

-asd

2022-07-29 16:47:34

by Asutosh Das

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

Hi Bart,

On 7/28/2022 2:07 PM, Asutosh Das (asd) wrote:
> On 7/28/2022 1:29 PM, Bart Van Assche wrote:
>> On 7/28/22 12:15, Asutosh Das (asd) wrote:
>>> Hello John,
>>>
>>> On 7/28/2022 12:10 PM, John Garry wrote:
>>>> On 19/07/2022 08:01, Can Guo wrote:
>>>>> +
>>>>> +    hba->nr_queues[HCTX_TYPE_DEFAULT] = num_possible_cpus();
>>>>> +    hba->nr_queues[HCTX_TYPE_READ] = 0;
>>>>> +    hba->nr_queues[HCTX_TYPE_POLL] = 1;
>>>>> +
>>>>> +    for (i = 0; i < HCTX_MAX_TYPES; i++)
>>>>> +        host->nr_hw_queues += hba->nr_queues[i];
>>>>> +
>>>>> +    host->can_queue = hba->nutrs;
>>>>> +    host->cmd_per_lun = hba->nutrs;
>>>>> +
>>>>> +    /* One more reserved for dev_cmd_queue */
>>>>> +    hba->nr_hw_queues = host->nr_hw_queues + 1;
>>>>> +
>>>>
>>>> So this would mean that the host can accept .can_queue *
>>>> .nr_hw_queues numbers of requests simultaneously - is that true?
>>>
>>> That would mean that .can_queue * .nr_hw_queues numbers of request
>>> may be queued to the host.
>>> Please can you elaborate if you see an issue.
>>
>> Hi Asutosh,
>>
>> The `host_tagset` flag has been introduced by John and Hannes some
>> time ago. See also commit bdb01301f3ea ("scsi: Add host and host
>> template flag 'host_tagset'"). This flag supports sharing tags across
>> hardware queues and could be used to support UFSHCI 4.0 controllers
>> that do not support the EXT_IID feature.
>>
>> In order not to complicate the implementation further, I propose to
>> fall back to the UFSHCI 3.0 compatibility mode for UFSHCI 4.0
>> controllers that do not support the EXT_IID feature.
>>
>> To answer John's question: the maximum number of outstanding commands
>> is 16 * hba->nutrs if EXT_IID is supported (EXT_IID is a four bits
>> field). If the hardware queue index is encoded in the EXT_IID field,
>> hba->nutrs is the number of commands per hardware queue.
>>
>> Thanks,
>>
>> Bart.
>
> Thanks Bart, I wasn't aware of the background of John's Q. I will
> consider your proposal and get back.
>

I went through the change and fwiu of your proposal is to use
host_tagset = 1 if HC doesn't support EXT_IID capability.
That way tags would be shared across 16 HWQs (4-bit IID encoding the
queue-ids).
That would also mean that the hba->nutrs would have to be adjusted such
that the tags(8-bit) don't exceed 255.

Summarily,
if EXT_IID is not supported:
host_tagset = 1, maximum configurable hba->nutrs = 16, maximum
configurable nr_hw_queues = 16.
maximum number of outstanding commands to host = 16 x 16 = 256.

if EXT_IID is supported:
host_tagset = 0, maximum confiugrable hba-nutrs = 255, maximum
configurable nr_hw_queues = 255.

Please let me know if I'm missing something.

Thank you,
-asd




2022-07-29 19:04:33

by Bart Van Assche

[permalink] [raw]
Subject: Re: [PATCH 1/2] scsi: ufs: Add Multi-Circular Queue support

On 7/29/22 09:43, Asutosh Das (asd) wrote:
> I went through the change and fwiu of your proposal is to use
> host_tagset = 1 if HC doesn't support EXT_IID capability.
> That way tags would be shared across 16 HWQs (4-bit IID encoding the
> queue-ids).
> That would also mean that the hba->nutrs would have to be adjusted such
> that the tags(8-bit) don't exceed 255.
>
> Summarily,
> if EXT_IID is not supported:
> host_tagset = 1, maximum configurable hba->nutrs = 16, maximum
> configurable nr_hw_queues = 16.
> maximum number of outstanding commands to host = 16 x 16 = 256.
>
> if EXT_IID is supported:
> host_tagset = 0, maximum confiugrable hba-nutrs = 255, maximum
> configurable nr_hw_queues = 255.
>
> Please let me know if I'm missing something.

Hi Asutosh,

I recommend to always set host_tagset = 1. The performance overhead of
host_tagset = 1 compared to host_tagset = 0 should be negligible for UFS
devices. This will simplify the UFS host controller driver and will
allow us to limit the total number of outstanding commands to the
maximum number of outstanding commands supported by the UFS device. I
expect that we will need to do this. If more commands are sent to the
controller than what the UFS device can queue, the host controller will
decide in which order commands are fetched and sent to the controller.
We want the host to decide in which order commands are queued and not
the host controller. As an example, adding support for HCTX_TYPE_READ
queues will only work as expected if the number of outstanding commands
is less than or equal to what the UFS device can queue.

For host_tagset = 1, we can combine the 4 EXT_IID bits with the 8 bit
task tag and create a single space of 4096 (2**12) tags. The block layer
sbitmap data structure has been designed to minimize contention even if
a single sbitmap data structure is shared across multiple CPU cores.

Thanks,

Bart.