2017-04-21 15:52:40

by Gary R Hook

[permalink] [raw]
Subject: [PATCH 0/2] Change CCP ISR handler model

he CCP has the ability to perform several operations
simultaneously, but only one interrupt. The current design
exposes a window when using MSI/MSI-X interrupts wherein
state can change but no interrupt is generated; this can
lead to a hang in the engine. Switch to a tasklet backend
which allows serializing state changes, handles processing
of the interrupts, and avoids the loss of task completion
status.

---

Gary R Hook (2):
crypto: ccp - Change ISR handler method for a v3 CCP
crypto: ccp - Change ISR handler method for a v5 CCP


drivers/crypto/ccp/ccp-dev-v3.c | 120 +++++++++++++++++++++++----------------
drivers/crypto/ccp/ccp-dev-v5.c | 111 ++++++++++++++++++++++--------------
drivers/crypto/ccp/ccp-dev.h | 3 +
drivers/crypto/ccp/ccp-pci.c | 2 +
4 files changed, 142 insertions(+), 94 deletions(-)


2017-04-21 15:51:47

by Gary R Hook

[permalink] [raw]
Subject: [PATCH 2/2] crypto: ccp - Change ISR handler method for a v5 CCP

The CCP has the ability to perform several operations simultaneously,
but only one interrupt. When implemented as a PCI device and using
MSI-X/MSI interrupts, use a tasklet model to service interrupts. By
disabling and enabling interrupts from the CCP, coupled with the
queuing that tasklets provide, we can ensure that all events
(occurring on the device) are recognized and serviced.

This change fixes a problem wherein 2 or more busy queues can cause
notification bits to change state while a (CCP) interrupt is being
serviced, but after the queue state has been evaluated. This results
in the event being 'lost' and the queue hanging, waiting to be
serviced. Since the status bits are never fully de-asserted, the
CCP never generates another interrupt (all bits zero -> one or more
bits one), and no further CCP operations will be executed.


Cc: <[email protected]> # 4.9.x+

Signed-off-by: Gary R Hook <[email protected]>
---
drivers/crypto/ccp/ccp-dev-v5.c | 111 ++++++++++++++++++++++++---------------
1 file changed, 67 insertions(+), 44 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
index 13b81a1c1184..ccbe32d5dd1c 100644
--- a/drivers/crypto/ccp/ccp-dev-v5.c
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -705,6 +705,65 @@ static int ccp_assign_lsbs(struct ccp_device *ccp)
return rc;
}

+static void ccp5_disable_queue_interrupts(struct ccp_device *ccp)
+{
+ unsigned int i;
+
+ for (i = 0; i < ccp->cmd_q_count; i++)
+ iowrite32(0x0, ccp->cmd_q[i].reg_int_enable);
+}
+
+static void ccp5_enable_queue_interrupts(struct ccp_device *ccp)
+{
+ unsigned int i;
+
+ for (i = 0; i < ccp->cmd_q_count; i++)
+ iowrite32(SUPPORTED_INTERRUPTS, ccp->cmd_q[i].reg_int_enable);
+}
+
+static void ccp5_irq_bh(unsigned long data)
+{
+ struct ccp_device *ccp = (struct ccp_device *)data;
+ u32 status;
+ unsigned int i;
+
+ for (i = 0; i < ccp->cmd_q_count; i++) {
+ struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
+
+ status = ioread32(cmd_q->reg_interrupt_status);
+
+ if (status) {
+ cmd_q->int_status = status;
+ cmd_q->q_status = ioread32(cmd_q->reg_status);
+ cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
+
+ /* On error, only save the first error value */
+ if ((status & INT_ERROR) && !cmd_q->cmd_error)
+ cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
+
+ cmd_q->int_rcvd = 1;
+
+ /* Acknowledge the interrupt and wake the kthread */
+ iowrite32(status, cmd_q->reg_interrupt_status);
+ wake_up_interruptible(&cmd_q->int_queue);
+ }
+ }
+ ccp5_enable_queue_interrupts(ccp);
+}
+
+static irqreturn_t ccp5_irq_handler(int irq, void *data)
+{
+ struct device *dev = data;
+ struct ccp_device *ccp = dev_get_drvdata(dev);
+
+ ccp5_disable_queue_interrupts(ccp);
+ if (ccp->use_tasklet)
+ tasklet_schedule(&ccp->irq_tasklet);
+ else
+ ccp5_irq_bh((unsigned long)ccp);
+ return IRQ_HANDLED;
+}
+
static int ccp5_init(struct ccp_device *ccp)
{
struct device *dev = ccp->dev;
@@ -789,18 +848,17 @@ static int ccp5_init(struct ccp_device *ccp)
}

/* Turn off the queues and disable interrupts until ready */
+ ccp5_disable_queue_interrupts(ccp);
for (i = 0; i < ccp->cmd_q_count; i++) {
cmd_q = &ccp->cmd_q[i];

cmd_q->qcontrol = 0; /* Start with nothing */
iowrite32(cmd_q->qcontrol, cmd_q->reg_control);

- /* Disable the interrupts */
- iowrite32(0x00, cmd_q->reg_int_enable);
ioread32(cmd_q->reg_int_status);
ioread32(cmd_q->reg_status);

- /* Clear the interrupts */
+ /* Clear the interrupt status */
iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_interrupt_status);
}

@@ -811,6 +869,10 @@ static int ccp5_init(struct ccp_device *ccp)
dev_err(dev, "unable to allocate an IRQ\n");
goto e_pool;
}
+ /* Initialize the ISR tasklet */
+ if (ccp->use_tasklet)
+ tasklet_init(&ccp->irq_tasklet, ccp5_irq_bh,
+ (unsigned long)ccp);

dev_dbg(dev, "Loading LSB map...\n");
/* Copy the private LSB mask to the public registers */
@@ -879,11 +941,7 @@ static int ccp5_init(struct ccp_device *ccp)
}

dev_dbg(dev, "Enabling interrupts...\n");
- /* Enable interrupts */
- for (i = 0; i < ccp->cmd_q_count; i++) {
- cmd_q = &ccp->cmd_q[i];
- iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_int_enable);
- }
+ ccp5_enable_queue_interrupts(ccp);

dev_dbg(dev, "Registering device...\n");
/* Put this on the unit list to make it available */
@@ -935,15 +993,13 @@ static void ccp5_destroy(struct ccp_device *ccp)
ccp_del_device(ccp);

/* Disable and clear interrupts */
+ ccp5_disable_queue_interrupts(ccp);
for (i = 0; i < ccp->cmd_q_count; i++) {
cmd_q = &ccp->cmd_q[i];

/* Turn off the run bit */
iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control);

- /* Disable the interrupts */
- iowrite32(0x00, cmd_q->reg_int_enable);
-
/* Clear the interrupt status */
iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_interrupt_status);
ioread32(cmd_q->reg_int_status);
@@ -978,39 +1034,6 @@ static void ccp5_destroy(struct ccp_device *ccp)
}
}

-static irqreturn_t ccp5_irq_handler(int irq, void *data)
-{
- struct device *dev = data;
- struct ccp_device *ccp = dev_get_drvdata(dev);
- u32 status;
- unsigned int i;
-
- for (i = 0; i < ccp->cmd_q_count; i++) {
- struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
-
- status = ioread32(cmd_q->reg_interrupt_status);
-
- if (status) {
- cmd_q->int_status = status;
- cmd_q->q_status = ioread32(cmd_q->reg_status);
- cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
-
- /* On error, only save the first error value */
- if ((status & INT_ERROR) && !cmd_q->cmd_error)
- cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
-
- cmd_q->int_rcvd = 1;
-
- /* Acknowledge the interrupt and wake the kthread */
- iowrite32(SUPPORTED_INTERRUPTS,
- cmd_q->reg_interrupt_status);
- wake_up_interruptible(&cmd_q->int_queue);
- }
- }
-
- return IRQ_HANDLED;
-}
-
static void ccp5_config(struct ccp_device *ccp)
{
/* Public side */

2017-04-21 16:39:38

by Gary R Hook

[permalink] [raw]
Subject: [PATCH 1/2] crypto: ccp - Change ISR handler method for a v3 CCP

The CCP has the ability to perform several operations simultaneously,
but only one interrupt. When implemented as a PCI device and using
MSI-X/MSI interrupts, use a tasklet model to service interrupts. By
disabling and enabling interrupts from the CCP, coupled with the
queuing that tasklets provide, we can ensure that all events
(occurring on the device) are recognized and serviced.

This change fixes a problem wherein 2 or more busy queues can cause
notification bits to change state while a (CCP) interrupt is being
serviced, but after the queue state has been evaluated. This results
in the event being 'lost' and the queue hanging, waiting to be
serviced. Since the status bits are never fully de-asserted, the
CCP never generates another interrupt (all bits zero -> one or more
bits one), and no further CCP operations will be executed.


Cc: <[email protected]> # 4.9.x+

Signed-off-by: Gary R Hook <[email protected]>
---
drivers/crypto/ccp/ccp-dev-v3.c | 120 +++++++++++++++++++++++----------------
drivers/crypto/ccp/ccp-dev.h | 3 +
drivers/crypto/ccp/ccp-pci.c | 2 +
3 files changed, 75 insertions(+), 50 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c
index a3689a66cb1d..367c2e30656f 100644
--- a/drivers/crypto/ccp/ccp-dev-v3.c
+++ b/drivers/crypto/ccp/ccp-dev-v3.c
@@ -315,17 +315,73 @@ static int ccp_perform_ecc(struct ccp_op *op)
return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}

+static void ccp_disable_queue_interrupts(struct ccp_device *ccp)
+{
+ iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
+}
+
+static void ccp_enable_queue_interrupts(struct ccp_device *ccp)
+{
+ iowrite32(ccp->qim, ccp->io_regs + IRQ_MASK_REG);
+}
+
+static void ccp_irq_bh(unsigned long data)
+{
+ struct ccp_device *ccp = (struct ccp_device *)data;
+ struct ccp_cmd_queue *cmd_q;
+ u32 q_int, status;
+ unsigned int i;
+
+ status = ioread32(ccp->io_regs + IRQ_STATUS_REG);
+
+ for (i = 0; i < ccp->cmd_q_count; i++) {
+ cmd_q = &ccp->cmd_q[i];
+
+ q_int = status & (cmd_q->int_ok | cmd_q->int_err);
+ if (q_int) {
+ cmd_q->int_status = status;
+ cmd_q->q_status = ioread32(cmd_q->reg_status);
+ cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
+
+ /* On error, only save the first error value */
+ if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error)
+ cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
+
+ cmd_q->int_rcvd = 1;
+
+ /* Acknowledge the interrupt and wake the kthread */
+ iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG);
+ wake_up_interruptible(&cmd_q->int_queue);
+ }
+ }
+ ccp_enable_queue_interrupts(ccp);
+}
+
+static irqreturn_t ccp_irq_handler(int irq, void *data)
+{
+ struct device *dev = data;
+ struct ccp_device *ccp = dev_get_drvdata(dev);
+
+ ccp_disable_queue_interrupts(ccp);
+ if (ccp->use_tasklet)
+ tasklet_schedule(&ccp->irq_tasklet);
+ else
+ ccp_irq_bh((unsigned long)ccp);
+
+ return IRQ_HANDLED;
+}
+
static int ccp_init(struct ccp_device *ccp)
{
struct device *dev = ccp->dev;
struct ccp_cmd_queue *cmd_q;
struct dma_pool *dma_pool;
char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
- unsigned int qmr, qim, i;
+ unsigned int qmr, i;
int ret;

/* Find available queues */
- qim = 0;
+ ccp->qim = 0;
qmr = ioread32(ccp->io_regs + Q_MASK_REG);
for (i = 0; i < MAX_HW_QUEUES; i++) {
if (!(qmr & (1 << i)))
@@ -370,7 +426,7 @@ static int ccp_init(struct ccp_device *ccp)
init_waitqueue_head(&cmd_q->int_queue);

/* Build queue interrupt mask (two interrupts per queue) */
- qim |= cmd_q->int_ok | cmd_q->int_err;
+ ccp->qim |= cmd_q->int_ok | cmd_q->int_err;

#ifdef CONFIG_ARM64
/* For arm64 set the recommended queue cache settings */
@@ -388,14 +444,14 @@ static int ccp_init(struct ccp_device *ccp)
dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);

/* Disable and clear interrupts until ready */
- iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
+ ccp_disable_queue_interrupts(ccp);
for (i = 0; i < ccp->cmd_q_count; i++) {
cmd_q = &ccp->cmd_q[i];

ioread32(cmd_q->reg_int_status);
ioread32(cmd_q->reg_status);
}
- iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);
+ iowrite32(ccp->qim, ccp->io_regs + IRQ_STATUS_REG);

/* Request an irq */
ret = ccp->get_irq(ccp);
@@ -404,6 +460,11 @@ static int ccp_init(struct ccp_device *ccp)
goto e_pool;
}

+ /* Initialize the ISR tasklet? */
+ if (ccp->use_tasklet)
+ tasklet_init(&ccp->irq_tasklet, ccp_irq_bh,
+ (unsigned long)ccp);
+
dev_dbg(dev, "Starting threads...\n");
/* Create a kthread for each queue */
for (i = 0; i < ccp->cmd_q_count; i++) {
@@ -426,7 +487,7 @@ static int ccp_init(struct ccp_device *ccp)

dev_dbg(dev, "Enabling interrupts...\n");
/* Enable interrupts */
- iowrite32(qim, ccp->io_regs + IRQ_MASK_REG);
+ ccp_enable_queue_interrupts(ccp);

dev_dbg(dev, "Registering device...\n");
ccp_add_device(ccp);
@@ -463,7 +524,7 @@ static void ccp_destroy(struct ccp_device *ccp)
{
struct ccp_cmd_queue *cmd_q;
struct ccp_cmd *cmd;
- unsigned int qim, i;
+ unsigned int i;

/* Unregister the DMA engine */
ccp_dmaengine_unregister(ccp);
@@ -474,22 +535,15 @@ static void ccp_destroy(struct ccp_device *ccp)
/* Remove this device from the list of available units */
ccp_del_device(ccp);

- /* Build queue interrupt mask (two interrupt masks per queue) */
- qim = 0;
- for (i = 0; i < ccp->cmd_q_count; i++) {
- cmd_q = &ccp->cmd_q[i];
- qim |= cmd_q->int_ok | cmd_q->int_err;
- }
-
/* Disable and clear interrupts */
- iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG);
+ ccp_disable_queue_interrupts(ccp);
for (i = 0; i < ccp->cmd_q_count; i++) {
cmd_q = &ccp->cmd_q[i];

ioread32(cmd_q->reg_int_status);
ioread32(cmd_q->reg_status);
}
- iowrite32(qim, ccp->io_regs + IRQ_STATUS_REG);
+ iowrite32(ccp->qim, ccp->io_regs + IRQ_STATUS_REG);

/* Stop the queue kthreads */
for (i = 0; i < ccp->cmd_q_count; i++)
@@ -516,40 +570,6 @@ static void ccp_destroy(struct ccp_device *ccp)
}
}

-static irqreturn_t ccp_irq_handler(int irq, void *data)
-{
- struct device *dev = data;
- struct ccp_device *ccp = dev_get_drvdata(dev);
- struct ccp_cmd_queue *cmd_q;
- u32 q_int, status;
- unsigned int i;
-
- status = ioread32(ccp->io_regs + IRQ_STATUS_REG);
-
- for (i = 0; i < ccp->cmd_q_count; i++) {
- cmd_q = &ccp->cmd_q[i];
-
- q_int = status & (cmd_q->int_ok | cmd_q->int_err);
- if (q_int) {
- cmd_q->int_status = status;
- cmd_q->q_status = ioread32(cmd_q->reg_status);
- cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
-
- /* On error, only save the first error value */
- if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error)
- cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
-
- cmd_q->int_rcvd = 1;
-
- /* Acknowledge the interrupt and wake the kthread */
- iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG);
- wake_up_interruptible(&cmd_q->int_queue);
- }
- }
-
- return IRQ_HANDLED;
-}
-
static const struct ccp_actions ccp3_actions = {
.aes = ccp_perform_aes,
.xts_aes = ccp_perform_xts_aes,
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index 2dfec019a832..0cb09d0feeaf 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -339,7 +339,10 @@ struct ccp_device {
void *dev_specific;
int (*get_irq)(struct ccp_device *ccp);
void (*free_irq)(struct ccp_device *ccp);
+ unsigned int qim;
unsigned int irq;
+ bool use_tasklet;
+ struct tasklet_struct irq_tasklet;

/* I/O area used for device communication. The register mapping
* starts at an offset into the mapped bar.
diff --git a/drivers/crypto/ccp/ccp-pci.c b/drivers/crypto/ccp/ccp-pci.c
index 28a9996c1085..e880d4cf4ada 100644
--- a/drivers/crypto/ccp/ccp-pci.c
+++ b/drivers/crypto/ccp/ccp-pci.c
@@ -69,6 +69,7 @@ static int ccp_get_msix_irqs(struct ccp_device *ccp)
goto e_irq;
}
}
+ ccp->use_tasklet = true;

return 0;

@@ -100,6 +101,7 @@ static int ccp_get_msi_irq(struct ccp_device *ccp)
dev_notice(dev, "unable to allocate MSI IRQ (%d)\n", ret);
goto e_msi;
}
+ ccp->use_tasklet = true;

return 0;


2017-04-24 10:38:42

by Herbert Xu

[permalink] [raw]
Subject: Re: [PATCH 0/2] Change CCP ISR handler model

On Fri, Apr 21, 2017 at 10:49:56AM -0500, Gary R Hook wrote:
> he CCP has the ability to perform several operations
> simultaneously, but only one interrupt. The current design
> exposes a window when using MSI/MSI-X interrupts wherein
> state can change but no interrupt is generated; this can
> lead to a hang in the engine. Switch to a tasklet backend
> which allows serializing state changes, handles processing
> of the interrupts, and avoids the loss of task completion
> status.
>
> ---
>
> Gary R Hook (2):
> crypto: ccp - Change ISR handler method for a v3 CCP
> crypto: ccp - Change ISR handler method for a v5 CCP

All applied. Thanks.
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt