2020-07-18 09:44:14

by Sven Auhagen

Subject: [PATCH 1/1 v2] inside-secure irq balance

Balance the IRQs of the inside-secure driver over all
available CPUs.
Currently, all interrupts are handled by the first CPU.

From my testing with IPsec AES-GCM 256
on my MCbin with 4 cores, I get a 50% speed increase:

Before the patch: 99.73 Kpps
With the patch: 151.25 Kpps

Signed-off-by: Sven Auhagen <[email protected]>
---
v2:
* use cpumask_local_spread and remove affinity on
module remove

drivers/crypto/inside-secure/safexcel.c | 13 +++++++++++--
drivers/crypto/inside-secure/safexcel.h | 3 +++
2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
index 2cb53fbae841..fb8e0d8732f8 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -1135,11 +1135,12 @@ static irqreturn_t safexcel_irq_ring_thread(int irq, void *data)

static int safexcel_request_ring_irq(void *pdev, int irqid,
int is_pci_dev,
+ int ring_id,
irq_handler_t handler,
irq_handler_t threaded_handler,
struct safexcel_ring_irq_data *ring_irq_priv)
{
- int ret, irq;
+ int ret, irq, cpu;
struct device *dev;

if (IS_ENABLED(CONFIG_PCI) && is_pci_dev) {
@@ -1177,6 +1178,10 @@ static int safexcel_request_ring_irq(void *pdev, int irqid,
return ret;
}

+ // Set affinity
+ cpu = cpumask_local_spread(ring_id, -1);
+ irq_set_affinity_hint(irq, get_cpu_mask(cpu));
+
return irq;
}

@@ -1611,6 +1616,7 @@ static int safexcel_probe_generic(void *pdev,
irq = safexcel_request_ring_irq(pdev,
EIP197_IRQ_NUMBER(i, is_pci_dev),
is_pci_dev,
+ i,
safexcel_irq_ring,
safexcel_irq_ring_thread,
ring_irq);
@@ -1619,6 +1625,7 @@ static int safexcel_probe_generic(void *pdev,
return irq;
}

+ priv->ring[i].irq = irq;
priv->ring[i].work_data.priv = priv;
priv->ring[i].work_data.ring = i;
INIT_WORK(&priv->ring[i].work_data.work,
@@ -1756,8 +1763,10 @@ static int safexcel_remove(struct platform_device *pdev)
clk_disable_unprepare(priv->reg_clk);
clk_disable_unprepare(priv->clk);

- for (i = 0; i < priv->config.rings; i++)
+ for (i = 0; i < priv->config.rings; i++) {
+ irq_set_affinity_hint(priv->ring[i].irq, NULL);
destroy_workqueue(priv->ring[i].workqueue);
+ }

return 0;
}
diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h
index 94016c505abb..7c5fe382d272 100644
--- a/drivers/crypto/inside-secure/safexcel.h
+++ b/drivers/crypto/inside-secure/safexcel.h
@@ -707,6 +707,9 @@ struct safexcel_ring {
*/
struct crypto_async_request *req;
struct crypto_async_request *backlog;
+
+ /* irq of this ring */
+ int irq;
};

/* EIP integration context flags */
--
2.20.1
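
For context on the two helpers the patch relies on: cpumask_local_spread(i, node)
returns the i-th online CPU (preferring CPUs of the given NUMA node, or spreading
over all online CPUs when the node is -1, wrapping around when i exceeds the
number of online CPUs), and irq_set_affinity_hint() stores a preferred CPU mask
for an IRQ and, on kernels of this vintage, also applies it as the interrupt's
initial affinity. A minimal kernel-style sketch of the pattern follows; the ring
count and the ring_irqs[] array are illustrative placeholders, not the driver's
actual data structures:

#include <linux/cpumask.h>
#include <linux/interrupt.h>

#define NUM_RINGS	4		/* illustrative: one IRQ per ring */
static int ring_irqs[NUM_RINGS];	/* IRQ numbers obtained at probe time */

/* Probe path: steer ring i's IRQ to the i-th online CPU. */
static void spread_ring_irqs(void)
{
	int i, cpu;

	for (i = 0; i < NUM_RINGS; i++) {
		cpu = cpumask_local_spread(i, -1);	/* -1: no NUMA node preference */
		irq_set_affinity_hint(ring_irqs[i], get_cpu_mask(cpu));
	}
}

/* Remove path: drop the hints before the IRQs are freed. */
static void clear_ring_irq_hints(void)
{
	int i;

	for (i = 0; i < NUM_RINGS; i++)
		irq_set_affinity_hint(ring_irqs[i], NULL);
}

With four rings and four online CPUs this maps ring 0 to CPU 0, ring 1 to CPU 1,
and so on, instead of leaving every ring interrupt on CPU 0.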


2020-07-18 13:27:00

by Ard Biesheuvel

Subject: Re: [PATCH 1/1 v2] inside-secure irq balance

On Sat, 18 Jul 2020 at 12:43, Sven Auhagen <[email protected]> wrote:
>
> Balance the IRQs of the inside-secure driver over all
> available CPUs.
> Currently, all interrupts are handled by the first CPU.
>
> From my testing with IPsec AES-GCM 256
> on my MCbin with 4 cores, I get a 50% speed increase:
>
> Before the patch: 99.73 Kpps
> With the patch: 151.25 Kpps
>
> Signed-off-by: Sven Auhagen <[email protected]>
> ---
> v2:
> * use cpumask_local_spread and remove affinity on
> module remove
>
> drivers/crypto/inside-secure/safexcel.c | 13 +++++++++++--
> drivers/crypto/inside-secure/safexcel.h | 3 +++
> 2 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
> index 2cb53fbae841..fb8e0d8732f8 100644
> --- a/drivers/crypto/inside-secure/safexcel.c
> +++ b/drivers/crypto/inside-secure/safexcel.c
> @@ -1135,11 +1135,12 @@ static irqreturn_t safexcel_irq_ring_thread(int irq, void *data)
>
> static int safexcel_request_ring_irq(void *pdev, int irqid,
> int is_pci_dev,
> + int ring_id,
> irq_handler_t handler,
> irq_handler_t threaded_handler,
> struct safexcel_ring_irq_data *ring_irq_priv)
> {
> - int ret, irq;
> + int ret, irq, cpu;
> struct device *dev;
>
> if (IS_ENABLED(CONFIG_PCI) && is_pci_dev) {
> @@ -1177,6 +1178,10 @@ static int safexcel_request_ring_irq(void *pdev, int irqid,
> return ret;
> }
>
> + // Set affinity
> + cpu = cpumask_local_spread(ring_id, -1);

Please use the symbolic constant NUMA_NO_NODE here, so it is obvious
what the second argument means without having to grep for it.

> + irq_set_affinity_hint(irq, get_cpu_mask(cpu));
> +
> return irq;
> }
>
> @@ -1611,6 +1616,7 @@ static int safexcel_probe_generic(void *pdev,
> irq = safexcel_request_ring_irq(pdev,
> EIP197_IRQ_NUMBER(i, is_pci_dev),
> is_pci_dev,
> + i,
> safexcel_irq_ring,
> safexcel_irq_ring_thread,
> ring_irq);
> @@ -1619,6 +1625,7 @@ static int safexcel_probe_generic(void *pdev,
> return irq;
> }
>
> + priv->ring[i].irq = irq;
> priv->ring[i].work_data.priv = priv;
> priv->ring[i].work_data.ring = i;
> INIT_WORK(&priv->ring[i].work_data.work,
> @@ -1756,8 +1763,10 @@ static int safexcel_remove(struct platform_device *pdev)
> clk_disable_unprepare(priv->reg_clk);
> clk_disable_unprepare(priv->clk);
>
> - for (i = 0; i < priv->config.rings; i++)
> + for (i = 0; i < priv->config.rings; i++) {
> + irq_set_affinity_hint(priv->ring[i].irq, NULL);
> destroy_workqueue(priv->ring[i].workqueue);
> + }
>
> return 0;
> }
> diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h
> index 94016c505abb..7c5fe382d272 100644
> --- a/drivers/crypto/inside-secure/safexcel.h
> +++ b/drivers/crypto/inside-secure/safexcel.h
> @@ -707,6 +707,9 @@ struct safexcel_ring {
> */
> struct crypto_async_request *req;
> struct crypto_async_request *backlog;
> +
> + /* irq of this ring */
> + int irq;
> };
>
> /* EIP integration context flags */
> --
> 2.20.1
>
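
To make the suggestion concrete: NUMA_NO_NODE is defined as -1 in
<linux/numa.h>, so the behaviour stays the same and only the readability
improves. The affected lines would presumably end up something like the
sketch below (not the actual follow-up patch):

	cpu = cpumask_local_spread(ring_id, NUMA_NO_NODE);
	irq_set_affinity_hint(irq, get_cpu_mask(cpu));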

2020-07-19 06:11:06

by Sven Auhagen

Subject: Re: [PATCH 1/1 v2] inside-secure irq balance

On Sat, Jul 18, 2020 at 04:25:29PM +0300, Ard Biesheuvel wrote:
> On Sat, 18 Jul 2020 at 12:43, Sven Auhagen <[email protected]> wrote:
> >
> > Balance the IRQs of the inside-secure driver over all
> > available CPUs.
> > Currently, all interrupts are handled by the first CPU.
> >
> > From my testing with IPsec AES-GCM 256
> > on my MCbin with 4 cores, I get a 50% speed increase:
> >
> > Before the patch: 99.73 Kpps
> > With the patch: 151.25 Kpps
> >
> > Signed-off-by: Sven Auhagen <[email protected]>
> > ---
> > v2:
> > * use cpumask_local_spread and remove affinity on
> > module remove
> >
> > drivers/crypto/inside-secure/safexcel.c | 13 +++++++++++--
> > drivers/crypto/inside-secure/safexcel.h | 3 +++
> > 2 files changed, 14 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
> > index 2cb53fbae841..fb8e0d8732f8 100644
> > --- a/drivers/crypto/inside-secure/safexcel.c
> > +++ b/drivers/crypto/inside-secure/safexcel.c
> > @@ -1135,11 +1135,12 @@ static irqreturn_t safexcel_irq_ring_thread(int irq, void *data)
> >
> > static int safexcel_request_ring_irq(void *pdev, int irqid,
> > int is_pci_dev,
> > + int ring_id,
> > irq_handler_t handler,
> > irq_handler_t threaded_handler,
> > struct safexcel_ring_irq_data *ring_irq_priv)
> > {
> > - int ret, irq;
> > + int ret, irq, cpu;
> > struct device *dev;
> >
> > if (IS_ENABLED(CONFIG_PCI) && is_pci_dev) {
> > @@ -1177,6 +1178,10 @@ static int safexcel_request_ring_irq(void *pdev, int irqid,
> > return ret;
> > }
> >
> > + // Set affinity
> > + cpu = cpumask_local_spread(ring_id, -1);
>
> Please use the symbolic constant NUMA_NO_NODE here, so it is obvious
> what the second argument means without having to grep for it.

Thanks, I will change it and send a new version.
I will wait a few days to see if there are more comments.

>
> > + irq_set_affinity_hint(irq, get_cpu_mask(cpu));
> > +
> > return irq;
> > }
> >
> > @@ -1611,6 +1616,7 @@ static int safexcel_probe_generic(void *pdev,
> > irq = safexcel_request_ring_irq(pdev,
> > EIP197_IRQ_NUMBER(i, is_pci_dev),
> > is_pci_dev,
> > + i,
> > safexcel_irq_ring,
> > safexcel_irq_ring_thread,
> > ring_irq);
> > @@ -1619,6 +1625,7 @@ static int safexcel_probe_generic(void *pdev,
> > return irq;
> > }
> >
> > + priv->ring[i].irq = irq;
> > priv->ring[i].work_data.priv = priv;
> > priv->ring[i].work_data.ring = i;
> > INIT_WORK(&priv->ring[i].work_data.work,
> > @@ -1756,8 +1763,10 @@ static int safexcel_remove(struct platform_device *pdev)
> > clk_disable_unprepare(priv->reg_clk);
> > clk_disable_unprepare(priv->clk);
> >
> > - for (i = 0; i < priv->config.rings; i++)
> > + for (i = 0; i < priv->config.rings; i++) {
> > + irq_set_affinity_hint(priv->ring[i].irq, NULL);
> > destroy_workqueue(priv->ring[i].workqueue);
> > + }
> >
> > return 0;
> > }
> > diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h
> > index 94016c505abb..7c5fe382d272 100644
> > --- a/drivers/crypto/inside-secure/safexcel.h
> > +++ b/drivers/crypto/inside-secure/safexcel.h
> > @@ -707,6 +707,9 @@ struct safexcel_ring {
> > */
> > struct crypto_async_request *req;
> > struct crypto_async_request *backlog;
> > +
> > + /* irq of this ring */
> > + int irq;
> > };
> >
> > /* EIP integration context flags */
> > --
> > 2.20.1
> >