2017-04-05 23:21:06

by Rick Altherr

[permalink] [raw]
Subject: [PATCH v2 0/3] hw_random: timeriomem_rng: Migrate to new API and improve performance

The AST2400 can generate 32 bits of random data every 1us. The original
driver was limited to one 32-bit read per jiffy because it used the
deprecated hwrng API and standard timers. Migrating to the new hwrng API
and switching to hrtimers improves read performance of /dev/hwrng to 13Mb/s.

Changes in v2:
- Split API migration into separate patch
- Split type and variable renames into separate patch
- Split performance improvements into separate patch

Rick Altherr (3):
hw_random: Migrate timeriomem_rng to new API
hw_random: timeriomem_rng: Shorten verbose type and variable names
hw_random: timeriomem_rng: Improve performance for sub-jiffie update
periods

drivers/char/hw_random/timeriomem-rng.c | 157 ++++++++++++++++----------------
1 file changed, 80 insertions(+), 77 deletions(-)

--
2.12.2.715.g7642488e1d-goog


2017-04-05 23:21:18

by Rick Altherr

[permalink] [raw]
Subject: [PATCH v2 1/3] hw_random: Migrate timeriomem_rng to new API

Preserves the existing behavior of returning only 32 bits per call.

Signed-off-by: Rick Altherr <[email protected]>
---

Changes in v2:
- Split API migration into separate patch

drivers/char/hw_random/timeriomem-rng.c | 60 ++++++++++++++++-----------------
1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index cf37db263ecd..17574452fd35 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -20,18 +20,16 @@
* TODO: add support for reading sizes other than 32bits and masking
*/

-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
-#include <linux/of.h>
+#include <linux/completion.h>
#include <linux/hw_random.h>
#include <linux/io.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/timeriomem-rng.h>
-#include <linux/jiffies.h>
-#include <linux/sched.h>
#include <linux/timer.h>
-#include <linux/completion.h>

struct timeriomem_rng_private_data {
void __iomem *io_base;
@@ -45,32 +43,36 @@ struct timeriomem_rng_private_data {
struct hwrng timeriomem_rng_ops;
};

-#define to_rng_priv(rng) \
- ((struct timeriomem_rng_private_data *)rng->priv)
-
-/*
- * have data return 1, however return 0 if we have nothing
- */
-static int timeriomem_rng_data_present(struct hwrng *rng, int wait)
+static int timeriomem_rng_read(struct hwrng *hwrng, void *data,
+ size_t max, bool wait)
{
- struct timeriomem_rng_private_data *priv = to_rng_priv(rng);
+ struct timeriomem_rng_private_data *priv =
+ container_of(hwrng, struct timeriomem_rng_private_data,
+ timeriomem_rng_ops);
+ unsigned long cur;
+ s32 delay;

- if (!wait || priv->present)
- return priv->present;
+ /* The RNG provides 32-bit per read. Ensure there is enough space. */
+ if (max < sizeof(u32))
+ return 0;

- wait_for_completion(&priv->completion);
+ /*
+ * There may not have been enough time for new data to be generated
+ * since the last request. If the caller doesn't want to wait, let them
+ * bail out. Otherwise, wait for the completion. If the new data has
+ * already been generated, the completion should already be available.
+ */
+ if (!wait && !priv->present)
+ return 0;

- return 1;
-}
-
-static int timeriomem_rng_data_read(struct hwrng *rng, u32 *data)
-{
- struct timeriomem_rng_private_data *priv = to_rng_priv(rng);
- unsigned long cur;
- s32 delay;
+ wait_for_completion(&priv->completion);

- *data = readl(priv->io_base);
+ *(u32 *)data = readl(priv->io_base);

+ /*
+ * Block any new callers until the RNG has had time to generate new
+ * data.
+ */
cur = jiffies;

delay = cur - priv->expires;
@@ -154,9 +156,7 @@ static int timeriomem_rng_probe(struct platform_device *pdev)
setup_timer(&priv->timer, timeriomem_rng_trigger, (unsigned long)priv);

priv->timeriomem_rng_ops.name = dev_name(&pdev->dev);
- priv->timeriomem_rng_ops.data_present = timeriomem_rng_data_present;
- priv->timeriomem_rng_ops.data_read = timeriomem_rng_data_read;
- priv->timeriomem_rng_ops.priv = (unsigned long)priv;
+ priv->timeriomem_rng_ops.read = timeriomem_rng_read;

priv->io_base = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(priv->io_base)) {
--
2.12.2.715.g7642488e1d-goog

2017-04-05 23:21:25

by Rick Altherr

[permalink] [raw]
Subject: [PATCH v2 2/3] hw_random: timeriomem_rng: Shorten verbose type and variable names

No functional changes.

Signed-off-by: Rick Altherr <[email protected]>
---

Changes in v2:
- Split type and variable renames into separate patch

drivers/char/hw_random/timeriomem-rng.c | 27 +++++++++++++--------------
1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index 17574452fd35..024bdff7999f 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -31,7 +31,7 @@
#include <linux/timeriomem-rng.h>
#include <linux/timer.h>

-struct timeriomem_rng_private_data {
+struct timeriomem_rng_private {
void __iomem *io_base;
unsigned int expires;
unsigned int period;
@@ -40,15 +40,14 @@ struct timeriomem_rng_private_data {
struct timer_list timer;
struct completion completion;

- struct hwrng timeriomem_rng_ops;
+ struct hwrng rng_ops;
};

static int timeriomem_rng_read(struct hwrng *hwrng, void *data,
size_t max, bool wait)
{
- struct timeriomem_rng_private_data *priv =
- container_of(hwrng, struct timeriomem_rng_private_data,
- timeriomem_rng_ops);
+ struct timeriomem_rng_private *priv =
+ container_of(hwrng, struct timeriomem_rng_private, rng_ops);
unsigned long cur;
s32 delay;

@@ -89,8 +88,8 @@ static int timeriomem_rng_read(struct hwrng *hwrng, void *data,

static void timeriomem_rng_trigger(unsigned long data)
{
- struct timeriomem_rng_private_data *priv
- = (struct timeriomem_rng_private_data *)data;
+ struct timeriomem_rng_private *priv
+ = (struct timeriomem_rng_private *)data;

priv->present = 1;
complete(&priv->completion);
@@ -99,7 +98,7 @@ static void timeriomem_rng_trigger(unsigned long data)
static int timeriomem_rng_probe(struct platform_device *pdev)
{
struct timeriomem_rng_data *pdata = pdev->dev.platform_data;
- struct timeriomem_rng_private_data *priv;
+ struct timeriomem_rng_private *priv;
struct resource *res;
int err = 0;
int period;
@@ -121,7 +120,7 @@ static int timeriomem_rng_probe(struct platform_device *pdev)

/* Allocate memory for the device structure (and zero it) */
priv = devm_kzalloc(&pdev->dev,
- sizeof(struct timeriomem_rng_private_data), GFP_KERNEL);
+ sizeof(struct timeriomem_rng_private), GFP_KERNEL);
if (!priv)
return -ENOMEM;

@@ -155,8 +154,8 @@ static int timeriomem_rng_probe(struct platform_device *pdev)

setup_timer(&priv->timer, timeriomem_rng_trigger, (unsigned long)priv);

- priv->timeriomem_rng_ops.name = dev_name(&pdev->dev);
- priv->timeriomem_rng_ops.read = timeriomem_rng_read;
+ priv->rng_ops.name = dev_name(&pdev->dev);
+ priv->rng_ops.read = timeriomem_rng_read;

priv->io_base = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(priv->io_base)) {
@@ -164,7 +163,7 @@ static int timeriomem_rng_probe(struct platform_device *pdev)
goto out_timer;
}

- err = hwrng_register(&priv->timeriomem_rng_ops);
+ err = hwrng_register(&priv->rng_ops);
if (err) {
dev_err(&pdev->dev, "problem registering\n");
goto out_timer;
@@ -182,9 +181,9 @@ static int timeriomem_rng_probe(struct platform_device *pdev)

static int timeriomem_rng_remove(struct platform_device *pdev)
{
- struct timeriomem_rng_private_data *priv = platform_get_drvdata(pdev);
+ struct timeriomem_rng_private *priv = platform_get_drvdata(pdev);

- hwrng_unregister(&priv->timeriomem_rng_ops);
+ hwrng_unregister(&priv->rng_ops);

del_timer_sync(&priv->timer);

--
2.12.2.715.g7642488e1d-goog

2017-04-05 23:21:45

by Rick Altherr

[permalink] [raw]
Subject: [PATCH v2 3/3] hw_random: timeriomem_rng: Improve performance for sub-jiffie update periods

Some hardware RNGs provide a single register for obtaining random data.
Rather than signaling when new data is available, such hardware requires
the reader to wait a fixed amount of time between reads for new data to
be generated. timeriomem_rng implements this scheme with the period
specified in platform data or device tree. Although the period is
specified in microseconds, the implementation used a standard timer,
which has a minimum delay of 1 jiffy; this caused a significant
bottleneck for devices that can update every 1us. With the switch to an
hrtimer, a 1us period now delays each read by at most 2us.

Signed-off-by: Rick Altherr <[email protected]>
---

Changes in v2:
- Split performance improvements into separate patch

drivers/char/hw_random/timeriomem-rng.c | 86 +++++++++++++++++----------------
1 file changed, 45 insertions(+), 41 deletions(-)

diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index 024bdff7999f..a0faa5f05deb 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -21,23 +21,24 @@
*/

#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/hrtimer.h>
#include <linux/hw_random.h>
#include <linux/io.h>
-#include <linux/jiffies.h>
+#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
+#include <linux/time.h>
#include <linux/timeriomem-rng.h>
-#include <linux/timer.h>

struct timeriomem_rng_private {
void __iomem *io_base;
- unsigned int expires;
- unsigned int period;
+ ktime_t period;
unsigned int present:1;

- struct timer_list timer;
+ struct hrtimer timer;
struct completion completion;

struct hwrng rng_ops;
@@ -48,10 +49,13 @@ static int timeriomem_rng_read(struct hwrng *hwrng, void *data,
{
struct timeriomem_rng_private *priv =
container_of(hwrng, struct timeriomem_rng_private, rng_ops);
- unsigned long cur;
- s32 delay;
+ int retval = 0;
+ int period_us = ktime_to_us(priv->period);

- /* The RNG provides 32-bit per read. Ensure there is enough space. */
+ /*
+ * The RNG provides 32-bits per read. Ensure there is enough space for
+ * at minimum one read.
+ */
if (max < sizeof(u32))
return 0;

@@ -66,33 +70,44 @@ static int timeriomem_rng_read(struct hwrng *hwrng, void *data,

wait_for_completion(&priv->completion);

- *(u32 *)data = readl(priv->io_base);
+ do {
+ /*
+ * After the first read, all additional reads will need to wait
+ * for the RNG to generate new data. Since the period can have
+ * a wide range of values (1us to 1s have been observed), allow
+ * for 1% tolerance in the sleep time rather than a fixed value.
+ */
+ if (retval > 0)
+ usleep_range(period_us,
+ period_us + min(1, period_us / 100));
+
+ *(u32 *)data = readl(priv->io_base);
+ retval += sizeof(u32);
+ data += sizeof(u32);
+ max -= sizeof(u32);
+ } while (wait && max > sizeof(u32));

/*
* Block any new callers until the RNG has had time to generate new
* data.
*/
- cur = jiffies;
-
- delay = cur - priv->expires;
- delay = priv->period - (delay % priv->period);
-
- priv->expires = cur + delay;
priv->present = 0;
-
reinit_completion(&priv->completion);
- mod_timer(&priv->timer, priv->expires);
+ hrtimer_forward_now(&priv->timer, priv->period);
+ hrtimer_restart(&priv->timer);

- return 4;
+ return retval;
}

-static void timeriomem_rng_trigger(unsigned long data)
+static enum hrtimer_restart timeriomem_rng_trigger(struct hrtimer *timer)
{
struct timeriomem_rng_private *priv
- = (struct timeriomem_rng_private *)data;
+ = container_of(timer, struct timeriomem_rng_private, timer);

priv->present = 1;
complete(&priv->completion);
+
+ return HRTIMER_NORESTART;
}

static int timeriomem_rng_probe(struct platform_device *pdev)
@@ -140,43 +155,33 @@ static int timeriomem_rng_probe(struct platform_device *pdev)
period = pdata->period;
}

- priv->period = usecs_to_jiffies(period);
- if (priv->period < 1) {
- dev_err(&pdev->dev, "period is less than one jiffy\n");
- return -EINVAL;
- }
-
- priv->expires = jiffies;
- priv->present = 1;
-
+ priv->period = ns_to_ktime(period * NSEC_PER_USEC);
init_completion(&priv->completion);
- complete(&priv->completion);
-
- setup_timer(&priv->timer, timeriomem_rng_trigger, (unsigned long)priv);
+ hrtimer_init(&priv->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ priv->timer.function = timeriomem_rng_trigger;

priv->rng_ops.name = dev_name(&pdev->dev);
priv->rng_ops.read = timeriomem_rng_read;

priv->io_base = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(priv->io_base)) {
- err = PTR_ERR(priv->io_base);
- goto out_timer;
+ return PTR_ERR(priv->io_base);
}

+ /* Assume random data is already available. */
+ priv->present = 1;
+ complete(&priv->completion);
+
err = hwrng_register(&priv->rng_ops);
if (err) {
dev_err(&pdev->dev, "problem registering\n");
- goto out_timer;
+ return err;
}

dev_info(&pdev->dev, "32bits from 0x%p @ %dus\n",
priv->io_base, period);

return 0;
-
-out_timer:
- del_timer_sync(&priv->timer);
- return err;
}

static int timeriomem_rng_remove(struct platform_device *pdev)
@@ -184,8 +189,7 @@ static int timeriomem_rng_remove(struct platform_device *pdev)
struct timeriomem_rng_private *priv = platform_get_drvdata(pdev);

hwrng_unregister(&priv->rng_ops);
-
- del_timer_sync(&priv->timer);
+ hrtimer_cancel(&priv->timer);

return 0;
}
--
2.12.2.715.g7642488e1d-goog

2017-04-10 11:23:23

by Herbert Xu

[permalink] [raw]
Subject: Re: [PATCH v2 0/3] hw_random: timeriomem_rng: Migrate to new API and improve performance

On Wed, Apr 05, 2017 at 04:20:57PM -0700, Rick Altherr wrote:
> AST2400 can generate 32-bits of random data every 1us. Original driver
> was limited to one 32-bit read every jiffie due to deprecated API and use
> of timers. Migrating to new hwrng API and switching to hrtimers
> improves read performance of /dev/hwrng to 13Mb/s.
>
> Changes in v2:
> - Split API migration into separate patch
> - Split type and variable renames into separate patch
> - Split performance improvements into separate patch

All applied. Thanks.
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt