2021-02-27 13:47:04

by Stephan Müller

[permalink] [raw]
Subject: [PATCH v38 02/13] LRNG - allocate one DRNG instance per NUMA node

In order to improve NUMA-locality when serving getrandom(2) requests,
allocate one DRNG instance per node.

The DRNG instance that is present right from the start of the kernel is
reused as the first per-NUMA-node DRNG. For all remaining online NUMA
nodes a new DRNG instance is allocated.

During boot time, the multiple DRNG instances are seeded sequentially.
With this, the first DRNG instance (referenced as the initial DRNG
in the code) is completely seeded with 256 bits of entropy before the
next DRNG instance is completely seeded.

When random numbers are requested, the NUMA-node-local DRNG is checked
whether it has been already fully seeded. If this is not the case, the
initial DRNG is used to serve the request.

CC: Torsten Duwe <[email protected]>
CC: "Eric W. Biederman" <[email protected]>
CC: "Alexander E. Patrakov" <[email protected]>
CC: "Ahmed S. Darwish" <[email protected]>
CC: "Theodore Y. Ts'o" <[email protected]>
CC: Willy Tarreau <[email protected]>
CC: Matthew Garrett <[email protected]>
CC: Vito Caputo <[email protected]>
CC: Andreas Dilger <[email protected]>
CC: Jan Kara <[email protected]>
CC: Ray Strode <[email protected]>
CC: William Jon McCann <[email protected]>
CC: zhangjs <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Florian Weimer <[email protected]>
CC: Lennart Poettering <[email protected]>
CC: Nicolai Stange <[email protected]>
CC: Eric Biggers <[email protected]>
Reviewed-by: Marcelo Henrique Cerri <[email protected]>
Reviewed-by: Roman Drahtmueller <[email protected]>
Tested-by: Roman Drahtm?ller <[email protected]>
Tested-by: Marcelo Henrique Cerri <[email protected]>
Tested-by: Neil Horman <[email protected]>
Signed-off-by: Stephan Mueller <[email protected]>
---
drivers/char/lrng/Makefile | 2 +
drivers/char/lrng/lrng_internal.h | 5 ++
drivers/char/lrng/lrng_numa.c | 120 ++++++++++++++++++++++++++++++
3 files changed, 127 insertions(+)
create mode 100644 drivers/char/lrng/lrng_numa.c

diff --git a/drivers/char/lrng/Makefile b/drivers/char/lrng/Makefile
index e72e01c15bb9..29724c65287d 100644
--- a/drivers/char/lrng/Makefile
+++ b/drivers/char/lrng/Makefile
@@ -7,3 +7,5 @@ obj-y += lrng_pool.o lrng_aux.o \
lrng_sw_noise.o lrng_archrandom.o \
lrng_drng.o lrng_chacha20.o \
lrng_interfaces.o
+
+obj-$(CONFIG_NUMA) += lrng_numa.o
diff --git a/drivers/char/lrng/lrng_internal.h b/drivers/char/lrng/lrng_internal.h
index d398ac6b5674..1bfb3710c767 100644
--- a/drivers/char/lrng/lrng_internal.h
+++ b/drivers/char/lrng/lrng_internal.h
@@ -214,8 +214,13 @@ int lrng_drng_get_sleep(u8 *outbuf, u32 outbuflen);
void lrng_drng_force_reseed(void);
void lrng_drng_seed_work(struct work_struct *dummy);

+#ifdef CONFIG_NUMA
+struct lrng_drng **lrng_drng_instances(void);
+void lrng_drngs_numa_alloc(void);
+#else /* CONFIG_NUMA */
static inline struct lrng_drng **lrng_drng_instances(void) { return NULL; }
static inline void lrng_drngs_numa_alloc(void) { return; }
+#endif /* CONFIG_NUMA */

/************************** Entropy pool management ***************************/

diff --git a/drivers/char/lrng/lrng_numa.c b/drivers/char/lrng/lrng_numa.c
new file mode 100644
index 000000000000..37817771b97a
--- /dev/null
+++ b/drivers/char/lrng/lrng_numa.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+/*
+ * LRNG NUMA support
+ *
+ * Copyright (C) 2016 - 2021, Stephan Mueller <[email protected]>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/lrng.h>
+#include <linux/slab.h>
+
+#include "lrng_internal.h"
+
+static struct lrng_drng **lrng_drng __read_mostly = NULL;
+
+struct lrng_drng **lrng_drng_instances(void)
+{
+ return smp_load_acquire(&lrng_drng);
+}
+
+/* Allocate the data structures for the per-NUMA node DRNGs */
+static void _lrng_drngs_numa_alloc(struct work_struct *work)
+{
+ struct lrng_drng **drngs;
+ struct lrng_drng *lrng_drng_init = lrng_drng_init_instance();
+ u32 node;
+ bool init_drng_used = false;
+
+ mutex_lock(&lrng_crypto_cb_update);
+
+ /* per-NUMA-node DRNGs are already present */
+ if (lrng_drng)
+ goto unlock;
+
+ drngs = kcalloc(nr_node_ids, sizeof(void *), GFP_KERNEL|__GFP_NOFAIL);
+ for_each_online_node(node) {
+ struct lrng_drng *drng;
+
+ if (!init_drng_used) {
+ drngs[node] = lrng_drng_init;
+ init_drng_used = true;
+ continue;
+ }
+
+ drng = kmalloc_node(sizeof(struct lrng_drng),
+ GFP_KERNEL|__GFP_NOFAIL, node);
+ memset(drng, 0, sizeof(lrng_drng));
+
+ drng->crypto_cb = lrng_drng_init->crypto_cb;
+ drng->drng = drng->crypto_cb->lrng_drng_alloc(
+ LRNG_DRNG_SECURITY_STRENGTH_BYTES);
+ if (IS_ERR(drng->drng)) {
+ kfree(drng);
+ goto err;
+ }
+
+ drng->hash = drng->crypto_cb->lrng_hash_alloc();
+ if (IS_ERR(drng->hash)) {
+ drng->crypto_cb->lrng_drng_dealloc(drng->drng);
+ kfree(drng);
+ goto err;
+ }
+
+ mutex_init(&drng->lock);
+ spin_lock_init(&drng->spin_lock);
+ rwlock_init(&drng->hash_lock);
+
+ /*
+ * Switch the hash used by the per-CPU pool.
+ * We do not need to lock the new hash as it is not usable yet
+ * due to **drngs not yet being initialized.
+ */
+ if (lrng_pcpu_switch_hash(node, drng->crypto_cb, drng->hash,
+ &lrng_cc20_crypto_cb))
+ goto err;
+
+ /*
+ * No reseeding of NUMA DRNGs from previous DRNGs as this
+ * would complicate the code. Let it simply reseed.
+ */
+ lrng_drng_reset(drng);
+ drngs[node] = drng;
+
+ lrng_pool_inc_numa_node();
+ pr_info("DRNG and entropy pool read hash for NUMA node %d allocated\n",
+ node);
+ }
+
+ /* counterpart to smp_load_acquire in lrng_drng_instances */
+ if (!cmpxchg_release(&lrng_drng, NULL, drngs))
+ goto unlock;
+
+err:
+ for_each_online_node(node) {
+ struct lrng_drng *drng = drngs[node];
+
+ if (drng == lrng_drng_init)
+ continue;
+
+ if (drng) {
+ lrng_pcpu_switch_hash(node, &lrng_cc20_crypto_cb, NULL,
+ drng->crypto_cb);
+ drng->crypto_cb->lrng_hash_dealloc(drng->hash);
+ drng->crypto_cb->lrng_drng_dealloc(drng->drng);
+ kfree(drng);
+ }
+ }
+ kfree(drngs);
+
+unlock:
+ mutex_unlock(&lrng_crypto_cb_update);
+}
+
+static DECLARE_WORK(lrng_drngs_numa_alloc_work, _lrng_drngs_numa_alloc);
+
+void lrng_drngs_numa_alloc(void)
+{
+ schedule_work(&lrng_drngs_numa_alloc_work);
+}
--
2.29.2