LinuxLists.cc - [PATCH 0/2] Parallel crypto/IPsec v7

2009-12-18 12:47:00

Subject: [PATCH 0/2] Parallel crypto/IPsec v7

This patchset adds the 'pcrypt' parallel crypto template. With this template it
is possible to process the crypto requests of a transform in parallel without
reordering the requests. This is particularly interesting for IPsec.

The parallel crypto template is based on the 'padata' generic
parallelization/serialization method. With this method data objects can
be processed in parallel, starting at some given point.
The parallelized data objects return after serialization in the order as
they were before the parallelization. In the case of IPsec, this makes it
possible to run the expensive parts in parallel without getting packet
reordering.

IPsec forwarding tests with two quad core machines (Intel Core 2 Quad Q6600)
and an EXFO FTB-400 packet blazer showed the following results:

On all tests I used smp_affinity to pin the interrupts of the network cards
to different cpus.

linux-2.6.33-rc1 (64 bit)
Packetsize: 1420 byte
Test time: 60 sec
Encryption: aes192-sha1
bidirectional throughput without packet loss: 2 x 325 Mbit/s
unidirectional throughput without packet loss: 325 Mbit/s

linux-2.6.33-rc1 (64 bit)
Packetsize: 128 byte
Test time: 60 sec
Encryption: aes192-sha1
bidirectional throughput without packet loss: 2 x 100 Mbit/s
unidirectional throughput without packet loss: 125 Mbit/s

linux-2.6.33-rc1 with padata/pcrypt (64 bit)
Packetsize: 1420 byte
Test time: 60 sec
Encryption: aes192-sha1
bidirectional throughput without packet loss: 2 x 650 Mbit/s
unidirectional throughput without packet loss: 850 Mbit/s

linux-2.6.33-rc1 with padata/pcrypt (64 bit)
Packetsize: 128 byte
Test time: 60 sec
Encryption: aes192-sha1
bidirectional throughput without packet loss: 2 x 100 Mbit/s
unidirectional throughput without packet loss: 125 Mbit/s

So the performance win on big packets is quite good. But on small packets
the throughput results with and without the workqueue based parallelization
are almost the same in my testing environment.

Changes from v6:

- Rework padata to use workqueues instead of softirqs for
parallelization/serialization

- Add a cyclic sequence number pattern, makes the reset of the padata
serialization logic on sequence number overrun superfluous.

- Adapt pcrypt to the changed padata interface.

- Rebased to linux-2.6.33-rc1

Steffen

2009-12-18 12:46:58

by Steffen Klassert

[permalink] [raw]

Subject: [PATCH 2/2] crypto: pcrypt - Add pcrypt crypto parallelization wrapper

This patch adds a parallel crypto template that takes a crypto
algorithm and converts it to process the crypto transforms in
parallel. For the moment only aead algorithms are supported.

Signed-off-by: Steffen Klassert <[email protected]>
---
crypto/Kconfig | 10 +
crypto/Makefile | 1 +
crypto/pcrypt.c | 445 +++++++++++++++++++++++++++++++++++++++++++++++
include/crypto/pcrypt.h | 51 ++++++
4 files changed, 507 insertions(+), 0 deletions(-)
create mode 100644 crypto/pcrypt.c
create mode 100644 include/crypto/pcrypt.h

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 81c185a..6a2e295 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -114,6 +114,16 @@ config CRYPTO_NULL
help
These are 'Null' algorithms, used by IPsec, which do nothing.

+config CRYPTO_PCRYPT
+ tristate "Parallel crypto engine (EXPERIMENTAL)"
+ depends on SMP && EXPERIMENTAL
+ select PADATA
+ select CRYPTO_MANAGER
+ select CRYPTO_AEAD
+ help
+ This converts an arbitrary crypto algorithm into a parallel
+ algorithm that executes in kernel threads.
+
config CRYPTO_WORKQUEUE
tristate

diff --git a/crypto/Makefile b/crypto/Makefile
index 9e8f619..d7e6441 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_CRYPTO_XTS) += xts.o
obj-$(CONFIG_CRYPTO_CTR) += ctr.o
obj-$(CONFIG_CRYPTO_GCM) += gcm.o
obj-$(CONFIG_CRYPTO_CCM) += ccm.o
+obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
obj-$(CONFIG_CRYPTO_DES) += des_generic.o
obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
new file mode 100644
index 0000000..b9527d0
--- /dev/null
+++ b/crypto/pcrypt.c
@@ -0,0 +1,445 @@
+/*
+ * pcrypt - Parallel crypto wrapper.
+ *
+ * Copyright (C) 2009 secunet Security Networks AG
+ * Copyright (C) 2009 Steffen Klassert <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/aead.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <crypto/pcrypt.h>
+
+/* padata instances used to parallelize encryption and decryption requests */
+static struct padata_instance *pcrypt_enc_padata;
+static struct padata_instance *pcrypt_dec_padata;
+/* workqueues handed to padata_alloc() for the two instances above */
+static struct workqueue_struct *encwq;
+static struct workqueue_struct *decwq;
+
+/*
+ * Per-instance context of a pcrypt template instance.
+ *
+ * @spawn: spawn of the wrapped algorithm, set up in pcrypt_alloc_instance()
+ * @tfm_count: incremented for every tfm initialised from this instance;
+ * used to pick the callback cpu of the new tfm
+ */
+struct pcrypt_instance_ctx {
+ struct crypto_spawn spawn;
+ unsigned int tfm_count;
+};
+
+/*
+ * Per-tfm context of a pcrypt aead transform.
+ *
+ * @child: the wrapped aead transform that does the actual crypto work
+ * @cb_cpu: cpu passed to padata_do_parallel() as serialization callback cpu
+ */
+struct pcrypt_aead_ctx {
+ struct crypto_aead *child;
+ unsigned int cb_cpu;
+};
+
+/*
+ * Submit @padata to @pinst for parallel processing.
+ *
+ * @cb_cpu holds the cpu the serialization callback should run on. If that
+ * cpu is not in cpu_active_mask it is remapped to the active cpu at position
+ * (*cb_cpu mod number of active cpus) and the new value is stored back.
+ *
+ * Returns whatever padata_do_parallel() returns.
+ */
+static int pcrypt_do_parallel(struct padata_priv *padata, unsigned int *cb_cpu,
+			      struct padata_instance *pinst)
+{
+	unsigned int target = *cb_cpu;
+
+	if (!cpumask_test_cpu(target, cpu_active_mask)) {
+		unsigned int hops = target % cpumask_weight(cpu_active_mask);
+
+		/* Walk to the hops-th cpu of the active mask. */
+		target = cpumask_first(cpu_active_mask);
+		while (hops--)
+			target = cpumask_next(target, cpu_active_mask);
+
+		*cb_cpu = target;
+	}
+
+	return padata_do_parallel(pinst, padata, target);
+}
+
+/* Forward the key to the wrapped (child) aead transform. */
+static int pcrypt_aead_setkey(struct crypto_aead *parent,
+			      const u8 *key, unsigned int keylen)
+{
+	struct pcrypt_aead_ctx *pctx = crypto_aead_ctx(parent);
+	struct crypto_aead *child = pctx->child;
+
+	return crypto_aead_setkey(child, key, keylen);
+}
+
+/* Forward the authentication tag size to the wrapped (child) aead transform. */
+static int pcrypt_aead_setauthsize(struct crypto_aead *parent,
+				   unsigned int authsize)
+{
+	struct pcrypt_aead_ctx *pctx = crypto_aead_ctx(parent);
+	struct crypto_aead *child = pctx->child;
+
+	return crypto_aead_setauthsize(child, authsize);
+}
+
+/*
+ * padata serialization callback for encrypt/decrypt requests.
+ *
+ * Completes the request stored as callback data of the child request,
+ * using the result saved in padata->info.
+ */
+static void pcrypt_aead_serial(struct padata_priv *padata)
+{
+	struct aead_request *creq;
+
+	creq = pcrypt_request_ctx(pcrypt_padata_request(padata));
+
+	aead_request_complete(creq->base.data, padata->info);
+}
+
+/*
+ * padata serialization callback for givencrypt requests.
+ *
+ * Completes the request stored as callback data of the child givcrypt
+ * request, using the result saved in padata->info.
+ */
+static void pcrypt_aead_giv_serial(struct padata_priv *padata)
+{
+	struct aead_givcrypt_request *creq;
+
+	creq = pcrypt_request_ctx(pcrypt_padata_request(padata));
+
+	aead_request_complete(creq->areq.base.data, padata->info);
+}
+
+/*
+ * Completion callback installed on the child request.
+ *
+ * @areq->data is the original aead request. The result is stored in the
+ * padata object, CRYPTO_TFM_REQ_MAY_SLEEP is cleared on the original
+ * request and the object is handed to padata for serialization.
+ */
+static void pcrypt_aead_done(struct crypto_async_request *areq, int err)
+{
+	struct aead_request *orig = areq->data;
+	struct padata_priv *padata;
+
+	padata = pcrypt_request_padata(aead_request_ctx(orig));
+
+	orig->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	padata->info = err;
+
+	padata_do_serial(padata);
+}
+
+/*
+ * padata parallel callback: run the encryption on the child transform.
+ *
+ * The result is stored in padata->info. Only -EINPROGRESS means that the
+ * child completes asynchronously through pcrypt_aead_done(), which then
+ * calls padata_do_serial() itself. Every other result, errors included,
+ * is final and must be serialized here; otherwise the request would never
+ * be completed and the padata reference would never be dropped.
+ */
+static void pcrypt_aead_enc(struct padata_priv *padata)
+{
+	struct pcrypt_request *preq = pcrypt_padata_request(padata);
+	struct aead_request *req = pcrypt_request_ctx(preq);
+
+	padata->info = crypto_aead_encrypt(req);
+
+	if (padata->info == -EINPROGRESS)
+		return;
+
+	padata_do_serial(padata);
+}
+
+/*
+ * Encrypt entry point of the pcrypt aead.
+ *
+ * Sets up the child request inside the request context, then submits the
+ * embedded padata object to the encryption padata instance. A non-zero
+ * return from pcrypt_do_parallel() is passed to the caller unchanged. A
+ * zero return means the object was not queued, so the child request is
+ * processed directly.
+ */
+static int pcrypt_aead_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct pcrypt_request *preq = aead_request_ctx(req);
+	struct padata_priv *padata = pcrypt_request_padata(preq);
+	struct aead_request *creq = pcrypt_request_ctx(preq);
+	u32 flags = aead_request_flags(req);
+	int err;
+
+	memset(padata, 0, sizeof(*padata));
+	padata->parallel = pcrypt_aead_enc;
+	padata->serial = pcrypt_aead_serial;
+
+	/* The child request mirrors the original, but must not sleep. */
+	aead_request_set_tfm(creq, ctx->child);
+	aead_request_set_callback(creq, flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
+				  pcrypt_aead_done, req);
+	aead_request_set_crypt(creq, req->src, req->dst,
+			       req->cryptlen, req->iv);
+	aead_request_set_assoc(creq, req->assoc, req->assoclen);
+
+	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, pcrypt_enc_padata);
+	if (err)
+		return err;
+
+	return crypto_aead_encrypt(creq);
+}
+
+/*
+ * padata parallel callback: run the decryption on the child transform.
+ *
+ * The result is stored in padata->info. Only -EINPROGRESS means that the
+ * child completes asynchronously through pcrypt_aead_done(), which then
+ * calls padata_do_serial() itself. Every other result, errors included
+ * (e.g. a failed authentication check), is final and must be serialized
+ * here; otherwise the request would never be completed and the padata
+ * reference would never be dropped.
+ */
+static void pcrypt_aead_dec(struct padata_priv *padata)
+{
+	struct pcrypt_request *preq = pcrypt_padata_request(padata);
+	struct aead_request *req = pcrypt_request_ctx(preq);
+
+	padata->info = crypto_aead_decrypt(req);
+
+	if (padata->info == -EINPROGRESS)
+		return;
+
+	padata_do_serial(padata);
+}
+
+/*
+ * Decrypt entry point of the pcrypt aead.
+ *
+ * Sets up the child request inside the request context, then submits the
+ * embedded padata object to the decryption padata instance. A non-zero
+ * return from pcrypt_do_parallel() is passed to the caller unchanged. A
+ * zero return means the object was not queued, so the child request is
+ * processed directly.
+ */
+static int pcrypt_aead_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct pcrypt_request *preq = aead_request_ctx(req);
+	struct padata_priv *padata = pcrypt_request_padata(preq);
+	struct aead_request *creq = pcrypt_request_ctx(preq);
+	u32 flags = aead_request_flags(req);
+	int err;
+
+	memset(padata, 0, sizeof(*padata));
+	padata->parallel = pcrypt_aead_dec;
+	padata->serial = pcrypt_aead_serial;
+
+	/* The child request mirrors the original, but must not sleep. */
+	aead_request_set_tfm(creq, ctx->child);
+	aead_request_set_callback(creq, flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
+				  pcrypt_aead_done, req);
+	aead_request_set_crypt(creq, req->src, req->dst,
+			       req->cryptlen, req->iv);
+	aead_request_set_assoc(creq, req->assoc, req->assoclen);
+
+	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, pcrypt_dec_padata);
+	if (err)
+		return err;
+
+	return crypto_aead_decrypt(creq);
+}
+
+/*
+ * padata parallel callback: run the IV-generating encryption on the child
+ * transform.
+ *
+ * The result is stored in padata->info. Only -EINPROGRESS means that the
+ * child completes asynchronously through pcrypt_aead_done(), which then
+ * calls padata_do_serial() itself. Every other result, errors included,
+ * is final and must be serialized here; otherwise the request would never
+ * be completed and the padata reference would never be dropped.
+ */
+static void pcrypt_aead_givenc(struct padata_priv *padata)
+{
+	struct pcrypt_request *preq = pcrypt_padata_request(padata);
+	struct aead_givcrypt_request *req = pcrypt_request_ctx(preq);
+
+	padata->info = crypto_aead_givencrypt(req);
+
+	if (padata->info == -EINPROGRESS)
+		return;
+
+	padata_do_serial(padata);
+}
+
+/*
+ * Givencrypt entry point of the pcrypt aead.
+ *
+ * Sets up the child givcrypt request inside the request context, then
+ * submits the embedded padata object to the encryption padata instance.
+ * A non-zero return from pcrypt_do_parallel() is passed to the caller
+ * unchanged. A zero return means the object was not queued, so the child
+ * request is processed directly.
+ */
+static int pcrypt_aead_givencrypt(struct aead_givcrypt_request *req)
+{
+	struct aead_request *areq = &req->areq;
+	struct crypto_aead *tfm = aead_givcrypt_reqtfm(req);
+	struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct pcrypt_request *preq = aead_request_ctx(areq);
+	struct padata_priv *padata = pcrypt_request_padata(preq);
+	struct aead_givcrypt_request *creq = pcrypt_request_ctx(preq);
+	u32 flags = aead_request_flags(areq);
+	int err;
+
+	memset(padata, 0, sizeof(*padata));
+	padata->parallel = pcrypt_aead_givenc;
+	padata->serial = pcrypt_aead_giv_serial;
+
+	/* The child request mirrors the original, but must not sleep. */
+	aead_givcrypt_set_tfm(creq, ctx->child);
+	aead_givcrypt_set_callback(creq, flags & ~CRYPTO_TFM_REQ_MAY_SLEEP,
+				   pcrypt_aead_done, areq);
+	aead_givcrypt_set_crypt(creq, areq->src, areq->dst,
+				areq->cryptlen, areq->iv);
+	aead_givcrypt_set_assoc(creq, areq->assoc, areq->assoclen);
+	aead_givcrypt_set_giv(creq, req->giv, req->seq);
+
+	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, pcrypt_enc_padata);
+	if (err)
+		return err;
+
+	return crypto_aead_givencrypt(creq);
+}
+
+/*
+ * Initialise a pcrypt aead tfm.
+ *
+ * Picks the callback cpu for this tfm by counting the tfms of the instance
+ * and walking that many steps (modulo the number of active cpus) through
+ * cpu_active_mask, then spawns the child aead and sizes the request context
+ * so that it can hold a pcrypt_request, an aead_givcrypt_request and the
+ * child's own request context.
+ *
+ * Returns 0 on success or the error from crypto_spawn_aead().
+ */
+static int pcrypt_aead_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
+	struct pcrypt_instance_ctx *ictx = crypto_instance_ctx(inst);
+	struct pcrypt_aead_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_aead *child;
+	int steps;
+
+	++ictx->tfm_count;
+	steps = ictx->tfm_count % cpumask_weight(cpu_active_mask);
+
+	ctx->cb_cpu = cpumask_first(cpu_active_mask);
+	while (steps-- > 0)
+		ctx->cb_cpu = cpumask_next(ctx->cb_cpu, cpu_active_mask);
+
+	child = crypto_spawn_aead(crypto_instance_ctx(inst));
+	if (IS_ERR(child))
+		return PTR_ERR(child);
+
+	ctx->child = child;
+	tfm->crt_aead.reqsize = sizeof(struct pcrypt_request)
+		+ sizeof(struct aead_givcrypt_request)
+		+ crypto_aead_reqsize(child);
+
+	return 0;
+}
+
+/* Release the child aead that pcrypt_aead_init_tfm() stored in the context. */
+static void pcrypt_aead_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct pcrypt_aead_ctx *pctx = crypto_tfm_ctx(tfm);
+	struct crypto_aead *child = pctx->child;
+
+	crypto_free_aead(child);
+}
+
+/*
+ * Allocate a crypto instance wrapping @alg.
+ *
+ * The driver name becomes "pcrypt(<driver name of alg>)", the algorithm
+ * name is copied from @alg, and priority (+100), blocksize and alignmask
+ * are derived from @alg.
+ *
+ * Returns the new instance, or an ERR_PTR(): -ENOMEM if the allocation
+ * fails, -ENAMETOOLONG if the driver name does not fit, or the error from
+ * crypto_init_spawn().
+ */
+static struct crypto_instance *pcrypt_alloc_instance(struct crypto_alg *alg)
+{
+	struct crypto_instance *inst;
+	struct pcrypt_instance_ctx *ctx;
+	int err;
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
+	if (!inst)
+		return ERR_PTR(-ENOMEM);
+
+	if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+		     "pcrypt(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) {
+		err = -ENAMETOOLONG;
+		goto err_free_inst;
+	}
+
+	memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
+
+	ctx = crypto_instance_ctx(inst);
+	err = crypto_init_spawn(&ctx->spawn, alg, inst,
+				CRYPTO_ALG_TYPE_MASK);
+	if (err)
+		goto err_free_inst;
+
+	inst->alg.cra_priority = alg->cra_priority + 100;
+	inst->alg.cra_blocksize = alg->cra_blocksize;
+	inst->alg.cra_alignmask = alg->cra_alignmask;
+
+	return inst;
+
+err_free_inst:
+	kfree(inst);
+	return ERR_PTR(err);
+}
+
+/*
+ * Build a pcrypt instance for an aead algorithm described by @tb.
+ *
+ * Looks up the algorithm, wraps it with pcrypt_alloc_instance() and, on
+ * success, fills in the aead specific parts of the instance. The reference
+ * on the looked-up algorithm is dropped in either case.
+ *
+ * Returns the instance or an ERR_PTR().
+ */
+static struct crypto_instance *pcrypt_alloc_aead(struct rtattr **tb)
+{
+	struct crypto_attr_type *algt = crypto_get_attr_type(tb);
+	struct crypto_instance *inst;
+	struct crypto_alg *alg;
+
+	alg = crypto_get_attr_alg(tb, algt->type,
+				  (algt->mask & CRYPTO_ALG_TYPE_MASK));
+	if (IS_ERR(alg))
+		return ERR_CAST(alg);
+
+	inst = pcrypt_alloc_instance(alg);
+	if (!IS_ERR(inst)) {
+		inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
+		inst->alg.cra_type = &crypto_aead_type;
+
+		/* Parameters inherited from the wrapped algorithm. */
+		inst->alg.cra_aead.ivsize = alg->cra_aead.ivsize;
+		inst->alg.cra_aead.geniv = alg->cra_aead.geniv;
+		inst->alg.cra_aead.maxauthsize = alg->cra_aead.maxauthsize;
+
+		inst->alg.cra_ctxsize = sizeof(struct pcrypt_aead_ctx);
+
+		inst->alg.cra_init = pcrypt_aead_init_tfm;
+		inst->alg.cra_exit = pcrypt_aead_exit_tfm;
+
+		inst->alg.cra_aead.setkey = pcrypt_aead_setkey;
+		inst->alg.cra_aead.setauthsize = pcrypt_aead_setauthsize;
+		inst->alg.cra_aead.encrypt = pcrypt_aead_encrypt;
+		inst->alg.cra_aead.decrypt = pcrypt_aead_decrypt;
+		inst->alg.cra_aead.givencrypt = pcrypt_aead_givencrypt;
+	}
+
+	crypto_mod_put(alg);
+	return inst;
+}
+
+/*
+ * Template ->alloc hook: dispatch on the requested algorithm type.
+ *
+ * Only aead algorithms are handled; every other type yields
+ * ERR_PTR(-EINVAL). An error from crypto_get_attr_type() is passed on.
+ */
+static struct crypto_instance *pcrypt_alloc(struct rtattr **tb)
+{
+	struct crypto_attr_type *algt = crypto_get_attr_type(tb);
+	u32 type;
+
+	if (IS_ERR(algt))
+		return ERR_CAST(algt);
+
+	type = algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK;
+	if (type == CRYPTO_ALG_TYPE_AEAD)
+		return pcrypt_alloc_aead(tb);
+
+	return ERR_PTR(-EINVAL);
+}
+
+/* Template ->free hook: drop the spawn and free the instance memory. */
+static void pcrypt_free(struct crypto_instance *inst)
+{
+	struct pcrypt_instance_ctx *ictx = crypto_instance_ctx(inst);
+
+	crypto_drop_spawn(&ictx->spawn);
+
+	kfree(inst);
+}
+
+/*
+ * The "pcrypt" crypto template; registered in pcrypt_init() and
+ * unregistered in pcrypt_exit().
+ */
+static struct crypto_template pcrypt_tmpl = {
+ .name = "pcrypt",
+ .alloc = pcrypt_alloc,
+ .free = pcrypt_free,
+ .module = THIS_MODULE,
+};
+
+/*
+ * Module init: create the two workqueues and padata instances, start the
+ * padata instances and register the pcrypt template.
+ *
+ * Returns 0 on success, -ENOMEM if a workqueue or padata instance cannot
+ * be allocated, or the error from crypto_register_template(). On any
+ * failure everything set up so far is torn down again, so that a failed
+ * template registration does not leak the workqueues and padata instances.
+ */
+static int __init pcrypt_init(void)
+{
+	int err = -ENOMEM;
+
+	encwq = create_workqueue("pencrypt");
+	if (!encwq)
+		goto err;
+
+	decwq = create_workqueue("pdecrypt");
+	if (!decwq)
+		goto err_destroy_encwq;
+
+	pcrypt_enc_padata = padata_alloc(cpu_possible_mask, encwq);
+	if (!pcrypt_enc_padata)
+		goto err_destroy_decwq;
+
+	pcrypt_dec_padata = padata_alloc(cpu_possible_mask, decwq);
+	if (!pcrypt_dec_padata)
+		goto err_free_enc_padata;
+
+	padata_start(pcrypt_enc_padata);
+	padata_start(pcrypt_dec_padata);
+
+	err = crypto_register_template(&pcrypt_tmpl);
+	if (err)
+		goto err_stop_padata;
+
+	return 0;
+
+err_stop_padata:
+	padata_stop(pcrypt_enc_padata);
+	padata_stop(pcrypt_dec_padata);
+	padata_free(pcrypt_dec_padata);
+
+err_free_enc_padata:
+	padata_free(pcrypt_enc_padata);
+
+err_destroy_decwq:
+	destroy_workqueue(decwq);
+
+err_destroy_encwq:
+	destroy_workqueue(encwq);
+
+err:
+	return err;
+}
+
+/*
+ * Module exit.
+ *
+ * The template is unregistered first so that no new request can reach the
+ * padata instances and workqueues while they are being torn down.
+ */
+static void __exit pcrypt_exit(void)
+{
+	crypto_unregister_template(&pcrypt_tmpl);
+
+	padata_stop(pcrypt_enc_padata);
+	padata_stop(pcrypt_dec_padata);
+
+	destroy_workqueue(encwq);
+	destroy_workqueue(decwq);
+
+	padata_free(pcrypt_enc_padata);
+	padata_free(pcrypt_dec_padata);
+}
+
+module_init(pcrypt_init);
+module_exit(pcrypt_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Steffen Klassert <[email protected]>");
+MODULE_DESCRIPTION("Parallel crypto wrapper");
diff --git a/include/crypto/pcrypt.h b/include/crypto/pcrypt.h
new file mode 100644
index 0000000..d7d8bd8
--- /dev/null
+++ b/include/crypto/pcrypt.h
@@ -0,0 +1,51 @@
+/*
+ * pcrypt - Parallel crypto engine.
+ *
+ * Copyright (C) 2009 secunet Security Networks AG
+ * Copyright (C) 2009 Steffen Klassert <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _CRYPTO_PCRYPT_H
+#define _CRYPTO_PCRYPT_H
+
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/padata.h>
+
+/*
+ * Request context used by pcrypt.
+ *
+ * @padata: embedded padata object; pcrypt_padata_request() maps it back
+ * to the enclosing pcrypt_request
+ * @data: private pointer (not referenced by the helpers in this header)
+ * @__ctx: start of the trailing area returned by pcrypt_request_ctx()
+ */
+struct pcrypt_request {
+ struct padata_priv padata;
+ void *data;
+ void *__ctx[] CRYPTO_MINALIGN_ATTR;
+};
+
+/* Return the trailing context area of @req. */
+static inline void *pcrypt_request_ctx(struct pcrypt_request *req)
+{
+	void *ctx = req->__ctx;
+
+	return ctx;
+}
+
+/* Return the padata object embedded in @req. */
+static inline
+struct padata_priv *pcrypt_request_padata(struct pcrypt_request *req)
+{
+	struct padata_priv *padata = &req->padata;
+
+	return padata;
+}
+
+/* Map an embedded padata object back to its enclosing pcrypt_request. */
+static inline
+struct pcrypt_request *pcrypt_padata_request(struct padata_priv *padata)
+{
+	struct pcrypt_request *preq;
+
+	preq = container_of(padata, struct pcrypt_request, padata);
+
+	return preq;
+}
+
+#endif
--
1.5.4.2

2009-12-18 12:46:58

by Steffen Klassert

[permalink] [raw]

Subject: [PATCH 1/2] padata: generic parallelization/serialization interface

This patch introduces an interface to process data objects
in parallel. The parallelized objects return after serialization
in the same order as they were before the parallelization.

Signed-off-by: Steffen Klassert <[email protected]>
---
include/linux/padata.h | 88 ++++++
init/Kconfig | 4 +
kernel/Makefile | 1 +
kernel/padata.c | 690 ++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 783 insertions(+), 0 deletions(-)
create mode 100644 include/linux/padata.h
create mode 100644 kernel/padata.c

diff --git a/include/linux/padata.h b/include/linux/padata.h
new file mode 100644
index 0000000..51611da
--- /dev/null
+++ b/include/linux/padata.h
@@ -0,0 +1,88 @@
+/*
+ * padata.h - header for the padata parallelization interface
+ *
+ * Copyright (C) 2008, 2009 secunet Security Networks AG
+ * Copyright (C) 2008, 2009 Steffen Klassert <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef PADATA_H
+#define PADATA_H
+
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+/*
+ * struct padata_priv - object that is parallelized and serialized again
+ *
+ * @list: links the object into a parallel, reorder or serial list
+ * @pd: parallel_data the object was submitted to (set by padata_do_parallel)
+ * @cb_cpu: cpu the serialization callback is queued on
+ * @seq_nr: sequence number assigned by padata_do_parallel
+ * @info: not interpreted by padata; available to the user of the object
+ * @parallel: callback invoked by the parallel worker
+ * @serial: callback invoked by the serial worker
+ */
+struct padata_priv {
+ struct list_head list;
+ struct parallel_data *pd;
+ int cb_cpu;
+ int seq_nr;
+ int info;
+ void (*parallel)(struct padata_priv *padata);
+ void (*serial)(struct padata_priv *padata);
+};
+
+/* A list head together with the spinlock that protects it. */
+struct padata_list {
+ struct list_head list;
+ spinlock_t lock;
+};
+
+/*
+ * struct padata_queue - per-cpu queues of a parallel_data
+ *
+ * @parallel: objects waiting for the parallel worker
+ * @reorder: objects passed to padata_do_serial, waiting to be reordered
+ * @serial: objects waiting for the serial worker
+ * @pwork: work item running padata_parallel_worker
+ * @swork: work item running padata_serial_worker
+ * @pd: back pointer to the owning parallel_data
+ * @num_obj: number of objects taken from @reorder by padata_get_next
+ * @cpu_index: position of this cpu within pd->cpumask, -1 if unused
+ */
+struct padata_queue {
+ struct padata_list parallel;
+ struct padata_list reorder;
+ struct padata_list serial;
+ struct work_struct pwork;
+ struct work_struct swork;
+ struct parallel_data *pd;
+ atomic_t num_obj;
+ int cpu_index;
+};
+
+/*
+ * struct parallel_data - state for one cpumask configuration
+ *
+ * @pinst: owning padata instance
+ * @queue: per-cpu padata_queue
+ * @seq_nr: last sequence number handed out by padata_do_parallel
+ * @reorder_objects: number of objects currently on reorder lists
+ * @refcnt: number of objects in flight (inc in padata_do_parallel,
+ * dec after the serial callback)
+ * @max_seq_nr: highest sequence number before seq_nr wraps around
+ * @cpumask: cpus in use (requested mask restricted to active cpus)
+ * @lock: taken by padata_reorder while objects are reordered
+ */
+struct parallel_data {
+ struct padata_instance *pinst;
+ struct padata_queue *queue;
+ atomic_t seq_nr;
+ atomic_t reorder_objects;
+ atomic_t refcnt;
+ unsigned int max_seq_nr;
+ cpumask_var_t cpumask;
+ spinlock_t lock;
+};
+
+/*
+ * struct padata_instance - a padata instance as seen by its users
+ *
+ * @cpu_notifier: notifier block (presumably for cpu hotplug - confirm
+ * against padata_alloc)
+ * @wq: workqueue the parallel and serial work items are queued on
+ * @pd: current parallel_data; read under RCU in padata_do_parallel
+ * @cpumask: cpumask requested by the user
+ * @lock: serializes cpumask changes (set_cpumask / add_cpu / remove_cpu)
+ * @flags: PADATA_INIT and PADATA_RESET
+ */
+struct padata_instance {
+ struct notifier_block cpu_notifier;
+ struct workqueue_struct *wq;
+ struct parallel_data *pd;
+ cpumask_var_t cpumask;
+ struct mutex lock;
+ u8 flags;
+ /* padata_do_parallel only queues objects while PADATA_INIT is set */
+#define PADATA_INIT 1
+ /* set while padata_replace swaps the parallel_data; submits get -EBUSY */
+#define PADATA_RESET 2
+};
+
+/* Public padata interface. */
+
+/* Allocate / free a padata instance that uses @wq for its work items. */
+extern struct padata_instance *padata_alloc(const struct cpumask *cpumask,
+ struct workqueue_struct *wq);
+extern void padata_free(struct padata_instance *pinst);
+/* Submit an object for parallel processing / hand it back for serialization. */
+extern int padata_do_parallel(struct padata_instance *pinst,
+ struct padata_priv *padata, int cb_cpu);
+extern void padata_do_serial(struct padata_priv *padata);
+/* Change the set of cpus used by the instance; these may sleep. */
+extern int padata_set_cpumask(struct padata_instance *pinst,
+ cpumask_var_t cpumask);
+extern int padata_add_cpu(struct padata_instance *pinst, int cpu);
+extern int padata_remove_cpu(struct padata_instance *pinst, int cpu);
+/* Start / stop the instance. */
+extern void padata_start(struct padata_instance *pinst);
+extern void padata_stop(struct padata_instance *pinst);
+#endif
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index a23da9f..9fd23bc 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1252,4 +1252,8 @@ source "block/Kconfig"
config PREEMPT_NOTIFIERS
bool

+config PADATA
+ depends on SMP
+ bool
+
source "kernel/Kconfig.locks"
diff --git a/kernel/Makefile b/kernel/Makefile
index 864ff75..6aebdeb 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
+obj-$(CONFIG_PADATA) += padata.o

ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <[email protected]>, the -fno-omit-frame-pointer is
diff --git a/kernel/padata.c b/kernel/padata.c
new file mode 100644
index 0000000..6f9bcb8
--- /dev/null
+++ b/kernel/padata.c
@@ -0,0 +1,690 @@
+/*
+ * padata.c - generic interface to process data streams in parallel
+ *
+ * Copyright (C) 2008, 2009 secunet Security Networks AG
+ * Copyright (C) 2008, 2009 Steffen Klassert <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/module.h>
+#include <linux/cpumask.h>
+#include <linux/err.h>
+#include <linux/cpu.h>
+#include <linux/padata.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/rcupdate.h>
+
+/*
+ * Both macros are parenthesized so that they expand as a single value
+ * inside larger expressions such as "MAX_SEQ_NR / num_cpus".
+ */
+
+/* Upper bound for sequence numbers, NR_CPUS below INT_MAX. */
+#define MAX_SEQ_NR (INT_MAX - NR_CPUS)
+/* Limit on pd->refcnt above which padata_do_parallel refuses new objects. */
+#define MAX_OBJ_NUM (10000 * NR_CPUS)
+
+/*
+ * Translate a position within pd->cpumask into a cpu number by walking
+ * @cpu_index steps from the first cpu of the mask.
+ */
+static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
+{
+	int target_cpu = cpumask_first(pd->cpumask);
+	int remaining;
+
+	for (remaining = cpu_index; remaining > 0; remaining--)
+		target_cpu = cpumask_next(target_cpu, pd->cpumask);
+
+	return target_cpu;
+}
+
+/*
+ * Select the cpu an object is processed on: the sequence number modulo
+ * the number of cpus in use gives the position within pd->cpumask.
+ */
+static int padata_cpu_hash(struct padata_priv *padata)
+{
+	struct parallel_data *pd = padata->pd;
+	unsigned int nr_cpus = cpumask_weight(pd->cpumask);
+
+	return padata_index_to_cpu(pd, padata->seq_nr % nr_cpus);
+}
+
+/*
+ * Work function of padata_queue->pwork.
+ *
+ * Takes over all objects currently on the queue's parallel list and calls
+ * the ->parallel callback of each of them, with BHs disabled.
+ */
+static void padata_parallel_worker(struct work_struct *work)
+{
+	struct padata_queue *queue;
+	LIST_HEAD(local_list);
+
+	local_bh_disable();
+	queue = container_of(work, struct padata_queue, pwork);
+
+	/* Detach the pending objects so the lock is not held during callbacks. */
+	spin_lock(&queue->parallel.lock);
+	list_replace_init(&queue->parallel.list, &local_list);
+	spin_unlock(&queue->parallel.lock);
+
+	while (!list_empty(&local_list)) {
+		struct padata_priv *padata;
+
+		padata = list_entry(local_list.next,
+				    struct padata_priv, list);
+
+		list_del_init(&padata->list);
+
+		padata->parallel(padata);
+	}
+
+	local_bh_enable();
+}
+
+/*
+ * padata_do_parallel - padata parallelization function
+ *
+ * @pinst: padata instance
+ * @padata: object to be parallelized
+ * @cb_cpu: cpu the serialization callback function will run on,
+ * must be in the cpumask of padata.
+ *
+ * The parallelization callback function will run with BHs off.
+ * Note: Every object which is parallelized by padata_do_parallel
+ * must be seen by padata_do_serial.
+ *
+ * Returns -EINPROGRESS if the object was queued, 0 if the instance does
+ * not have PADATA_INIT set (nothing is queued), -EBUSY if the instance is
+ * being reset or too many objects are in flight, and -EINVAL if @cb_cpu is
+ * not in the cpumask in use.
+ */
+int padata_do_parallel(struct padata_instance *pinst,
+ struct padata_priv *padata, int cb_cpu)
+{
+ int target_cpu, err;
+ struct padata_queue *queue;
+ struct parallel_data *pd;
+
+ rcu_read_lock_bh();
+
+ pd = rcu_dereference(pinst->pd);
+
+ err = 0;
+ if (!(pinst->flags & PADATA_INIT))
+ goto out;
+
+ err = -EBUSY;
+ if ((pinst->flags & PADATA_RESET))
+ goto out;
+
+ if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
+ goto out;
+
+ err = -EINVAL;
+ if (!cpumask_test_cpu(cb_cpu, pd->cpumask))
+ goto out;
+
+ err = -EINPROGRESS;
+ atomic_inc(&pd->refcnt);
+ padata->pd = pd;
+ padata->cb_cpu = cb_cpu;
+
+ /* Wrap around: the next atomic_inc_return() then yields 0 again. */
+ if (unlikely(atomic_read(&pd->seq_nr) == pd->max_seq_nr))
+ atomic_set(&pd->seq_nr, -1);
+
+ padata->seq_nr = atomic_inc_return(&pd->seq_nr);
+
+ /* The sequence number decides which cpu processes the object. */
+ target_cpu = padata_cpu_hash(padata);
+ queue = per_cpu_ptr(pd->queue, target_cpu);
+
+ spin_lock(&queue->parallel.lock);
+ list_add_tail(&padata->list, &queue->parallel.list);
+ spin_unlock(&queue->parallel.lock);
+
+ queue_work_on(target_cpu, pinst->wq, &queue->pwork);
+
+out:
+ rcu_read_unlock_bh();
+
+ return err;
+}
+EXPORT_SYMBOL(padata_do_parallel);
+
+/*
+ * padata_get_next - find the next object that has to be serialized
+ *
+ * Scans the reorder queues of all cpus in pd->cpumask and selects the
+ * queue that holds, or is expected to hold, the lowest sequence number.
+ *
+ * Returns
+ * - the object, removed from its reorder list, if the selected queue is
+ * not empty,
+ * - NULL if all reorder queues are empty,
+ * - ERR_PTR(-ENODATA) if the selected queue is empty and the expected
+ * sequence number maps to that queue's cpu_index,
+ * - ERR_PTR(-EINPROGRESS) otherwise.
+ */
+static struct padata_priv *padata_get_next(struct parallel_data *pd)
+{
+ int cpu, num_cpus, empty, calc_seq_nr;
+ int seq_nr, next_nr, overrun, next_overrun;
+ struct padata_queue *queue, *next_queue;
+ struct padata_priv *padata;
+ struct padata_list *reorder;
+
+ empty = 0;
+ next_nr = -1;
+ next_overrun = 0;
+ next_queue = NULL;
+
+ num_cpus = cpumask_weight(pd->cpumask);
+
+ for_each_cpu(cpu, pd->cpumask) {
+ queue = per_cpu_ptr(pd->queue, cpu);
+ reorder = &queue->reorder;
+
+ /*
+ * Calculate the seq_nr of the object that should be
+ * next in this queue.
+ */
+ overrun = 0;
+ calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
+ + queue->cpu_index;
+
+ /* Past max_seq_nr the sequence numbers start again at 0. */
+ if (unlikely(calc_seq_nr > pd->max_seq_nr)) {
+ calc_seq_nr = calc_seq_nr - pd->max_seq_nr - 1;
+ overrun = 1;
+ }
+
+ if (!list_empty(&reorder->list)) {
+ padata = list_entry(reorder->list.next,
+ struct padata_priv, list);
+
+ seq_nr = padata->seq_nr;
+ BUG_ON(calc_seq_nr != seq_nr);
+ } else {
+ seq_nr = calc_seq_nr;
+ empty++;
+ }
+
+ /*
+ * Prefer the lowest sequence number; a queue that has not
+ * wrapped around wins over one that has.
+ */
+ if (next_nr < 0 || seq_nr < next_nr
+ || (next_overrun && !overrun)) {
+ next_nr = seq_nr;
+ next_overrun = overrun;
+ next_queue = queue;
+ }
+ }
+
+ padata = NULL;
+
+ if (empty == num_cpus)
+ goto out;
+
+ reorder = &next_queue->reorder;
+
+ if (!list_empty(&reorder->list)) {
+ padata = list_entry(reorder->list.next,
+ struct padata_priv, list);
+
+ /* The selected object starts a new cycle: reset all counters. */
+ if (unlikely(next_overrun)) {
+ for_each_cpu(cpu, pd->cpumask) {
+ queue = per_cpu_ptr(pd->queue, cpu);
+ atomic_set(&queue->num_obj, 0);
+ }
+ }
+
+ spin_lock(&reorder->lock);
+ list_del_init(&padata->list);
+ atomic_dec(&pd->reorder_objects);
+ spin_unlock(&reorder->lock);
+
+ atomic_inc(&next_queue->num_obj);
+
+ goto out;
+ }
+
+ if (next_nr % num_cpus == next_queue->cpu_index) {
+ padata = ERR_PTR(-ENODATA);
+ goto out;
+ }
+
+ padata = ERR_PTR(-EINPROGRESS);
+out:
+ return padata;
+}
+
+/*
+ * padata_reorder - move objects in sequence order to the serial queues
+ *
+ * Only one cpu reorders at a time: if pd->lock is already held the
+ * function returns immediately. Objects returned by padata_get_next() are
+ * appended to the serial list of their callback cpu and the serial worker
+ * is queued on that cpu.
+ */
+static void padata_reorder(struct parallel_data *pd)
+{
+ struct padata_priv *padata;
+ struct padata_queue *queue;
+ struct padata_instance *pinst = pd->pinst;
+
+try_again:
+ if (!spin_trylock_bh(&pd->lock))
+ goto out;
+
+ while (1) {
+ padata = padata_get_next(pd);
+
+ /* Nothing queued at all, or the next object is not there yet. */
+ if (!padata || PTR_ERR(padata) == -EINPROGRESS)
+ break;
+
+ /* Leave without re-checking reorder_objects below. */
+ if (PTR_ERR(padata) == -ENODATA) {
+ spin_unlock_bh(&pd->lock);
+ goto out;
+ }
+
+ queue = per_cpu_ptr(pd->queue, padata->cb_cpu);
+
+ spin_lock(&queue->serial.lock);
+ list_add_tail(&padata->list, &queue->serial.list);
+ spin_unlock(&queue->serial.lock);
+
+ queue_work_on(padata->cb_cpu, pinst->wq, &queue->swork);
+ }
+
+ spin_unlock_bh(&pd->lock);
+
+ /* Objects may have been added while the lock was held; retry. */
+ if (atomic_read(&pd->reorder_objects))
+ goto try_again;
+
+out:
+ return;
+}
+
+/*
+ * Work function of padata_queue->swork.
+ *
+ * Takes over all objects currently on the queue's serial list, calls the
+ * ->serial callback of each of them with BHs disabled and drops the
+ * in-flight reference on the parallel_data afterwards.
+ */
+static void padata_serial_worker(struct work_struct *work)
+{
+	struct padata_queue *queue;
+	struct parallel_data *pd;
+	LIST_HEAD(todo);
+
+	local_bh_disable();
+	queue = container_of(work, struct padata_queue, swork);
+	pd = queue->pd;
+
+	/* Detach the pending objects so the lock is not held during callbacks. */
+	spin_lock(&queue->serial.lock);
+	list_replace_init(&queue->serial.list, &todo);
+	spin_unlock(&queue->serial.lock);
+
+	while (!list_empty(&todo)) {
+		struct padata_priv *padata =
+			list_first_entry(&todo, struct padata_priv, list);
+
+		list_del_init(&padata->list);
+
+		padata->serial(padata);
+		atomic_dec(&pd->refcnt);
+	}
+
+	local_bh_enable();
+}
+
+/*
+ * padata_do_serial - padata serialization function
+ *
+ * @padata: object to be serialized.
+ *
+ * padata_do_serial must be called for every parallelized object.
+ * The serialization callback function will run with BHs off.
+ *
+ * The object is appended to the reorder list of the cpu this function
+ * runs on, then the reordering is kicked.
+ */
+void padata_do_serial(struct padata_priv *padata)
+{
+	struct parallel_data *pd = padata->pd;
+	struct padata_queue *queue;
+	int this_cpu;
+
+	this_cpu = get_cpu();
+	queue = per_cpu_ptr(pd->queue, this_cpu);
+
+	spin_lock(&queue->reorder.lock);
+	atomic_inc(&pd->reorder_objects);
+	list_add_tail(&padata->list, &queue->reorder.list);
+	spin_unlock(&queue->reorder.lock);
+
+	put_cpu();
+
+	padata_reorder(pd);
+}
+EXPORT_SYMBOL(padata_do_serial);
+
+/*
+ * padata_alloc_pd - allocate and initialise a parallel_data
+ *
+ * @pinst: padata instance the new parallel_data belongs to
+ * @cpumask: requested cpus; only those that are also active are used
+ *
+ * Returns the new parallel_data or NULL if an allocation fails.
+ */
+static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
+ const struct cpumask *cpumask)
+{
+ int cpu, cpu_index, num_cpus;
+ struct padata_queue *queue;
+ struct parallel_data *pd;
+
+ cpu_index = 0;
+
+ pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
+ if (!pd)
+ goto err;
+
+ pd->queue = alloc_percpu(struct padata_queue);
+ if (!pd->queue)
+ goto err_free_pd;
+
+ if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL))
+ goto err_free_queue;
+
+ for_each_possible_cpu(cpu) {
+ queue = per_cpu_ptr(pd->queue, cpu);
+
+ queue->pd = pd;
+
+ /* Number the cpus in use consecutively; all others get -1. */
+ if (cpumask_test_cpu(cpu, cpumask)
+ && cpumask_test_cpu(cpu, cpu_active_mask)) {
+ queue->cpu_index = cpu_index;
+ cpu_index++;
+ } else
+ queue->cpu_index = -1;
+
+ INIT_LIST_HEAD(&queue->reorder.list);
+ INIT_LIST_HEAD(&queue->parallel.list);
+ INIT_LIST_HEAD(&queue->serial.list);
+ spin_lock_init(&queue->reorder.lock);
+ spin_lock_init(&queue->parallel.lock);
+ spin_lock_init(&queue->serial.lock);
+
+ INIT_WORK(&queue->pwork, padata_parallel_worker);
+ INIT_WORK(&queue->swork, padata_serial_worker);
+ atomic_set(&queue->num_obj, 0);
+ }
+
+ cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
+
+ /*
+ * NOTE(review): if the intersection above is empty, num_cpus is 0 and
+ * the division below divides by zero - confirm that callers guarantee
+ * at least one active cpu in the mask.
+ */
+ num_cpus = cpumask_weight(pd->cpumask);
+ pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1;
+
+ /* -1 so that the first atomic_inc_return() hands out 0. */
+ atomic_set(&pd->seq_nr, -1);
+ atomic_set(&pd->reorder_objects, 0);
+ atomic_set(&pd->refcnt, 0);
+ pd->pinst = pinst;
+ spin_lock_init(&pd->lock);
+
+ return pd;
+
+err_free_queue:
+ free_percpu(pd->queue);
+err_free_pd:
+ kfree(pd);
+err:
+ return NULL;
+}
+
+/* Release a parallel_data together with its per-cpu queues and cpumask. */
+static void padata_free_pd(struct parallel_data *pd)
+{
+	free_percpu(pd->queue);
+	free_cpumask_var(pd->cpumask);
+
+	kfree(pd);
+}
+
+/*
+ * padata_replace - swap the parallel_data of an instance
+ *
+ * @pinst: padata instance
+ * @pd_new: parallel_data that replaces the current one
+ *
+ * PADATA_RESET is set for the duration of the swap so that
+ * padata_do_parallel() rejects new objects. The old parallel_data is freed
+ * once no reader can still hold a pointer to it, all of its objects have
+ * been serialized and the workqueue has been flushed. May sleep.
+ */
+static void padata_replace(struct padata_instance *pinst,
+			   struct parallel_data *pd_new)
+{
+	struct parallel_data *pd_old = pinst->pd;
+
+	pinst->flags |= PADATA_RESET;
+
+	rcu_assign_pointer(pinst->pd, pd_new);
+
+	/*
+	 * padata_do_parallel() dereferences pinst->pd under
+	 * rcu_read_lock_bh(), so the matching grace period primitive is
+	 * synchronize_rcu_bh(); synchronize_rcu() does not wait for
+	 * rcu_bh readers.
+	 */
+	synchronize_rcu_bh();
+
+	/* Wait until every object submitted to the old pd was serialized. */
+	while (atomic_read(&pd_old->refcnt) != 0)
+		yield();
+
+	flush_workqueue(pinst->wq);
+
+	padata_free_pd(pd_old);
+
+	pinst->flags &= ~PADATA_RESET;
+}
+
+/*
+ * padata_set_cpumask - set the cpumask that padata should use
+ *
+ * @pinst: padata instance
+ * @cpumask: the cpumask to use
+ *
+ * Builds a new parallel_data for @cpumask, records @cpumask in the
+ * instance and swaps the new structure in, all under pinst->lock.
+ * May sleep. Returns 0 on success or -ENOMEM if the new structure
+ * could not be allocated, in which case the instance is left unchanged.
+ */
+int padata_set_cpumask(struct padata_instance *pinst,
+			cpumask_var_t cpumask)
+{
+	struct parallel_data *new_pd;
+	int ret;
+
+	might_sleep();
+
+	mutex_lock(&pinst->lock);
+
+	new_pd = padata_alloc_pd(pinst, cpumask);
+	if (new_pd) {
+		cpumask_copy(pinst->cpumask, cpumask);
+		padata_replace(pinst, new_pd);
+		ret = 0;
+	} else {
+		ret = -ENOMEM;
+	}
+
+	mutex_unlock(&pinst->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(padata_set_cpumask);
+
+/*
+ * __padata_add_cpu - rebuild the parallel_data after @cpu was added
+ *
+ * Does nothing when @cpu is not in cpu_active_mask. Otherwise a new
+ * parallel_data is built from pinst->cpumask and swapped in.
+ * Callers in this file hold pinst->lock.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation failed.
+ */
+static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
+{
+	struct parallel_data *new_pd;
+
+	if (!cpumask_test_cpu(cpu, cpu_active_mask))
+		return 0;
+
+	new_pd = padata_alloc_pd(pinst, pinst->cpumask);
+	if (!new_pd)
+		return -ENOMEM;
+
+	padata_replace(pinst, new_pd);
+
+	return 0;
+}
+
+/*
+ * padata_add_cpu - add a cpu to the padata cpumask
+ *
+ * @pinst: padata instance
+ * @cpu: cpu to add
+ *
+ * Sets @cpu in pinst->cpumask and rebuilds the parallel_data under
+ * pinst->lock. May sleep. Returns the result of __padata_add_cpu().
+ */
+int padata_add_cpu(struct padata_instance *pinst, int cpu)
+{
+	int ret;
+
+	might_sleep();
+
+	mutex_lock(&pinst->lock);
+	cpumask_set_cpu(cpu, pinst->cpumask);
+	ret = __padata_add_cpu(pinst, cpu);
+	mutex_unlock(&pinst->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(padata_add_cpu);
+
+/*
+ * __padata_remove_cpu - rebuild the parallel_data after @cpu was removed
+ *
+ * Does nothing when @cpu is not in cpu_online_mask. Otherwise a new
+ * parallel_data is built from pinst->cpumask and swapped in.
+ * Callers in this file hold pinst->lock.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation failed.
+ */
+static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
+{
+	struct parallel_data *new_pd;
+
+	if (!cpumask_test_cpu(cpu, cpu_online_mask))
+		return 0;
+
+	new_pd = padata_alloc_pd(pinst, pinst->cpumask);
+	if (!new_pd)
+		return -ENOMEM;
+
+	padata_replace(pinst, new_pd);
+
+	return 0;
+}
+
+/*
+ * padata_remove_cpu - remove a cpu from the padata cpumask
+ *
+ * @pinst: padata instance
+ * @cpu: cpu to remove
+ *
+ * Clears @cpu in pinst->cpumask and rebuilds the parallel_data under
+ * pinst->lock. May sleep. Returns the result of __padata_remove_cpu().
+ */
+int padata_remove_cpu(struct padata_instance *pinst, int cpu)
+{
+	int ret;
+
+	might_sleep();
+
+	mutex_lock(&pinst->lock);
+	cpumask_clear_cpu(cpu, pinst->cpumask);
+	ret = __padata_remove_cpu(pinst, cpu);
+	mutex_unlock(&pinst->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(padata_remove_cpu);
+
+/*
+ * padata_start - start the parallel processing
+ *
+ * @pinst: padata instance to start
+ *
+ * Sets PADATA_INIT in pinst->flags under pinst->lock. May sleep.
+ */
+void padata_start(struct padata_instance *pinst)
+{
+	might_sleep();
+
+	mutex_lock(&pinst->lock);
+
+	pinst->flags = pinst->flags | PADATA_INIT;
+
+	mutex_unlock(&pinst->lock);
+}
+EXPORT_SYMBOL(padata_start);
+
+/*
+ * padata_stop - stop the parallel processing
+ *
+ * @pinst: padata instance to stop
+ *
+ * Clears PADATA_INIT in pinst->flags under pinst->lock. May sleep.
+ */
+void padata_stop(struct padata_instance *pinst)
+{
+	might_sleep();
+
+	mutex_lock(&pinst->lock);
+
+	pinst->flags = pinst->flags & ~PADATA_INIT;
+
+	mutex_unlock(&pinst->lock);
+}
+EXPORT_SYMBOL(padata_stop);
+
+/*
+ * padata_cpu_callback - cpu hotplug notifier of a padata instance
+ *
+ * @nfb: notifier block embedded in the padata instance
+ * @action: hotplug event
+ * @hcpu: number of the affected cpu, passed as a pointer-sized value
+ *
+ * Events for cpus that are not set in pinst->cpumask are ignored. For the
+ * others the parallel_data is rebuilt under pinst->lock:
+ *   CPU_ONLINE / CPU_DOWN_FAILED      -> __padata_add_cpu()
+ *   CPU_DOWN_PREPARE / CPU_UP_CANCELED -> __padata_remove_cpu()
+ *
+ * Returns NOTIFY_BAD if the rebuild fails for CPU_ONLINE or
+ * CPU_DOWN_PREPARE, NOTIFY_OK in all other cases. Failures while rolling
+ * back (CPU_UP_CANCELED, CPU_DOWN_FAILED) are not reported.
+ */
+static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
+					 unsigned long action, void *hcpu)
+{
+	int err;
+	struct padata_instance *pinst;
+	int cpu = (unsigned long)hcpu;
+
+	pinst = container_of(nfb, struct padata_instance, cpu_notifier);
+
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+		if (!cpumask_test_cpu(cpu, pinst->cpumask))
+			break;
+		mutex_lock(&pinst->lock);
+		err = __padata_add_cpu(pinst, cpu);
+		mutex_unlock(&pinst->lock);
+		if (err)
+			return NOTIFY_BAD;
+		break;
+
+	case CPU_DOWN_PREPARE:
+	case CPU_DOWN_PREPARE_FROZEN:
+		if (!cpumask_test_cpu(cpu, pinst->cpumask))
+			break;
+		mutex_lock(&pinst->lock);
+		err = __padata_remove_cpu(pinst, cpu);
+		mutex_unlock(&pinst->lock);
+		if (err)
+			return NOTIFY_BAD;
+		break;
+
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+		if (!cpumask_test_cpu(cpu, pinst->cpumask))
+			break;
+		mutex_lock(&pinst->lock);
+		__padata_remove_cpu(pinst, cpu);
+		mutex_unlock(&pinst->lock);
+		/*
+		 * Must not fall through: the next case would immediately
+		 * re-add the cpu that just failed to come up.
+		 */
+		break;
+
+	case CPU_DOWN_FAILED:
+	case CPU_DOWN_FAILED_FROZEN:
+		if (!cpumask_test_cpu(cpu, pinst->cpumask))
+			break;
+		mutex_lock(&pinst->lock);
+		__padata_add_cpu(pinst, cpu);
+		mutex_unlock(&pinst->lock);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+/*
+ * padata_alloc - allocate and initialize a padata instance
+ *
+ * @cpumask: cpumask that padata uses for parallelization
+ * @wq: workqueue to use for the allocated padata instance
+ *
+ * Allocates the instance and its initial parallel_data, stores @wq and a
+ * copy of @cpumask, and registers the cpu hotplug notifier. The instance
+ * is returned with flags cleared, i.e. PADATA_INIT is not set until
+ * padata_start() is called.
+ *
+ * Returns the new instance, or NULL if an allocation or the notifier
+ * registration failed.
+ */
+struct padata_instance *padata_alloc(const struct cpumask *cpumask,
+				     struct workqueue_struct *wq)
+{
+	int err;
+	struct padata_instance *pinst;
+	struct parallel_data *pd;
+
+	pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
+	if (!pinst)
+		goto err;
+
+	pd = padata_alloc_pd(pinst, cpumask);
+	if (!pd)
+		goto err_free_inst;
+
+	/*
+	 * The hotplug callback takes pinst->lock, so the mutex has to be
+	 * usable before the notifier is registered below.
+	 */
+	mutex_init(&pinst->lock);
+
+	rcu_assign_pointer(pinst->pd, pd);
+
+	pinst->wq = wq;
+
+	/*
+	 * NOTE(review): if pinst->cpumask is a cpumask_var_t it needs an
+	 * alloc_cpumask_var() before this copy (and a matching free in
+	 * padata_free()); the struct definition is not visible here -
+	 * confirm.
+	 */
+	cpumask_copy(pinst->cpumask, cpumask);
+
+	pinst->flags = 0;
+
+	pinst->cpu_notifier.notifier_call = padata_cpu_callback;
+	pinst->cpu_notifier.priority = 0;
+	err = register_hotcpu_notifier(&pinst->cpu_notifier);
+	if (err)
+		goto err_free_pd;
+
+	return pinst;
+
+err_free_pd:
+	padata_free_pd(pd);
+err_free_inst:
+	kfree(pinst);
+err:
+	return NULL;
+}
+EXPORT_SYMBOL(padata_alloc);
+
+/*
+ * padata_free - free a padata instance
+ *
+ * @pinst: padata instance to free
+ *
+ * Stops the instance, waits for users of its parallel_data to go away,
+ * unregisters the cpu hotplug notifier and frees the parallel_data and
+ * the instance itself. Sleeps (padata_stop, synchronize_rcu, yield).
+ *
+ * NOTE(review): the workqueue handed to padata_alloc() is not touched
+ * here, so it presumably stays owned by the caller - confirm.
+ */
+void padata_free(struct padata_instance *pinst)
+{
+ /* Clear PADATA_INIT. */
+ padata_stop(pinst);
+
+ /* Wait for RCU readers that may still be using pinst->pd. */
+ synchronize_rcu();
+
+ /* Wait until the reference count of the parallel_data drops to zero. */
+ while (atomic_read(&pinst->pd->refcnt) != 0)
+ yield();
+
+ unregister_hotcpu_notifier(&pinst->cpu_notifier);
+ padata_free_pd(pinst->pd);
+ kfree(pinst);
+}
+EXPORT_SYMBOL(padata_free);
--
1.5.4.2