From: Stephan Mueller <smueller@chronox.de>
Subject: Re: [PATCH 3/3 v4] crypto: kpp - Add ECDH software support
Date: Fri, 06 May 2016 14:02:31 +0200
Message-ID: <1651427.zoOpPeUgY0@positron.chronox.de>
References: <1462439857-93895-1-git-send-email-salvatore.benedetto@intel.com> <1462439857-93895-4-git-send-email-salvatore.benedetto@intel.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7Bit
Cc: herbert@gondor.apana.org.au, linux-crypto@vger.kernel.org
To: Salvatore Benedetto <salvatore.benedetto@intel.com>
In-Reply-To: <1462439857-93895-4-git-send-email-salvatore.benedetto@intel.com>
Sender: linux-crypto-owner@vger.kernel.org

Am Donnerstag, 5. Mai 2016, 10:17:37 schrieb Salvatore Benedetto:

Hi Salvatore,

>  * Implement ECDH under kpp API
>  * Provide ECC software support for curve P-192 and
>    P-256.
>  * Add kpp test for ECDH with data generated by OpenSSL
> 
> Signed-off-by: Salvatore Benedetto <salvatore.benedetto@intel.com>
> ---
>  crypto/Kconfig          |    5 +
>  crypto/Makefile         |    3 +
>  crypto/ecc.c            | 1038
> +++++++++++++++++++++++++++++++++++++++++++++++ crypto/ecc.h            |  
> 70 ++++
>  crypto/ecc_curve_defs.h |   57 +++
>  crypto/ecdh.c           |  171 ++++++++
>  crypto/testmgr.c        |  136 ++++++-
>  crypto/testmgr.h        |   73 ++++
>  include/crypto/ecdh.h   |   24 ++
>  9 files changed, 1568 insertions(+), 9 deletions(-)
>  create mode 100644 crypto/ecc.c
>  create mode 100644 crypto/ecc.h
>  create mode 100644 crypto/ecc_curve_defs.h
>  create mode 100644 crypto/ecdh.c
>  create mode 100644 include/crypto/ecdh.h
> 
> diff --git a/crypto/Kconfig b/crypto/Kconfig
> index 89db25c..08a1a3b 100644
> --- a/crypto/Kconfig
> +++ b/crypto/Kconfig
> @@ -117,6 +117,11 @@ config CRYPTO_DH
>  	help
>  	  Generic implementation of the Diffie-Hellman algorithm.
> 
> +config CRYPTO_ECDH
> +	tristate "ECDH algorithm"
> +	select CRYTPO_KPP
> +	help
> +	  Generic implementation of the ECDH algorithm
> 
>  config CRYPTO_MANAGER
>  	tristate "Cryptographic algorithm manager"
> diff --git a/crypto/Makefile b/crypto/Makefile
> index 101f8fd..ba03079 100644
> --- a/crypto/Makefile
> +++ b/crypto/Makefile
> @@ -33,6 +33,9 @@ obj-$(CONFIG_CRYPTO_AKCIPHER2) += akcipher.o
>  obj-$(CONFIG_CRYPTO_KPP2) += kpp.o
> 
>  obj-$(CONFIG_CRYPTO_DH) += dh.o
> +ecdh_generic-y := ecc.o
> +ecdh_generic-y += ecdh.o
> +obj-$(CONFIG_CRYPTO_ECDH) += ecdh_generic.o
> 
>  $(obj)/rsapubkey-asn1.o: $(obj)/rsapubkey-asn1.c $(obj)/rsapubkey-asn1.h
>  $(obj)/rsaprivkey-asn1.o: $(obj)/rsaprivkey-asn1.c $(obj)/rsaprivkey-asn1.h
> diff --git a/crypto/ecc.c b/crypto/ecc.c
> new file mode 100644
> index 0000000..c50f9c8
> --- /dev/null
> +++ b/crypto/ecc.c
> @@ -0,0 +1,1038 @@
> +/*
> + * Copyright (c) 2013, Kenneth MacKay
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions are
> + * met:
> + *  * Redistributions of source code must retain the above copyright
> + *   notice, this list of conditions and the following disclaimer.
> + *  * Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <linux/random.h>
> +#include <linux/slab.h>
> +#include <linux/swab.h>
> +#include <crypto/ecdh.h>
> +
> +#include "ecc.h"
> +#include "ecc_curve_defs.h"
> +
> +#define MAX_TRIES 16
> +
> +typedef struct {
> +	u64 m_low;
> +	u64 m_high;
> +} uint128_t;
> +
> +static inline const struct ecc_curve *ecc_get_curve(unsigned int curve_id)
> +{
> +	switch (curve_id) {
> +	case ECC_CURVE_NIST_P192: return &nist_p192;
> +	case ECC_CURVE_NIST_P256: return &nist_p256;
> +	default: return NULL;
> +	}
> +}
> +
> +static u64 *ecc_alloc_digits_space(unsigned int ndigits)
> +{
> +	size_t len = ndigits * sizeof(u64);
> +
> +	if (!len)
> +		return NULL;
> +
> +	return kmalloc(len, GFP_KERNEL);
> +}
> +
> +static void ecc_free_digits_space(u64 *space)
> +{
> +	kzfree(space);
> +}
> +
> +static struct ecc_point *ecc_alloc_point(unsigned int ndigits)
> +{
> +	struct ecc_point *p = kmalloc(sizeof(*p), GFP_KERNEL);
> +
> +	if (!p)
> +		return NULL;
> +
> +	p->x = ecc_alloc_digits_space(ndigits);
> +	if (!p->x)
> +		goto err_alloc_x;
> +
> +	p->y = ecc_alloc_digits_space(ndigits);
> +	if (!p->y)
> +		goto err_alloc_y;
> +
> +	p->ndigits = ndigits;
> +
> +	return p;
> +
> +err_alloc_y:
> +	ecc_free_digits_space(p->x);
> +err_alloc_x:
> +	kfree(p);
> +	return NULL;
> +}
> +
> +static void ecc_free_point(struct ecc_point *p)
> +{
> +	if (!p)
> +		return;
> +
> +	kzfree(p->x);
> +	kzfree(p->y);
> +	kzfree(p);
> +}
> +
> +static void vli_clear(u64 *vli, unsigned int ndigits)
> +{
> +	int i;
> +
> +	for (i = 0; i < ndigits; i++)
> +		vli[i] = 0;
> +}
> +
> +/* Returns true if vli == 0, false otherwise. */
> +static bool vli_is_zero(const u64 *vli, unsigned int ndigits)
> +{
> +	int i;
> +
> +	for (i = 0; i < ndigits; i++) {
> +		if (vli[i])
> +			return false;
> +	}
> +
> +	return true;
> +}
> +
> +/* Returns nonzero if bit bit of vli is set. */
> +static u64 vli_test_bit(const u64 *vli, unsigned int bit)
> +{
> +	return (vli[bit / 64] & ((u64)1 << (bit % 64)));
> +}
> +
> +/* Counts the number of 64-bit "digits" in vli. */
> +static unsigned int vli_num_digits(const u64 *vli, unsigned int ndigits)
> +{
> +	int i;
> +
> +	/* Search from the end until we find a non-zero digit.
> +	 * We do it in reverse because we expect that most digits will
> +	 * be nonzero.
> +	 */
> +	for (i = ndigits - 1; i >= 0 && vli[i] == 0; i--);
> +
> +	return (i + 1);
> +}
> +
> +/* Counts the number of bits required for vli. */
> +static unsigned int vli_num_bits(const u64 *vli, unsigned int ndigits)
> +{
> +	unsigned int i, num_digits;
> +	u64 digit;
> +
> +	num_digits = vli_num_digits(vli, ndigits);
> +	if (num_digits == 0)
> +		return 0;
> +
> +	digit = vli[num_digits - 1];
> +	for (i = 0; digit; i++)
> +		digit >>= 1;
> +
> +	return ((num_digits - 1) * 64 + i);
> +}
> +
> +/* Sets dest = src. */
> +static void vli_set(u64 *dest, const u64 *src, unsigned int ndigits)
> +{
> +	int i;
> +
> +	for (i = 0; i < ndigits; i++)
> +		dest[i] = src[i];
> +}
> +
> +/* Returns sign of left - right. */
> +static int vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits)
> +{
> +	int i;
> +
> +	for (i = ndigits - 1; i >= 0; i--) {
> +		if (left[i] > right[i])
> +			return 1;
> +		else if (left[i] < right[i])
> +			return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +/* Computes result = in << c, returning carry. Can modify in place
> + * (if result == in). 0 < shift < 64.
> + */
> +static u64 vli_lshift(u64 *result, const u64 *in, unsigned int shift,
> +		      unsigned int ndigits)
> +{
> +	u64 carry = 0;
> +	int i;
> +
> +	for (i = 0; i < ndigits; i++) {
> +		u64 temp = in[i];
> +
> +		result[i] = (temp << shift) | carry;
> +		carry = temp >> (64 - shift);
> +	}
> +
> +	return carry;
> +}
> +
> +/* Computes vli = vli >> 1. */
> +static void vli_rshift1(u64 *vli, unsigned int ndigits)
> +{
> +	u64 *end = vli;
> +	u64 carry = 0;
> +
> +	vli += ndigits;
> +
> +	while (vli-- > end) {
> +		u64 temp = *vli;
> +		*vli = (temp >> 1) | carry;
> +		carry = temp << 63;
> +	}
> +}
> +
> +/* Computes result = left + right, returning carry. Can modify in place. */
> +static u64 vli_add(u64 *result, const u64 *left, const u64 *right, +		
  
> unsigned int ndigits)
> +{
> +	u64 carry = 0;
> +	int i;
> +
> +	for (i = 0; i < ndigits; i++) {
> +		u64 sum;
> +
> +		sum = left[i] + right[i] + carry;
> +		if (sum != left[i])
> +			carry = (sum < left[i]);
> +
> +		result[i] = sum;
> +	}
> +
> +	return carry;
> +}
> +
> +/* Computes result = left - right, returning borrow. Can modify in place.
> */ +static u64 vli_sub(u64 *result, const u64 *left, const u64 *right, +	
	 
>  unsigned int ndigits)
> +{
> +	u64 borrow = 0;
> +	int i;
> +
> +	for (i = 0; i < ndigits; i++) {
> +		u64 diff;
> +
> +		diff = left[i] - right[i] - borrow;
> +		if (diff != left[i])
> +			borrow = (diff > left[i]);
> +
> +		result[i] = diff;
> +	}
> +
> +	return borrow;
> +}
> +
> +static uint128_t mul_64_64(u64 left, u64 right)
> +{
> +	u64 a0 = left & 0xffffffffull;
> +	u64 a1 = left >> 32;
> +	u64 b0 = right & 0xffffffffull;
> +	u64 b1 = right >> 32;
> +	u64 m0 = a0 * b0;
> +	u64 m1 = a0 * b1;
> +	u64 m2 = a1 * b0;
> +	u64 m3 = a1 * b1;
> +	uint128_t result;
> +
> +	m2 += (m0 >> 32);
> +	m2 += m1;
> +
> +	/* Overflow */
> +	if (m2 < m1)
> +		m3 += 0x100000000ull;
> +
> +	result.m_low = (m0 & 0xffffffffull) | (m2 << 32);
> +	result.m_high = m3 + (m2 >> 32);
> +
> +	return result;
> +}
> +
> +static uint128_t add_128_128(uint128_t a, uint128_t b)
> +{
> +	uint128_t result;
> +
> +	result.m_low = a.m_low + b.m_low;
> +	result.m_high = a.m_high + b.m_high + (result.m_low < a.m_low);
> +
> +	return result;
> +}
> +
> +static void vli_mult(u64 *result, const u64 *left, const u64 *right,
> +		     unsigned int ndigits)
> +{
> +	uint128_t r01 = { 0, 0 };
> +	u64 r2 = 0;
> +	unsigned int i, k;
> +
> +	/* Compute each digit of result in sequence, maintaining the
> +	 * carries.
> +	 */
> +	for (k = 0; k < ndigits * 2 - 1; k++) {
> +		unsigned int min;
> +
> +		if (k < ndigits)
> +			min = 0;
> +		else
> +			min = (k + 1) - ndigits;
> +
> +		for (i = min; i <= k && i < ndigits; i++) {
> +			uint128_t product;
> +
> +			product = mul_64_64(left[i], right[k - i]);
> +
> +			r01 = add_128_128(r01, product);
> +			r2 += (r01.m_high < product.m_high);
> +		}
> +
> +		result[k] = r01.m_low;
> +		r01.m_low = r01.m_high;
> +		r01.m_high = r2;
> +		r2 = 0;
> +	}
> +
> +	result[ndigits * 2 - 1] = r01.m_low;
> +}
> +
> +static void vli_square(u64 *result, const u64 *left, unsigned int ndigits)
> +{
> +	uint128_t r01 = { 0, 0 };
> +	u64 r2 = 0;
> +	int i, k;
> +
> +	for (k = 0; k < ndigits * 2 - 1; k++) {
> +		unsigned int min;
> +
> +		if (k < ndigits)
> +			min = 0;
> +		else
> +			min = (k + 1) - ndigits;
> +
> +		for (i = min; i <= k && i <= k - i; i++) {
> +			uint128_t product;
> +
> +			product = mul_64_64(left[i], left[k - i]);
> +
> +			if (i < k - i) {
> +				r2 += product.m_high >> 63;
> +				product.m_high = (product.m_high << 1) |
> +						 (product.m_low >> 63);
> +				product.m_low <<= 1;
> +			}
> +
> +			r01 = add_128_128(r01, product);
> +			r2 += (r01.m_high < product.m_high);
> +		}
> +
> +		result[k] = r01.m_low;
> +		r01.m_low = r01.m_high;
> +		r01.m_high = r2;
> +		r2 = 0;
> +	}
> +
> +	result[ndigits * 2 - 1] = r01.m_low;
> +}
> +
> +/* Computes result = (left + right) % mod.
> + * Assumes that left < mod and right < mod, result != mod.
> + */
> +static void vli_mod_add(u64 *result, const u64 *left, const u64 *right,
> +			const u64 *mod, unsigned int ndigits)
> +{
> +	u64 carry;
> +
> +	carry = vli_add(result, left, right, ndigits);
> +
> +	/* result > mod (result = mod + remainder), so subtract mod to
> +	 * get remainder.
> +	 */
> +	if (carry || vli_cmp(result, mod, ndigits) >= 0)
> +		vli_sub(result, result, mod, ndigits);
> +}
> +
> +/* Computes result = (left - right) % mod.
> + * Assumes that left < mod and right < mod, result != mod.
> + */
> +static void vli_mod_sub(u64 *result, const u64 *left, const u64 *right,
> +			const u64 *mod, unsigned int ndigits)
> +{
> +	u64 borrow = vli_sub(result, left, right, ndigits);
> +
> +	/* In this case, p_result == -diff == (max int) - diff.
> +	 * Since -x % d == d - x, we can get the correct result from
> +	 * result + mod (with overflow).
> +	 */
> +	if (borrow)
> +		vli_add(result, result, mod, ndigits);
> +}
> +
> +/* Computes p_result = p_product % curve_p.
> + * See algorithm 5 and 6 from
> + * http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf
> + */
> +static void vli_mmod_fast_192(u64 *result, const u64 *product,
> +			      const u64 *curve_prime, u64 *tmp)
> +{
> +	const unsigned int ndigits = 3;
> +	int carry;
> +
> +	vli_set(result, product, ndigits);
> +
> +	vli_set(tmp, &product[3], ndigits);
> +	carry = vli_add(result, result, tmp, ndigits);
> +
> +	tmp[0] = 0;
> +	tmp[1] = product[3];
> +	tmp[2] = product[4];
> +	carry += vli_add(result, result, tmp, ndigits);
> +
> +	tmp[0] = tmp[1] = product[5];
> +	tmp[2] = 0;
> +	carry += vli_add(result, result, tmp, ndigits);
> +
> +	while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
> +		carry -= vli_sub(result, result, curve_prime, ndigits);
> +}
> +
> +/* Computes result = product % curve_prime
> + * from http://www.nsa.gov/ia/_files/nist-routines.pdf
> + */
> +static void vli_mmod_fast_256(u64 *result, const u64 *product,
> +			      const u64 *curve_prime, u64 *tmp)
> +{
> +	int carry;
> +	const unsigned int ndigits = 4;
> +
> +	/* t */
> +	vli_set(result, product, ndigits);
> +
> +	/* s1 */
> +	tmp[0] = 0;
> +	tmp[1] = product[5] & 0xffffffff00000000ull;
> +	tmp[2] = product[6];
> +	tmp[3] = product[7];
> +	carry = vli_lshift(tmp, tmp, 1, ndigits);
> +	carry += vli_add(result, result, tmp, ndigits);
> +
> +	/* s2 */
> +	tmp[1] = product[6] << 32;
> +	tmp[2] = (product[6] >> 32) | (product[7] << 32);
> +	tmp[3] = product[7] >> 32;
> +	carry += vli_lshift(tmp, tmp, 1, ndigits);
> +	carry += vli_add(result, result, tmp, ndigits);
> +
> +	/* s3 */
> +	tmp[0] = product[4];
> +	tmp[1] = product[5] & 0xffffffff;
> +	tmp[2] = 0;
> +	tmp[3] = product[7];
> +	carry += vli_add(result, result, tmp, ndigits);
> +
> +	/* s4 */
> +	tmp[0] = (product[4] >> 32) | (product[5] << 32);
> +	tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000ull);
> +	tmp[2] = product[7];
> +	tmp[3] = (product[6] >> 32) | (product[4] << 32);
> +	carry += vli_add(result, result, tmp, ndigits);
> +
> +	/* d1 */
> +	tmp[0] = (product[5] >> 32) | (product[6] << 32);
> +	tmp[1] = (product[6] >> 32);
> +	tmp[2] = 0;
> +	tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32);
> +	carry -= vli_sub(result, result, tmp, ndigits);
> +
> +	/* d2 */
> +	tmp[0] = product[6];
> +	tmp[1] = product[7];
> +	tmp[2] = 0;
> +	tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000ull);
> +	carry -= vli_sub(result, result, tmp, ndigits);
> +
> +	/* d3 */
> +	tmp[0] = (product[6] >> 32) | (product[7] << 32);
> +	tmp[1] = (product[7] >> 32) | (product[4] << 32);
> +	tmp[2] = (product[4] >> 32) | (product[5] << 32);
> +	tmp[3] = (product[6] << 32);
> +	carry -= vli_sub(result, result, tmp, ndigits);
> +
> +	/* d4 */
> +	tmp[0] = product[7];
> +	tmp[1] = product[4] & 0xffffffff00000000ull;
> +	tmp[2] = product[5];
> +	tmp[3] = product[6] & 0xffffffff00000000ull;
> +	carry -= vli_sub(result, result, tmp, ndigits);
> +
> +	if (carry < 0) {
> +		do {
> +			carry += vli_add(result, result, curve_prime, 
ndigits);
> +		} while (carry < 0);
> +	} else {
> +		while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
> +			carry -= vli_sub(result, result, curve_prime, 
ndigits);
> +	}
> +}
> +
> +/* Computes result = product % curve_prime
> + *  from http://www.nsa.gov/ia/_files/nist-routines.pdf
> +*/
> +static bool vli_mmod_fast(u64 *result, u64 *product,
> +			  const u64 *curve_prime, unsigned int ndigits)
> +{
> +	u64 tmp[2 * ndigits];
> +
> +	switch (ndigits) {
> +	case 3:
> +		vli_mmod_fast_192(result, product, curve_prime, tmp);
> +		break;
> +	case 4:
> +		vli_mmod_fast_256(result, product, curve_prime, tmp);
> +		break;
> +	default:
> +		pr_err("unsupports digits size!\n");
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +/* Computes result = (left * right) % curve_prime. */
> +static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64
> *right, +			      const u64 *curve_prime, unsigned int 
ndigits)
> +{
> +	u64 product[2 * ndigits];
> +
> +	vli_mult(product, left, right, ndigits);
> +	vli_mmod_fast(result, product, curve_prime, ndigits);
> +}
> +
> +/* Computes result = left^2 % curve_prime. */
> +static void vli_mod_square_fast(u64 *result, const u64 *left,
> +				const u64 *curve_prime, unsigned int ndigits)
> +{
> +	u64 product[2 * ndigits];
> +
> +	vli_square(product, left, ndigits);
> +	vli_mmod_fast(result, product, curve_prime, ndigits);
> +}
> +
> +#define EVEN(vli) (!(vli[0] & 1))
> +/* Computes result = (1 / p_input) % mod. All VLIs are the same size.
> + * See "From Euclid's GCD to Montgomery Multiplication to the Great Divide"
> + * https://labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf
> + */
> +static void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
> +			unsigned int ndigits)
> +{
> +	u64 a[ndigits], b[ndigits];
> +	u64 u[ndigits], v[ndigits];
> +	u64 carry;
> +	int cmp_result;
> +
> +	if (vli_is_zero(input, ndigits)) {
> +		vli_clear(result, ndigits);
> +		return;
> +	}
> +
> +	vli_set(a, input, ndigits);
> +	vli_set(b, mod, ndigits);
> +	vli_clear(u, ndigits);
> +	u[0] = 1;
> +	vli_clear(v, ndigits);
> +
> +	while ((cmp_result = vli_cmp(a, b, ndigits)) != 0) {
> +		carry = 0;
> +
> +		if (EVEN(a)) {
> +			vli_rshift1(a, ndigits);
> +
> +			if (!EVEN(u))
> +				carry = vli_add(u, u, mod, ndigits);
> +
> +			vli_rshift1(u, ndigits);
> +			if (carry)
> +				u[ndigits - 1] |= 0x8000000000000000ull;
> +		} else if (EVEN(b)) {
> +			vli_rshift1(b, ndigits);
> +
> +			if (!EVEN(v))
> +				carry = vli_add(v, v, mod, ndigits);
> +
> +			vli_rshift1(v, ndigits);
> +			if (carry)
> +				v[ndigits - 1] |= 0x8000000000000000ull;
> +		} else if (cmp_result > 0) {
> +			vli_sub(a, a, b, ndigits);
> +			vli_rshift1(a, ndigits);
> +
> +			if (vli_cmp(u, v, ndigits) < 0)
> +				vli_add(u, u, mod, ndigits);
> +
> +			vli_sub(u, u, v, ndigits);
> +			if (!EVEN(u))
> +				carry = vli_add(u, u, mod, ndigits);
> +
> +			vli_rshift1(u, ndigits);
> +			if (carry)
> +				u[ndigits - 1] |= 0x8000000000000000ull;
> +		} else {
> +			vli_sub(b, b, a, ndigits);
> +			vli_rshift1(b, ndigits);
> +
> +			if (vli_cmp(v, u, ndigits) < 0)
> +				vli_add(v, v, mod, ndigits);
> +
> +			vli_sub(v, v, u, ndigits);
> +			if (!EVEN(v))
> +				carry = vli_add(v, v, mod, ndigits);
> +
> +			vli_rshift1(v, ndigits);
> +			if (carry)
> +				v[ndigits - 1] |= 0x8000000000000000ull;
> +		}
> +	}
> +
> +	vli_set(result, u, ndigits);
> +}
> +
> +/* ------ Point operations ------ */
> +
> +/* Returns true if p_point is the point at infinity, false otherwise. */
> +static bool ecc_point_is_zero(const struct ecc_point *point)
> +{
> +	return (vli_is_zero(point->x, point->ndigits) &&
> +		vli_is_zero(point->y, point->ndigits));
> +}
> +
> +/* Point multiplication algorithm using Montgomery's ladder with co-Z
> + * coordinates. From http://eprint.iacr.org/2011/338.pdf
> + */
> +
> +/* Double in place */
> +static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1,
> +				      u64 *curve_prime, unsigned int ndigits)
> +{
> +	/* t1 = x, t2 = y, t3 = z */
> +	u64 t4[ndigits];
> +	u64 t5[ndigits];
> +
> +	if (vli_is_zero(z1, ndigits))
> +		return;
> +
> +	/* t4 = y1^2 */
> +	vli_mod_square_fast(t4, y1, curve_prime, ndigits);
> +	/* t5 = x1*y1^2 = A */
> +	vli_mod_mult_fast(t5, x1, t4, curve_prime, ndigits);
> +	/* t4 = y1^4 */
> +	vli_mod_square_fast(t4, t4, curve_prime, ndigits);
> +	/* t2 = y1*z1 = z3 */
> +	vli_mod_mult_fast(y1, y1, z1, curve_prime, ndigits);
> +	/* t3 = z1^2 */
> +	vli_mod_square_fast(z1, z1, curve_prime, ndigits);
> +
> +	/* t1 = x1 + z1^2 */
> +	vli_mod_add(x1, x1, z1, curve_prime, ndigits);
> +	/* t3 = 2*z1^2 */
> +	vli_mod_add(z1, z1, z1, curve_prime, ndigits);
> +	/* t3 = x1 - z1^2 */
> +	vli_mod_sub(z1, x1, z1, curve_prime, ndigits);
> +	/* t1 = x1^2 - z1^4 */
> +	vli_mod_mult_fast(x1, x1, z1, curve_prime, ndigits);
> +
> +	/* t3 = 2*(x1^2 - z1^4) */
> +	vli_mod_add(z1, x1, x1, curve_prime, ndigits);
> +	/* t1 = 3*(x1^2 - z1^4) */
> +	vli_mod_add(x1, x1, z1, curve_prime, ndigits);
> +	if (vli_test_bit(x1, 0)) {
> +		u64 carry = vli_add(x1, x1, curve_prime, ndigits);
> +
> +		vli_rshift1(x1, ndigits);
> +		x1[ndigits - 1] |= carry << 63;
> +	} else {
> +		vli_rshift1(x1, ndigits);
> +	}
> +	/* t1 = 3/2*(x1^2 - z1^4) = B */
> +
> +	/* t3 = B^2 */
> +	vli_mod_square_fast(z1, x1, curve_prime, ndigits);
> +	/* t3 = B^2 - A */
> +	vli_mod_sub(z1, z1, t5, curve_prime, ndigits);
> +	/* t3 = B^2 - 2A = x3 */
> +	vli_mod_sub(z1, z1, t5, curve_prime, ndigits);
> +	/* t5 = A - x3 */
> +	vli_mod_sub(t5, t5, z1, curve_prime, ndigits);
> +	/* t1 = B * (A - x3) */
> +	vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits);
> +	/* t4 = B * (A - x3) - y1^4 = y3 */
> +	vli_mod_sub(t4, x1, t4, curve_prime, ndigits);
> +
> +	vli_set(x1, z1, ndigits);
> +	vli_set(z1, y1, ndigits);
> +	vli_set(y1, t4, ndigits);
> +}
> +
> +/* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */
> +static void apply_z(u64 *x1, u64 *y1, u64 *z, u64 *curve_prime,
> +		    unsigned int ndigits)
> +{
> +	u64 t1[ndigits];
> +
> +	vli_mod_square_fast(t1, z, curve_prime, ndigits);    /* z^2 */
> +	vli_mod_mult_fast(x1, x1, t1, curve_prime, ndigits); /* x1 * z^2 */
> +	vli_mod_mult_fast(t1, t1, z, curve_prime, ndigits);  /* z^3 */
> +	vli_mod_mult_fast(y1, y1, t1, curve_prime, ndigits); /* y1 * z^3 */
> +}
> +
> +/* P = (x1, y1) => 2P, (x2, y2) => P' */
> +static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
> +				u64 *p_initial_z, u64 *curve_prime,
> +				unsigned int ndigits)
> +{
> +	u64 z[ndigits];
> +
> +	vli_set(x2, x1, ndigits);
> +	vli_set(y2, y1, ndigits);
> +
> +	vli_clear(z, ndigits);
> +	z[0] = 1;
> +
> +	if (p_initial_z)
> +		vli_set(z, p_initial_z, ndigits);
> +
> +	apply_z(x1, y1, z, curve_prime, ndigits);
> +
> +	ecc_point_double_jacobian(x1, y1, z, curve_prime, ndigits);
> +
> +	apply_z(x2, y2, z, curve_prime, ndigits);
> +}
> +
> +/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
> + * Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3)
> + * or P => P', Q => P + Q
> + */
> +static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime,
> +		     unsigned int ndigits)
> +{
> +	/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
> +	u64 t5[ndigits];
> +
> +	/* t5 = x2 - x1 */
> +	vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
> +	/* t5 = (x2 - x1)^2 = A */
> +	vli_mod_square_fast(t5, t5, curve_prime, ndigits);
> +	/* t1 = x1*A = B */
> +	vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits);
> +	/* t3 = x2*A = C */
> +	vli_mod_mult_fast(x2, x2, t5, curve_prime, ndigits);
> +	/* t4 = y2 - y1 */
> +	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
> +	/* t5 = (y2 - y1)^2 = D */
> +	vli_mod_square_fast(t5, y2, curve_prime, ndigits);
> +
> +	/* t5 = D - B */
> +	vli_mod_sub(t5, t5, x1, curve_prime, ndigits);
> +	/* t5 = D - B - C = x3 */
> +	vli_mod_sub(t5, t5, x2, curve_prime, ndigits);
> +	/* t3 = C - B */
> +	vli_mod_sub(x2, x2, x1, curve_prime, ndigits);
> +	/* t2 = y1*(C - B) */
> +	vli_mod_mult_fast(y1, y1, x2, curve_prime, ndigits);
> +	/* t3 = B - x3 */
> +	vli_mod_sub(x2, x1, t5, curve_prime, ndigits);
> +	/* t4 = (y2 - y1)*(B - x3) */
> +	vli_mod_mult_fast(y2, y2, x2, curve_prime, ndigits);
> +	/* t4 = y3 */
> +	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
> +
> +	vli_set(x2, t5, ndigits);
> +}
> +
> +/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
> + * Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3)
> + * or P => P - Q, Q => P + Q
> + */
> +static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64
> *curve_prime, +		       unsigned int ndigits)
> +{
> +	/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
> +	u64 t5[ndigits];
> +	u64 t6[ndigits];
> +	u64 t7[ndigits];
> +
> +	/* t5 = x2 - x1 */
> +	vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
> +	/* t5 = (x2 - x1)^2 = A */
> +	vli_mod_square_fast(t5, t5, curve_prime, ndigits);
> +	/* t1 = x1*A = B */
> +	vli_mod_mult_fast(x1, x1, t5, curve_prime, ndigits);
> +	/* t3 = x2*A = C */
> +	vli_mod_mult_fast(x2, x2, t5, curve_prime, ndigits);
> +	/* t4 = y2 + y1 */
> +	vli_mod_add(t5, y2, y1, curve_prime, ndigits);
> +	/* t4 = y2 - y1 */
> +	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
> +
> +	/* t6 = C - B */
> +	vli_mod_sub(t6, x2, x1, curve_prime, ndigits);
> +	/* t2 = y1 * (C - B) */
> +	vli_mod_mult_fast(y1, y1, t6, curve_prime, ndigits);
> +	/* t6 = B + C */
> +	vli_mod_add(t6, x1, x2, curve_prime, ndigits);
> +	/* t3 = (y2 - y1)^2 */
> +	vli_mod_square_fast(x2, y2, curve_prime, ndigits);
> +	/* t3 = x3 */
> +	vli_mod_sub(x2, x2, t6, curve_prime, ndigits);
> +
> +	/* t7 = B - x3 */
> +	vli_mod_sub(t7, x1, x2, curve_prime, ndigits);
> +	/* t4 = (y2 - y1)*(B - x3) */
> +	vli_mod_mult_fast(y2, y2, t7, curve_prime, ndigits);
> +	/* t4 = y3 */
> +	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
> +
> +	/* t7 = (y2 + y1)^2 = F */
> +	vli_mod_square_fast(t7, t5, curve_prime, ndigits);
> +	/* t7 = x3' */
> +	vli_mod_sub(t7, t7, t6, curve_prime, ndigits);
> +	/* t6 = x3' - B */
> +	vli_mod_sub(t6, t7, x1, curve_prime, ndigits);
> +	/* t6 = (y2 + y1)*(x3' - B) */
> +	vli_mod_mult_fast(t6, t6, t5, curve_prime, ndigits);
> +	/* t2 = y3' */
> +	vli_mod_sub(y1, t6, y1, curve_prime, ndigits);
> +
> +	vli_set(x1, t7, ndigits);
> +}
> +
> +static void ecc_point_mult(struct ecc_point *result,
> +			   const struct ecc_point *point, const u64 *scalar,
> +			   u64 *initial_z, u64 *curve_prime,
> +			   unsigned int ndigits)
> +{
> +	/* R0 and R1 */
> +	u64 rx[2][ndigits];
> +	u64 ry[2][ndigits];
> +	u64 z[ndigits];
> +	int i, nb;
> +	int num_bits = vli_num_bits(scalar, ndigits);
> +
> +	vli_set(rx[1], point->x, ndigits);
> +	vli_set(ry[1], point->y, ndigits);
> +
> +	xycz_initial_double(rx[1], ry[1], rx[0], ry[0], initial_z, 
curve_prime,
> +			    ndigits);
> +
> +	for (i = num_bits - 2; i > 0; i--) {
> +		nb = !vli_test_bit(scalar, i);
> +		xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], 
curve_prime,
> +			   ndigits);
> +		xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve_prime,
> +			 ndigits);
> +	}
> +
> +	nb = !vli_test_bit(scalar, 0);
> +	xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve_prime,
> +		   ndigits);
> +
> +	/* Find final 1/Z value. */
> +	/* X1 - X0 */
> +	vli_mod_sub(z, rx[1], rx[0], curve_prime, ndigits);
> +	/* Yb * (X1 - X0) */
> +	vli_mod_mult_fast(z, z, ry[1 - nb], curve_prime, ndigits);
> +	/* xP * Yb * (X1 - X0) */
> +	vli_mod_mult_fast(z, z, point->x, curve_prime, ndigits);
> +
> +	/* 1 / (xP * Yb * (X1 - X0)) */
> +	vli_mod_inv(z, z, curve_prime, point->ndigits);
> +
> +	/* yP / (xP * Yb * (X1 - X0)) */
> +	vli_mod_mult_fast(z, z, point->y, curve_prime, ndigits);
> +	/* Xb * yP / (xP * Yb * (X1 - X0)) */
> +	vli_mod_mult_fast(z, z, rx[1 - nb], curve_prime, ndigits);
> +	/* End 1/Z calculation */
> +
> +	xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve_prime, 
ndigits);
> +
> +	apply_z(rx[0], ry[0], z, curve_prime, ndigits);
> +
> +	vli_set(result->x, rx[0], ndigits);
> +	vli_set(result->y, ry[0], ndigits);
> +}
> +
> +static inline void ecc_swap_digits(const u64 *in, u64 *out,
> +				   unsigned int ndigits)
> +{
> +	int i;
> +
> +	for (i = 0; i < ndigits; i++)
> +		out[i] = __swab64(in[ndigits - 1 - i]);
> +}
> +
> +int ecdh_make_pub_key(unsigned int curve_id,
> +		      const u8 *private_key, unsigned int private_key_len,
> +		      u8 *public_key, unsigned int public_key_len)
> +{
> +	int ret = 0;
> +	struct ecc_point *pk;
> +	unsigned int tries = 0;
> +	u64 *priv = NULL;
> +	unsigned int ndigits;
> +	unsigned int nbytes;
> +	const struct ecc_curve *curve = ecc_get_curve(curve_id);
> +
> +	if (!private_key || !curve) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	ndigits = curve->g.ndigits;
> +	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
> +
> +	if (private_key_len != nbytes) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	if (vli_is_zero((const u64 *)&private_key[0], ndigits)) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	/* Make sure the private key is in the range [1, n-1]. */
> +	if (vli_cmp(curve->n, (const u64 *)&private_key[0], ndigits) != 1) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	priv = ecc_alloc_digits_space(ndigits);
> +	if (!priv) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +
> +	ecc_swap_digits((const u64 *)private_key, priv, ndigits);
> +
> +	pk = ecc_alloc_point(ndigits);
> +	if (!pk) {
> +		ret = -ENOMEM;
> +		goto err_alloc_pk;
> +	}
> +
> +	do {
> +		if (tries++ >= MAX_TRIES)
> +			goto err_retries;
> +
> +		ecc_point_mult(pk, &curve->g, priv, NULL, curve->p, ndigits);
> +
> +	} while (ecc_point_is_zero(pk));
> +
> +	ecc_swap_digits(pk->x, (u64 *)public_key, ndigits);
> +	ecc_swap_digits(pk->y, (u64 *)&public_key[nbytes], ndigits);
> +
> +err_retries:
> +	ecc_free_point(pk);
> +err_alloc_pk:
> +	ecc_free_digits_space(priv);
> +out:
> +	return ret;
> +}
> +
> +int ecdh_shared_secret(unsigned int curve_id,
> +		       const u8 *private_key, unsigned int private_key_len,
> +		       const u8 *public_key, unsigned int public_key_len,
> +		       u8 *secret, unsigned int secret_len)
> +{
> +	int ret = 0;
> +	struct ecc_point *product, *pk;
> +	u64 *priv, *rand_z;
> +	unsigned int ndigits;
> +	unsigned int nbytes;
> +	const struct ecc_curve *curve = ecc_get_curve(curve_id);
> +
> +	if (!private_key || !public_key) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	ndigits = curve->g.ndigits;
> +	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
> +
> +	rand_z = ecc_alloc_digits_space(ndigits);
> +	if (!rand_z) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +
> +	priv = ecc_alloc_digits_space(ndigits);
> +	if (!priv) {
> +		ret = -ENOMEM;
> +		goto err_alloc_priv;
> +	}
> +
> +	get_random_bytes(rand_z, nbytes);
> +
> +	pk = ecc_alloc_point(ndigits);
> +	if (!pk) {
> +		ret = -ENOMEM;
> +		goto err_alloc_pk;
> +	}
> +
> +	product = ecc_alloc_point(ndigits);
> +	if (!product) {
> +		ret = -ENOMEM;
> +		goto err_alloc_product;
> +	}
> +
> +	ecc_swap_digits((const u64 *)public_key, pk->x, ndigits);
> +	ecc_swap_digits((const u64 *)&public_key[nbytes], pk->y, ndigits);
> +	ecc_swap_digits((const u64 *)private_key, priv, ndigits);
> +
> +	ecc_point_mult(product, pk, priv, rand_z, curve->p, ndigits);
> +
> +	ecc_swap_digits(product->x, (u64 *)secret, ndigits);
> +
> +	if (ecc_point_is_zero(product))
> +		ret = -EFAULT;
> +
> +	ecc_free_point(product);
> +err_alloc_product:
> +	ecc_free_point(pk);
> +err_alloc_pk:
> +	ecc_free_digits_space(priv);
> +err_alloc_priv:
> +	ecc_free_digits_space(rand_z);
> +out:
> +	return ret;
> +}
> diff --git a/crypto/ecc.h b/crypto/ecc.h
> new file mode 100644
> index 0000000..7889410
> --- /dev/null
> +++ b/crypto/ecc.h
> @@ -0,0 +1,70 @@
> +/*
> + * Copyright (c) 2013, Kenneth MacKay
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions are
> + * met:
> + *  * Redistributions of source code must retain the above copyright
> + *   notice, this list of conditions and the following disclaimer.
> + *  * Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +#ifndef _CRYPTO_ECC_H
> +#define _CRYPTO_ECC_H
> +
> +#define ECC_MAX_DIGITS	4 /* 256 */
> +
> +#define ECC_DIGITS_TO_BYTES_SHIFT 3
> +
> +/**
> + * ecdh_make_pub_key() - Compute an ECC public key
> + *
> + * @curve_id:		id representing the curve to use
> + * @private_key:	pregenerated private key for the given curve
> + * @private_key_len:	length of private_key
> + * @public_key:		buffer for storing the public key generated
> + * @public_key_len:	length of the public_key buffer
> + *
> + * Returns 0 if the public key was generated successfully, a negative value
> + * if an error occurred.
> + */
> +int ecdh_make_pub_key(const unsigned int curve_id,
> +		      const u8 *private_key, unsigned int private_key_len,
> +		      u8 *public_key, unsigned int public_key_len);
> +
> +/**
> + * ecdh_shared_secret() - Compute a shared secret
> + *
> + * @curve_id:		id representing the curve to use
> + * @private_key:	private key of part A
> + * @private_key_len:	length of private_key
> + * @public_key:		public key of counterpart B
> + * @public_key_len:	length of public_key
> + * @secret:		buffer for storing the calculated shared secret
> + * @secret_len:		length of the secret buffer
> + *
> + * Note: It is recommended that you hash the result of ecdh_shared_secret
> + * before using it for symmetric encryption or HMAC.
> + *
> + * Returns 0 if the shared secret was generated successfully, a negative
> value + * if an error occurred.
> + */
> +int ecdh_shared_secret(unsigned int curve_id,
> +		       const u8 *private_key, unsigned int private_key_len,
> +		       const u8 *public_key, unsigned int public_key_len,
> +		       u8 *secret, unsigned int secret_len);
> +#endif
> diff --git a/crypto/ecc_curve_defs.h b/crypto/ecc_curve_defs.h
> new file mode 100644
> index 0000000..03ae5f7
> --- /dev/null
> +++ b/crypto/ecc_curve_defs.h
> @@ -0,0 +1,57 @@
> +#ifndef _CRYTO_ECC_CURVE_DEFS_H
> +#define _CRYTO_ECC_CURVE_DEFS_H
> +
> +struct ecc_point {
> +	u64 *x;
> +	u64 *y;
> +	u8 ndigits;
> +};
> +
> +struct ecc_curve {
> +	char *name;
> +	struct ecc_point g;
> +	u64 *p;
> +	u64 *n;
> +};
> +
> +/* NIST P-192 */
> +static u64 nist_p192_g_x[] = { 0xF4FF0AFD82FF1012ull,
> 0x7CBF20EB43A18800ull, +				0x188DA80EB03090F6ull 
};
> +static u64 nist_p192_g_y[] = { 0x73F977A11E794811ull,
> 0x631011ED6B24CDD5ull, +				0x07192B95FFC8DA78ull 
};
> +static u64 nist_p192_p[] = { 0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFEull,
> +				0xFFFFFFFFFFFFFFFFull };
> +static u64 nist_p192_n[] = { 0x146BC9B1B4D22831ull, 0xFFFFFFFF99DEF836ull,
> +				0xFFFFFFFFFFFFFFFFull };
> +static struct ecc_curve nist_p192 = {
> +	.name = "nist_192",
> +	.g = {
> +		.x = nist_p192_g_x,
> +		.y = nist_p192_g_y,
> +		.ndigits = 3,
> +	},
> +	.p = nist_p192_p,
> +	.n = nist_p192_n
> +};
> +
> +/* NIST P-256 */
> +static u64 nist_p256_g_x[] = { 0xF4A13945D898C296ull,
> 0x77037D812DEB33A0ull, +				0xF8BCE6E563A440F2ull, 
0x6B17D1F2E12C4247ull };
> +static u64 nist_p256_g_y[] = { 0xCBB6406837BF51F5ull,
> 0x2BCE33576B315ECEull, +				0x8EE7EB4A7C0F9E16ull, 
0x4FE342E2FE1A7F9Bull };
> +static u64 nist_p256_p[] = { 0xFFFFFFFFFFFFFFFFull, 0x00000000FFFFFFFFull,
> +				0x0000000000000000ull, 0xFFFFFFFF00000001ull 
};
> +static u64 nist_p256_n[] = { 0xF3B9CAC2FC632551ull, 0xBCE6FAADA7179E84ull,
> +				0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF00000000ull 
};
> +static struct ecc_curve nist_p256 = {
> +	.name = "nist_256",
> +	.g = {
> +		.x = nist_p256_g_x,
> +		.y = nist_p256_g_y,
> +		.ndigits = 4,
> +	},
> +	.p = nist_p256_p,
> +	.n = nist_p256_n
> +};
> +
> +#endif
> diff --git a/crypto/ecdh.c b/crypto/ecdh.c
> new file mode 100644
> index 0000000..828aa14
> --- /dev/null
> +++ b/crypto/ecdh.c
> @@ -0,0 +1,171 @@
> +/* ECDH key-agreement protocol
> + *
> + * Copyright (c) 2016, Intel Corporation
> + * Authors: Salvator Benedetto <salvatore.benedetto@intel.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public Licence
> + * as published by the Free Software Foundation; either version
> + * 2 of the Licence, or (at your option) any later version.
> + */
> +
> +#include <linux/module.h>
> +#include <crypto/internal/kpp.h>
> +#include <crypto/kpp.h>
> +#include <crypto/ecdh.h>
> +#include <linux/scatterlist.h>
> +#include "ecc.h"
> +
> +struct ecdh_ctx {
> +	unsigned int curve_id;
> +	unsigned int ndigits;
> +	u64 private_key[ECC_MAX_DIGITS];
> +	u64 public_key[2 * ECC_MAX_DIGITS];
> +	u64 shared_secret[ECC_MAX_DIGITS];
> +};
> +
> +static inline struct ecdh_ctx *ecdh_get_ctx(struct crypto_kpp *tfm)
> +{
> +	return kpp_tfm_ctx(tfm);
> +}
> +
> +static unsigned int ecdh_supported_curve(unsigned int curve_id)
> +{
> +	switch (curve_id) {
> +	case ECC_CURVE_NIST_P192: return 3;
> +	case ECC_CURVE_NIST_P256: return 4;

Sorry for the late review:

As we have fips_allowed=1 for the entire system, I would ask for a change 
here: Only allow P256 in FIPS mode (or P384 or P521). All other curves are not 
supported in FIPS mode.

> +	default: return 0;
> +	}
> +}
> +
> +static int ecdh_set_params(struct crypto_kpp *tfm, void *buffer,
> +			   unsigned int len)
> +{
> +	struct ecdh_ctx *ctx = ecdh_get_ctx(tfm);
> +	struct ecdh_params *params = (struct ecdh_params *)buffer;
> +
> +	if (unlikely(!buffer || !len))
> +		return -EINVAL;
> +
> +	ctx->ndigits = ecdh_supported_curve(params->curve_id);
> +	if (unlikely(!ctx->ndigits))
> +		return -EINVAL;
> +
> +	ctx->curve_id = params->curve_id;
> +
> +	return 0;
> +}
> +
> +static int ecdh_set_secret(struct crypto_kpp *tfm, void *buffer,
> +			   unsigned int len)
> +{
> +	struct ecdh_ctx *ctx = ecdh_get_ctx(tfm);
> +
> +	if (unlikely(!buffer || !len))
> +		return -EINVAL;
> +
> +	if (unlikely(ctx->ndigits != (len >> ECC_DIGITS_TO_BYTES_SHIFT)))
> +		return -EINVAL;
> +
> +	memcpy(ctx->private_key, buffer, len);
> +
> +	return 0;
> +}
> +
> +static int ecdh_generate_public_key(struct kpp_request *req)
> +{
> +	int ret = 0;
> +	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
> +	const struct ecdh_ctx *ctx = ecdh_get_ctx(tfm);
> +	size_t copied, nbytes;
> +
> +	nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
> +
> +	ret = ecdh_make_pub_key(ctx->curve_id,
> +				(const u8 *)ctx->private_key, nbytes,
> +				(u8 *)ctx->public_key, sizeof(ctx-
>public_key));
> +	if (ret < 0)
> +		return ret;
> +
> +	/* Public part is a point thus it has both coordinates */
> +	copied = sg_copy_from_buffer(req->dst, 1, ctx->public_key,
> +				     nbytes * 2);
> +
> +	if (copied != 2 * nbytes)
> +		return -EINVAL;
> +
> +	return ret;
> +}
> +
> +static int ecdh_compute_shared_secret(struct kpp_request *req)
> +{
> +	int ret = 0;
> +	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
> +	struct ecdh_ctx *ctx = ecdh_get_ctx(tfm);
> +	size_t copied, nbytes;
> +
> +	nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
> +
> +	copied = sg_copy_to_buffer(req->src, 1, ctx->public_key, 2 * nbytes);
> +	if (copied != 2 * nbytes)
> +		return -EINVAL;
> +
> +	ret = ecdh_shared_secret(ctx->curve_id,
> +				 (const u8 *)ctx->private_key, nbytes,
> +				 (const u8 *)ctx->public_key, 2 * nbytes,
> +				 (u8 *)ctx->shared_secret, nbytes);
> +	if (ret < 0)
> +		return ret;
> +
> +	copied = sg_copy_from_buffer(req->dst, 1, ctx->shared_secret, nbytes);
> +	if (copied != nbytes)
> +		return -EINVAL;
> +
> +	return ret;
> +}
> +
> +static int ecdh_max_size(struct crypto_kpp *tfm)
> +{
> +	struct ecdh_ctx *ctx = ecdh_get_ctx(tfm);
> +	int nbytes = ctx->ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
> +
> +	/* Public key is made of two coordinates */
> +	return 2 * nbytes;
> +}
> +
> +static void no_exit_tfm(struct crypto_kpp *tfm)
> +{
> +	return;
> +}
> +
> +static struct kpp_alg ecdh = {
> +	.set_params = ecdh_set_params,
> +	.set_secret = ecdh_set_secret,
> +	.generate_public_key = ecdh_generate_public_key,
> +	.compute_shared_secret = ecdh_compute_shared_secret,
> +	.max_size = ecdh_max_size,
> +	.exit = no_exit_tfm,
> +	.base = {
> +		.cra_name = "ecdh",
> +		.cra_driver_name = "ecdh-generic",
> +		.cra_priority = 100,
> +		.cra_module = THIS_MODULE,
> +		.cra_ctxsize = sizeof(struct ecdh_ctx),
> +	},
> +};
> +
> +static int ecdh_init(void)
> +{
> +	return crypto_register_kpp(&ecdh);
> +}
> +
> +static void ecdh_exit(void)
> +{
> +	crypto_unregister_kpp(&ecdh);
> +}
> +
> +module_init(ecdh_init);
> +module_exit(ecdh_exit);
> +MODULE_ALIAS_CRYPTO("ecdh");
> +MODULE_LICENSE("GPL");
> +MODULE_DESCRIPTION("ECDH generic algorithm");
> diff --git a/crypto/testmgr.c b/crypto/testmgr.c
> index d68fa58..6d7b30c 100644
> --- a/crypto/testmgr.c
> +++ b/crypto/testmgr.c
> @@ -34,6 +34,7 @@
>  #include <crypto/akcipher.h>
>  #include <crypto/kpp.h>
>  #include <crypto/dh.h>
> +#include <crypto/ecdh.h>
> 
>  #include "internal.h"
> 
> @@ -119,7 +120,10 @@ struct akcipher_test_suite {
>  };
> 
>  struct kpp_test_suite {
> -	struct kpp_testvec_dh *vecs;
> +	union {
> +		struct kpp_testvec_dh *dh;
> +		struct kpp_testvec_ecdh *ecdh;
> +	} vecs;
>  	unsigned int count;
>  };
> 
> @@ -1891,12 +1895,113 @@ static int test_dh(struct crypto_kpp *tfm, struct
> kpp_testvec_dh *vecs, return 0;
>  }
> 
> -static int test_kpp(struct crypto_kpp *tfm, const char *alg,
> -		    struct kpp_testvec_dh *vecs, unsigned int tcount)
> +static int do_test_ecdh(struct crypto_kpp *tfm, struct kpp_testvec_ecdh
> *vec) +{
> +	struct kpp_request *req;
> +	void *input_buf = NULL;
> +	void *output_buf = NULL;
> +	struct tcrypt_result result;
> +	unsigned int out_len_max;
> +	int err = -ENOMEM;
> +	struct scatterlist src, dst;
> +	struct ecdh_params p;
> +	unsigned int nbytes = vec->ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
> +
> +	req = kpp_request_alloc(tfm, GFP_KERNEL);
> +	if (!req)
> +		return err;
> +
> +	init_completion(&result.completion);
> +
> +	/* Set curve_id */
> +	p.curve_id = vec->curve_id;
> +	err = crypto_kpp_set_params(tfm, (void *)&p, sizeof(p));
> +	if (err)
> +		goto free_req;
> +
> +	/* Set A private Key */
> +	err = crypto_kpp_set_secret(tfm, vec->private_a, nbytes);
> +	if (err)
> +		goto free_req;
> +
> +	out_len_max = crypto_kpp_maxsize(tfm);
> +	output_buf = kzalloc(out_len_max, GFP_KERNEL);
> +	if (!output_buf) {
> +		err = -ENOMEM;
> +		goto free_req;
> +	}
> +
> +	sg_init_one(&dst, output_buf, out_len_max);
> +	kpp_request_set_output(req, &dst, out_len_max);
> +	kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
> +				 tcrypt_complete, &result);
> +
> +	/* Compute A public key = aG mod p */
> +	err = wait_async_op(&result, crypto_kpp_generate_public_key(req));
> +	if (err) {
> +		pr_err("alg: ecdh: generate public key test failed. err %d\n",
> +		       err);
> +		goto free_output;
> +	}
> +	/* Verify calculated public key */
> +	if (memcmp(vec->expected_pub_a, sg_virt(req->dst), 2 * nbytes)) {
> +		pr_err("alg: ecdh: generate public key test failed. Invalid 
output\n");
> +		err = -EINVAL;
> +		goto free_output;
> +	}
> +
> +	/* Calculate shared secret key by using counter part public key. */
> +	input_buf = kzalloc(2 * nbytes, GFP_KERNEL);
> +	if (!input_buf) {
> +		err = -ENOMEM;
> +		goto free_output;
> +	}
> +
> +	memcpy(input_buf, vec->public_b, 2 * nbytes);
> +	sg_init_one(&src, input_buf, 2 * nbytes);
> +	sg_init_one(&dst, output_buf, out_len_max);
> +	kpp_request_set_input(req, &src, 2 * nbytes);
> +	kpp_request_set_output(req, &dst, out_len_max);
> +	kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
> +				 tcrypt_complete, &result);
> +	err = wait_async_op(&result, crypto_kpp_compute_shared_secret(req));
> +	if (err) {
> +		pr_err("alg: ecdh: compute shard secret test failed. err 
%d\n",
> +		       err);
> +		goto free_all;
> +	}
> +
> +	/*
> +	 * verify shared secret from which the user will derive
> +	 * secret key by executing whatever hash it has chosen
> +	 */
> +	if (memcmp(vec->expected_ss, sg_virt(req->dst), nbytes)) {
> +		pr_err("alg: ecdh: compute shared secret test failed. Invalid 
output\n");
> +		err = -EINVAL;
> +	}
> +
> +free_all:
> +	kfree(input_buf);
> +free_output:
> +	kfree(output_buf);
> +free_req:
> +	kpp_request_free(req);
> +	return err;
> +}
> +
> +static int test_ecdh(struct crypto_kpp *tfm, struct kpp_testvec_ecdh *vecs,
> +		     unsigned int tcount)
>  {
> -	if (strncmp(alg, "dh", 2) == 0)
> -		return test_dh(tfm, vecs, tcount);
> +	int ret, i;
> 
> +	for (i = 0; i < tcount; i++) {
> +		ret = do_test_ecdh(tfm, vecs++);
> +		if (ret) {
> +			pr_err("alg: ecdh: test failed on vector %d, 
err=%d\n",
> +			       i + 1, ret);
> +			return ret;
> +		}
> +	}
>  	return 0;
>  }
> 
> @@ -1912,9 +2017,12 @@ static int alg_test_kpp(const struct alg_test_desc
> *desc, const char *driver, driver, PTR_ERR(tfm));
>  		return PTR_ERR(tfm);
>  	}
> -	if (desc->suite.kpp.vecs)
> -		err = test_kpp(tfm, desc->alg, desc->suite.kpp.vecs,
> -			       desc->suite.kpp.count);
> +	if (!strncmp(desc->alg, "dh", 2) && desc->suite.kpp.vecs.dh)
> +		err = test_dh(tfm, desc->suite.kpp.vecs.dh,
> +			      desc->suite.kpp.count);
> +	else if (!strncmp(desc->alg, "ecdh", 4) && desc->suite.kpp.vecs.ecdh)
> +		err = test_ecdh(tfm, desc->suite.kpp.vecs.ecdh,
> +				desc->suite.kpp.count);
> 
>  	crypto_free_kpp(tfm);
>  	return err;
> @@ -2860,7 +2968,7 @@ static const struct alg_test_desc alg_test_descs[] = {
> .fips_allowed = 1,
>  		.suite = {
>  			.kpp = {
> -				.vecs = dh_tv_template,
> +				.vecs.dh = dh_tv_template,
>  				.count = DH_TEST_VECTORS
>  			}
>  		}
> @@ -3293,6 +3401,16 @@ static const struct alg_test_desc alg_test_descs[] =
> { }
>  		}
>  	}, {
> +		.alg = "ecdh",
> +		.test = alg_test_kpp,
> +		.fips_allowed = 1,
> +		.suite = {
> +			.kpp = {
> +				.vecs.ecdh = ecdh_tv_template,
> +				.count = ECDH_TEST_VECTORS
> +			}
> +		}
> +	}, {
>  		.alg = "gcm(aes)",
>  		.test = alg_test_aead,
>  		.fips_allowed = 1,
> diff --git a/crypto/testmgr.h b/crypto/testmgr.h
> index e9c34c7..74b3080 100644
> --- a/crypto/testmgr.h
> +++ b/crypto/testmgr.h
> @@ -26,6 +26,8 @@
> 
>  #include <linux/netlink.h>
> 
> +#include "ecc.h"
> +
>  #define MAX_DIGEST_SIZE		64
>  #define MAX_TAP			8
> 
> @@ -148,6 +150,15 @@ struct kpp_testvec_dh {
>  	unsigned short expected_ss_size;
>  };
> 
> +struct kpp_testvec_ecdh {
> +	unsigned int curve_id;
> +	char *private_a;
> +	char *expected_pub_a;
> +	char *public_b;
> +	char *expected_ss;
> +	unsigned short ndigits;
> +};
> +
>  static char zeroed_string[48];
> 
>  /*
> @@ -538,6 +549,68 @@ struct kpp_testvec_dh dh_tv_template[] = {
>  	}
>  };
> 
> +#define ECDH_TEST_VECTORS 2
> +
> +struct kpp_testvec_ecdh ecdh_tv_template[] = {
> +	{
> +	.curve_id = ECC_CURVE_NIST_P192,
> +	.private_a =
> +	"\xb5\x05\xb1\x71\x1e\xbf\x8c\xda"
> +	"\x4e\x19\x1e\x62\x1f\x23\x23\x31"
> +	"\x36\x1e\xd3\x84\x2f\xcc\x21\x72",
> +	.expected_pub_a =
> +	"\x1a\x04\xdb\xa5\xe1\xdd\x4e\x79"
> +	"\xa3\xe6\xef\x0e\x5c\x80\x49\x85"
> +	"\xfa\x78\xb4\xef\x49\xbd\x4c\x7c"
> +	"\x22\x90\x21\x02\xf9\x1b\x81\x5d"
> +	"\x0c\x8a\xa8\x98\xd6\x27\x69\x88"
> +	"\x5e\xbc\x94\xd8\x15\x9e\x21\xce",
> +	.public_b =
> +	"\xc3\xba\x67\x4b\x71\xec\xd0\x76"
> +	"\x7a\x99\x75\x64\x36\x13\x9a\x94"
> +	"\x5d\x8b\xdc\x60\x90\x91\xfd\x3f"
> +	"\xb0\x1f\x8a\x0a\x68\xc6\x88\x6e"
> +	"\x83\x87\xdd\x67\x09\xf8\x8d\x96"
> +	"\x07\xd6\xbd\x1c\xe6\x8d\x9d\x67",
> +	.expected_ss =
> +	"\xf4\x57\xcc\x4f\x1f\x4e\x31\xcc"
> +	"\xe3\x40\x60\xc8\x06\x93\xc6\x2e"
> +	"\x99\x80\x81\x28\xaf\xc5\x51\x74",
> +	.ndigits = 3,
> +	}, {
> +	.curve_id = ECC_CURVE_NIST_P256,
> +	.private_a =
> +	"\x24\xd1\x21\xeb\xe5\xcf\x2d\x83"
> +	"\xf6\x62\x1b\x6e\x43\x84\x3a\xa3"
> +	"\x8b\xe0\x86\xc3\x20\x19\xda\x92"
> +	"\x50\x53\x03\xe1\xc0\xea\xb8\x82",
> +	.expected_pub_a =
> +	"\x1a\x7f\xeb\x52\x00\xbd\x3c\x31"
> +	"\x7d\xb6\x70\xc1\x86\xa6\xc7\xc4"
> +	"\x3b\xc5\x5f\x6c\x6f\x58\x3c\xf5"
> +	"\xb6\x63\x82\x77\x33\x24\xa1\x5f"
> +	"\x6a\xca\x43\x6f\xf7\x7e\xff\x02"
> +	"\x37\x08\xcc\x40\x5e\x7a\xfd\x6a"
> +	"\x6a\x02\x6e\x41\x87\x68\x38\x77"
> +	"\xfa\xa9\x44\x43\x2d\xef\x09\xdf",
> +	.public_b =
> +	"\xcc\xb4\xda\x74\xb1\x47\x3f\xea"
> +	"\x6c\x70\x9e\x38\x2d\xc7\xaa\xb7"
> +	"\x29\xb2\x47\x03\x19\xab\xdd\x34"
> +	"\xbd\xa8\x2c\x93\xe1\xa4\x74\xd9"
> +	"\x64\x63\xf7\x70\x20\x2f\xa4\xe6"
> +	"\x9f\x4a\x38\xcc\xc0\x2c\x49\x2f"
> +	"\xb1\x32\xbb\xaf\x22\x61\xda\xcb"
> +	"\x6f\xdb\xa9\xaa\xfc\x77\x81\xf3",
> +	.expected_ss =
> +	"\xea\x17\x6f\x7e\x6e\x57\x26\x38"
> +	"\x8b\xfb\x41\xeb\xba\xc8\x6d\xa5"
> +	"\xa8\x72\xd1\xff\xc9\x47\x3d\xaa"
> +	"\x58\x43\x9f\x34\x0f\x8c\xf3\xc9",
> +	.ndigits = 4,
> +	}
> +};
> +
>  /*
>   * MD4 test vectors from RFC1320
>   */
> diff --git a/include/crypto/ecdh.h b/include/crypto/ecdh.h
> new file mode 100644
> index 0000000..438214b
> --- /dev/null
> +++ b/include/crypto/ecdh.h
> @@ -0,0 +1,24 @@
> +/*
> + * ECDH params to be used with kpp API
> + *
> + * Copyright (c) 2016, Intel Corporation
> + * Authors: Salvatore Benedetto <salvatore.benedetto@intel.com>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the
> Free + * Software Foundation; either version 2 of the License, or (at your
> option) + * any later version.
> + *
> + */
> +#ifndef _CRYPTO_ECDH_
> +#define _CRYPTO_ECDH_
> +
> +/* Curves IDs */
> +#define ECC_CURVE_NIST_P192	0x0001
> +#define ECC_CURVE_NIST_P256	0x0002
> +
> +struct ecdh_params {
> +	unsigned int curve_id;
> +};
> +
> +#endif


Ciao
Stephan