2022-12-19 15:46:41

by Peter Zijlstra

[permalink] [raw]
Subject: [RFC][PATCH 01/12] crypto: Remove u128 usage

As seems to be the common (majority) usage in crypto, use __uint128_t
instead of u128.

This frees up u128 for definition in linux/types.h.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
---
lib/crypto/curve25519-hacl64.c | 142 ++++++++++++++++++++---------------------
lib/crypto/poly1305-donna64.c | 22 ++----
2 files changed, 80 insertions(+), 84 deletions(-)

--- a/lib/crypto/curve25519-hacl64.c
+++ b/lib/crypto/curve25519-hacl64.c
@@ -14,8 +14,6 @@
#include <crypto/curve25519.h>
#include <linux/string.h>

-typedef __uint128_t u128;
-
static __always_inline u64 u64_eq_mask(u64 a, u64 b)
{
u64 x = a ^ b;
@@ -50,77 +48,77 @@ static __always_inline void modulo_carry
b[0] = b0_;
}

-static __always_inline void fproduct_copy_from_wide_(u64 *output, u128 *input)
+static __always_inline void fproduct_copy_from_wide_(u64 *output, __uint128_t *input)
{
{
- u128 xi = input[0];
+ __uint128_t xi = input[0];
output[0] = ((u64)(xi));
}
{
- u128 xi = input[1];
+ __uint128_t xi = input[1];
output[1] = ((u64)(xi));
}
{
- u128 xi = input[2];
+ __uint128_t xi = input[2];
output[2] = ((u64)(xi));
}
{
- u128 xi = input[3];
+ __uint128_t xi = input[3];
output[3] = ((u64)(xi));
}
{
- u128 xi = input[4];
+ __uint128_t xi = input[4];
output[4] = ((u64)(xi));
}
}

static __always_inline void
-fproduct_sum_scalar_multiplication_(u128 *output, u64 *input, u64 s)
+fproduct_sum_scalar_multiplication_(__uint128_t *output, u64 *input, u64 s)
{
- output[0] += (u128)input[0] * s;
- output[1] += (u128)input[1] * s;
- output[2] += (u128)input[2] * s;
- output[3] += (u128)input[3] * s;
- output[4] += (u128)input[4] * s;
+ output[0] += (__uint128_t)input[0] * s;
+ output[1] += (__uint128_t)input[1] * s;
+ output[2] += (__uint128_t)input[2] * s;
+ output[3] += (__uint128_t)input[3] * s;
+ output[4] += (__uint128_t)input[4] * s;
}

-static __always_inline void fproduct_carry_wide_(u128 *tmp)
+static __always_inline void fproduct_carry_wide_(__uint128_t *tmp)
{
{
u32 ctr = 0;
- u128 tctr = tmp[ctr];
- u128 tctrp1 = tmp[ctr + 1];
+ __uint128_t tctr = tmp[ctr];
+ __uint128_t tctrp1 = tmp[ctr + 1];
u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
- u128 c = ((tctr) >> (51));
- tmp[ctr] = ((u128)(r0));
+ __uint128_t c = ((tctr) >> (51));
+ tmp[ctr] = ((__uint128_t)(r0));
tmp[ctr + 1] = ((tctrp1) + (c));
}
{
u32 ctr = 1;
- u128 tctr = tmp[ctr];
- u128 tctrp1 = tmp[ctr + 1];
+ __uint128_t tctr = tmp[ctr];
+ __uint128_t tctrp1 = tmp[ctr + 1];
u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
- u128 c = ((tctr) >> (51));
- tmp[ctr] = ((u128)(r0));
+ __uint128_t c = ((tctr) >> (51));
+ tmp[ctr] = ((__uint128_t)(r0));
tmp[ctr + 1] = ((tctrp1) + (c));
}

{
u32 ctr = 2;
- u128 tctr = tmp[ctr];
- u128 tctrp1 = tmp[ctr + 1];
+ __uint128_t tctr = tmp[ctr];
+ __uint128_t tctrp1 = tmp[ctr + 1];
u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
- u128 c = ((tctr) >> (51));
- tmp[ctr] = ((u128)(r0));
+ __uint128_t c = ((tctr) >> (51));
+ tmp[ctr] = ((__uint128_t)(r0));
tmp[ctr + 1] = ((tctrp1) + (c));
}
{
u32 ctr = 3;
- u128 tctr = tmp[ctr];
- u128 tctrp1 = tmp[ctr + 1];
+ __uint128_t tctr = tmp[ctr];
+ __uint128_t tctrp1 = tmp[ctr + 1];
u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU;
- u128 c = ((tctr) >> (51));
- tmp[ctr] = ((u128)(r0));
+ __uint128_t c = ((tctr) >> (51));
+ tmp[ctr] = ((__uint128_t)(r0));
tmp[ctr + 1] = ((tctrp1) + (c));
}
}
@@ -154,7 +152,7 @@ static __always_inline void fmul_shift_r
output[0] = 19 * b0;
}

-static __always_inline void fmul_mul_shift_reduce_(u128 *output, u64 *input,
+static __always_inline void fmul_mul_shift_reduce_(__uint128_t *output, u64 *input,
u64 *input21)
{
u32 i;
@@ -188,21 +186,21 @@ static __always_inline void fmul_fmul(u6
{
u64 tmp[5] = { input[0], input[1], input[2], input[3], input[4] };
{
- u128 b4;
- u128 b0;
- u128 b4_;
- u128 b0_;
+ __uint128_t b4;
+ __uint128_t b0;
+ __uint128_t b4_;
+ __uint128_t b0_;
u64 i0;
u64 i1;
u64 i0_;
u64 i1_;
- u128 t[5] = { 0 };
+ __uint128_t t[5] = { 0 };
fmul_mul_shift_reduce_(t, tmp, input21);
fproduct_carry_wide_(t);
b4 = t[4];
b0 = t[0];
- b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU))));
- b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51))))))));
+ b4_ = ((b4) & (((__uint128_t)(0x7ffffffffffffLLU))));
+ b0_ = ((b0) + (((__uint128_t)(19) * (((u64)(((b4) >> (51))))))));
t[4] = b4_;
t[0] = b0_;
fproduct_copy_from_wide_(output, t);
@@ -215,7 +213,7 @@ static __always_inline void fmul_fmul(u6
}
}

-static __always_inline void fsquare_fsquare__(u128 *tmp, u64 *output)
+static __always_inline void fsquare_fsquare__(__uint128_t *tmp, u64 *output)
{
u64 r0 = output[0];
u64 r1 = output[1];
@@ -227,16 +225,16 @@ static __always_inline void fsquare_fsqu
u64 d2 = r2 * 2 * 19;
u64 d419 = r4 * 19;
u64 d4 = d419 * 2;
- u128 s0 = ((((((u128)(r0) * (r0))) + (((u128)(d4) * (r1))))) +
- (((u128)(d2) * (r3))));
- u128 s1 = ((((((u128)(d0) * (r1))) + (((u128)(d4) * (r2))))) +
- (((u128)(r3 * 19) * (r3))));
- u128 s2 = ((((((u128)(d0) * (r2))) + (((u128)(r1) * (r1))))) +
- (((u128)(d4) * (r3))));
- u128 s3 = ((((((u128)(d0) * (r3))) + (((u128)(d1) * (r2))))) +
- (((u128)(r4) * (d419))));
- u128 s4 = ((((((u128)(d0) * (r4))) + (((u128)(d1) * (r3))))) +
- (((u128)(r2) * (r2))));
+ __uint128_t s0 = ((((((__uint128_t)(r0) * (r0))) + (((__uint128_t)(d4) * (r1))))) +
+ (((__uint128_t)(d2) * (r3))));
+ __uint128_t s1 = ((((((__uint128_t)(d0) * (r1))) + (((__uint128_t)(d4) * (r2))))) +
+ (((__uint128_t)(r3 * 19) * (r3))));
+ __uint128_t s2 = ((((((__uint128_t)(d0) * (r2))) + (((__uint128_t)(r1) * (r1))))) +
+ (((__uint128_t)(d4) * (r3))));
+ __uint128_t s3 = ((((((__uint128_t)(d0) * (r3))) + (((__uint128_t)(d1) * (r2))))) +
+ (((__uint128_t)(r4) * (d419))));
+ __uint128_t s4 = ((((((__uint128_t)(d0) * (r4))) + (((__uint128_t)(d1) * (r3))))) +
+ (((__uint128_t)(r2) * (r2))));
tmp[0] = s0;
tmp[1] = s1;
tmp[2] = s2;
@@ -244,12 +242,12 @@ static __always_inline void fsquare_fsqu
tmp[4] = s4;
}

-static __always_inline void fsquare_fsquare_(u128 *tmp, u64 *output)
+static __always_inline void fsquare_fsquare_(__uint128_t *tmp, u64 *output)
{
- u128 b4;
- u128 b0;
- u128 b4_;
- u128 b0_;
+ __uint128_t b4;
+ __uint128_t b0;
+ __uint128_t b4_;
+ __uint128_t b0_;
u64 i0;
u64 i1;
u64 i0_;
@@ -258,8 +256,8 @@ static __always_inline void fsquare_fsqu
fproduct_carry_wide_(tmp);
b4 = tmp[4];
b0 = tmp[0];
- b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU))));
- b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51))))))));
+ b4_ = ((b4) & (((__uint128_t)(0x7ffffffffffffLLU))));
+ b0_ = ((b0) + (((__uint128_t)(19) * (((u64)(((b4) >> (51))))))));
tmp[4] = b4_;
tmp[0] = b0_;
fproduct_copy_from_wide_(output, tmp);
@@ -271,7 +269,7 @@ static __always_inline void fsquare_fsqu
output[1] = i1_;
}

-static __always_inline void fsquare_fsquare_times_(u64 *output, u128 *tmp,
+static __always_inline void fsquare_fsquare_times_(u64 *output, __uint128_t *tmp,
u32 count1)
{
u32 i;
@@ -283,7 +281,7 @@ static __always_inline void fsquare_fsqu
static __always_inline void fsquare_fsquare_times(u64 *output, u64 *input,
u32 count1)
{
- u128 t[5];
+ __uint128_t t[5];
memcpy(output, input, 5 * sizeof(*input));
fsquare_fsquare_times_(output, t, count1);
}
@@ -291,7 +289,7 @@ static __always_inline void fsquare_fsqu
static __always_inline void fsquare_fsquare_times_inplace(u64 *output,
u32 count1)
{
- u128 t[5];
+ __uint128_t t[5];
fsquare_fsquare_times_(output, t, count1);
}

@@ -396,36 +394,36 @@ static __always_inline void fdifference(

static __always_inline void fscalar(u64 *output, u64 *b, u64 s)
{
- u128 tmp[5];
- u128 b4;
- u128 b0;
- u128 b4_;
- u128 b0_;
+ __uint128_t tmp[5];
+ __uint128_t b4;
+ __uint128_t b0;
+ __uint128_t b4_;
+ __uint128_t b0_;
{
u64 xi = b[0];
- tmp[0] = ((u128)(xi) * (s));
+ tmp[0] = ((__uint128_t)(xi) * (s));
}
{
u64 xi = b[1];
- tmp[1] = ((u128)(xi) * (s));
+ tmp[1] = ((__uint128_t)(xi) * (s));
}
{
u64 xi = b[2];
- tmp[2] = ((u128)(xi) * (s));
+ tmp[2] = ((__uint128_t)(xi) * (s));
}
{
u64 xi = b[3];
- tmp[3] = ((u128)(xi) * (s));
+ tmp[3] = ((__uint128_t)(xi) * (s));
}
{
u64 xi = b[4];
- tmp[4] = ((u128)(xi) * (s));
+ tmp[4] = ((__uint128_t)(xi) * (s));
}
fproduct_carry_wide_(tmp);
b4 = tmp[4];
b0 = tmp[0];
- b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU))));
- b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51))))))));
+ b4_ = ((b4) & (((__uint128_t)(0x7ffffffffffffLLU))));
+ b0_ = ((b0) + (((__uint128_t)(19) * (((u64)(((b4) >> (51))))))));
tmp[4] = b4_;
tmp[0] = b0_;
fproduct_copy_from_wide_(output, tmp);
--- a/lib/crypto/poly1305-donna64.c
+++ b/lib/crypto/poly1305-donna64.c
@@ -10,8 +10,6 @@
#include <asm/unaligned.h>
#include <crypto/internal/poly1305.h>

-typedef __uint128_t u128;
-
void poly1305_core_setkey(struct poly1305_core_key *key,
const u8 raw_key[POLY1305_BLOCK_SIZE])
{
@@ -41,7 +39,7 @@ void poly1305_core_blocks(struct poly130
u64 s1, s2;
u64 h0, h1, h2;
u64 c;
- u128 d0, d1, d2, d;
+ __uint128_t d0, d1, d2, d;

if (!nblocks)
return;
@@ -71,20 +69,20 @@ void poly1305_core_blocks(struct poly130
h2 += (((t1 >> 24)) & 0x3ffffffffffULL) | hibit64;

/* h *= r */
- d0 = (u128)h0 * r0;
- d = (u128)h1 * s2;
+ d0 = (__uint128_t)h0 * r0;
+ d = (__uint128_t)h1 * s2;
d0 += d;
- d = (u128)h2 * s1;
+ d = (__uint128_t)h2 * s1;
d0 += d;
- d1 = (u128)h0 * r1;
- d = (u128)h1 * r0;
+ d1 = (__uint128_t)h0 * r1;
+ d = (__uint128_t)h1 * r0;
d1 += d;
- d = (u128)h2 * s2;
+ d = (__uint128_t)h2 * s2;
d1 += d;
- d2 = (u128)h0 * r2;
- d = (u128)h1 * r1;
+ d2 = (__uint128_t)h0 * r2;
+ d = (__uint128_t)h1 * r1;
d2 += d;
- d = (u128)h2 * r0;
+ d = (__uint128_t)h2 * r0;
d2 += d;

/* (partial) h %= p */



2022-12-19 15:59:46

by Jason A. Donenfeld

[permalink] [raw]
Subject: Re: [RFC][PATCH 01/12] crypto: Remove u128 usage

On Mon, Dec 19, 2022 at 04:35:26PM +0100, Peter Zijlstra wrote:
> As seems to be the common (majority) usage in crypto, use __uint128_t
> instead of u128.
>
> This frees up u128 for definition in linux/types.h.
>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> ---
> lib/crypto/curve25519-hacl64.c | 142 ++++++++++++++++++++---------------------
> lib/crypto/poly1305-donna64.c | 22 ++----
> 2 files changed, 80 insertions(+), 84 deletions(-)
>
> --- a/lib/crypto/curve25519-hacl64.c
> +++ b/lib/crypto/curve25519-hacl64.c
> @@ -14,8 +14,6 @@
> #include <crypto/curve25519.h>
> #include <linux/string.h>
>
> -typedef __uint128_t u128;
> -
> static __always_inline u64 u64_eq_mask(u64 a, u64 b)
> {
> u64 x = a ^ b;
> @@ -50,77 +48,77 @@ static __always_inline void modulo_carry
> b[0] = b0_;
> }
>
> -static __always_inline void fproduct_copy_from_wide_(u64 *output, u128 *input)
> +static __always_inline void fproduct_copy_from_wide_(u64 *output, __uint128_t *input)
> {
> {
> - u128 xi = input[0];
> + __uint128_t xi = input[0];

Why not just use `u128` from types.h in this file?

Jason

2022-12-19 17:09:05

by Jason A. Donenfeld

[permalink] [raw]
Subject: Re: [RFC][PATCH 01/12] crypto: Remove u128 usage

On Mon, Dec 19, 2022 at 6:01 PM Peter Zijlstra <[email protected]> wrote:
>
> On Mon, Dec 19, 2022 at 04:56:33PM +0100, Jason A. Donenfeld wrote:
>
> > Why not just use `u128` from types.h in this file?
>
> Ordering, I can't very well introduce it in types.h while other
> definitions exist in the tree. So I first have to clean up the u128
> namespace.

Is there a patch at the end of the series that adds it back in to use u128?

2022-12-19 17:11:08

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [RFC][PATCH 01/12] crypto: Remove u128 usage

On Mon, Dec 19, 2022 at 04:56:33PM +0100, Jason A. Donenfeld wrote:

> Why not just use `u128` from types.h in this file?

Ordering, I can't very well introduce it in types.h while other
definitions exist in the tree. So I first have to clean up the u128
namespace.

2022-12-20 03:56:28

by Herbert Xu

[permalink] [raw]
Subject: Re: [RFC][PATCH 01/12] crypto: Remove u128 usage

On Mon, Dec 19, 2022 at 06:03:04PM +0100, Jason A. Donenfeld wrote:
>
> Is there a patch at the end of the series that adds it back in to use u128?

Could we do some ifdef trickery to reduce the amount of code churn
please? Changing everything away from u128 and then back to it seems
silly.

Thanks,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

2022-12-20 04:32:40

by Herbert Xu

[permalink] [raw]
Subject: Re: [RFC][PATCH 01/12] crypto: Remove u128 usage

On Mon, Dec 19, 2022 at 08:11:37PM -0800, H. Peter Anvin wrote:
>
> Seems like "merging common code snippets" is something we at least used to do with single patches...

I certainly don't have any objections if we go down this route.

Thanks,
--
Email: Herbert Xu <[email protected]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt