2024-02-22 02:37:43

by Charlie Jenkins

[permalink] [raw]
Subject: [PATCH 0/4] parisc: checksum: Use generic implementations and optimize checksum

After the parisc checksumming functions were created, generic versions
were written that are the same or better, making the
architecture-specific ones redundant.

Signed-off-by: Charlie Jenkins <[email protected]>
---
Charlie Jenkins (4):
asm-generic headers: Allow csum_partial arch override
parisc: checksum: Use generic implementations
parisc: checksum: Remove folding from csum_partial
parisc: checksum: Optimize from32to16

arch/parisc/Kconfig | 3 +++
arch/parisc/include/asm/checksum.h | 42 ++++++++------------------------------
arch/parisc/lib/checksum.c | 14 ++++---------
include/asm-generic/checksum.h | 2 ++
lib/checksum.c | 2 ++
5 files changed, 20 insertions(+), 43 deletions(-)
---
base-commit: 6613476e225e090cc9aad49be7fa504e290dd33d
change-id: 20240221-parisc_use_generic_checksum-1bb01d466877
--
- Charlie



2024-02-22 02:37:49

by Charlie Jenkins

[permalink] [raw]
Subject: [PATCH 1/4] asm-generic headers: Allow csum_partial arch override

Arches can have a more efficient implementation of csum_partial.

Signed-off-by: Charlie Jenkins <[email protected]>
---
include/asm-generic/checksum.h | 2 ++
lib/checksum.c | 2 ++
2 files changed, 4 insertions(+)

diff --git a/include/asm-generic/checksum.h b/include/asm-generic/checksum.h
index ad928cce268b..3309830ba2cb 100644
--- a/include/asm-generic/checksum.h
+++ b/include/asm-generic/checksum.h
@@ -4,6 +4,7 @@

#include <linux/bitops.h>

+#ifndef csum_partial
/*
* computes the checksum of a memory block at buff, length len,
* and adds in "sum" (32-bit)
@@ -17,6 +18,7 @@
* it's best to have buff aligned on a 32-bit boundary
*/
extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+#endif

#ifndef ip_fast_csum
/*
diff --git a/lib/checksum.c b/lib/checksum.c
index 6860d6b05a17..c115a9ac71d9 100644
--- a/lib/checksum.c
+++ b/lib/checksum.c
@@ -110,6 +110,7 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
EXPORT_SYMBOL(ip_fast_csum);
#endif

+#ifndef csum_partial
/*
* computes the checksum of a memory block at buff, length len,
* and adds in "sum" (32-bit)
@@ -134,6 +135,7 @@ __wsum csum_partial(const void *buff, int len, __wsum wsum)
return (__force __wsum)result;
}
EXPORT_SYMBOL(csum_partial);
+#endif

/*
* this routine is used for miscellaneous IP-like checksums, mainly

--
2.34.1


2024-02-22 02:38:01

by Charlie Jenkins

[permalink] [raw]
Subject: [PATCH 2/4] parisc: checksum: Use generic implementations

The generic implementations of the checksum functions csum_fold,
csum_tcpudp_magic, and ip_compute_csum are either identical to or
perform better than the parisc ones, so use the generic
implementations instead.

In order to use the generic implementations of checksum functions,
do_csum can no longer be static.

Signed-off-by: Charlie Jenkins <[email protected]>
---
arch/parisc/Kconfig | 3 +++
arch/parisc/include/asm/checksum.h | 42 ++++++++------------------------------
arch/parisc/lib/checksum.c | 2 +-
3 files changed, 13 insertions(+), 34 deletions(-)

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index d14ccc948a29..1638deb23287 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -122,6 +122,9 @@ config GENERIC_BUG
config GENERIC_BUG_RELATIVE_POINTERS
bool

+config GENERIC_CSUM
+ def_bool y
+
config GENERIC_HWEIGHT
bool
default y
diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h
index 3c43baca7b39..c7847a08ef7c 100644
--- a/arch/parisc/include/asm/checksum.h
+++ b/arch/parisc/include/asm/checksum.h
@@ -17,6 +17,7 @@
* it's best to have buff aligned on a 32-bit boundary
*/
extern __wsum csum_partial(const void *, int, __wsum);
+#define csum_partial csum_partial

/*
* Optimized for IP headers, which always checksum on 4 octet boundaries.
@@ -57,20 +58,8 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
return (__force __sum16)sum;
}

-/*
- * Fold a partial checksum
- */
-static inline __sum16 csum_fold(__wsum csum)
-{
- u32 sum = (__force u32)csum;
- /* add the swapped two 16-bit halves of sum,
- a possible carry from adding the two 16-bit halves,
- will carry from the lower half into the upper half,
- giving us the correct sum in the upper half. */
- sum += (sum << 16) + (sum >> 16);
- return (__force __sum16)(~sum >> 16);
-}
-
+#define ip_fast_csum ip_fast_csum
+
static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
__u32 len, __u8 proto,
__wsum sum)
@@ -85,28 +74,15 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
return sum;
}

-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
- __u32 len, __u8 proto,
- __wsum sum)
-{
- return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
-}
-
-/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
- */
-static inline __sum16 ip_compute_csum(const void *buf, int len)
-{
- return csum_fold (csum_partial(buf, len, 0));
-}
+#define csum_tcpudp_nofold csum_tcpudp_nofold

+extern unsigned int do_csum(const unsigned char *buff, int len);
+#define do_csum do_csum

#define _HAVE_ARCH_IPV6_CSUM
+
+#include <asm-generic/checksum.h>
+
static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
const struct in6_addr *daddr,
__u32 len, __u8 proto,
diff --git a/arch/parisc/lib/checksum.c b/arch/parisc/lib/checksum.c
index 4818f3db84a5..05f5ca4b2f96 100644
--- a/arch/parisc/lib/checksum.c
+++ b/arch/parisc/lib/checksum.c
@@ -34,7 +34,7 @@ static inline unsigned short from32to16(unsigned int x)
return (unsigned short)x;
}

-static inline unsigned int do_csum(const unsigned char * buff, int len)
+unsigned int do_csum(const unsigned char *buff, int len)
{
int odd, count;
unsigned int result = 0;

--
2.34.1


2024-02-22 02:38:14

by Charlie Jenkins

[permalink] [raw]
Subject: [PATCH 3/4] parisc: checksum: Remove folding from csum_partial

The parisc implementation of csum_partial previously folded the result
into 16 bits instead of returning all 32 bits and letting consumers like
ip_compute_csum do the folding. Since ip_compute_csum no longer depends
on this requirement, remove the folding so that the parisc
implementation operates the same as other architectures.

Signed-off-by: Charlie Jenkins <[email protected]>
---
arch/parisc/lib/checksum.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/parisc/lib/checksum.c b/arch/parisc/lib/checksum.c
index 05f5ca4b2f96..eaa660491e24 100644
--- a/arch/parisc/lib/checksum.c
+++ b/arch/parisc/lib/checksum.c
@@ -95,14 +95,11 @@ unsigned int do_csum(const unsigned char *buff, int len)
/*
* computes a partial checksum, e.g. for TCP/UDP fragments
*/
-/*
- * why bother folding?
- */
__wsum csum_partial(const void *buff, int len, __wsum sum)
{
unsigned int result = do_csum(buff, len);
addc(result, sum);
- return (__force __wsum)from32to16(result);
+ return (__force __wsum)result;
}

EXPORT_SYMBOL(csum_partial);

--
2.34.1


2024-02-22 02:38:20

by Charlie Jenkins

[permalink] [raw]
Subject: [PATCH 4/4] parisc: checksum: Optimize from32to16

Replace the shifting and masking of x with a rotation. This generates
better assembly.

Signed-off-by: Charlie Jenkins <[email protected]>
---
arch/parisc/lib/checksum.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/parisc/lib/checksum.c b/arch/parisc/lib/checksum.c
index eaa660491e24..1ae8cc730d13 100644
--- a/arch/parisc/lib/checksum.c
+++ b/arch/parisc/lib/checksum.c
@@ -27,11 +27,8 @@

static inline unsigned short from32to16(unsigned int x)
{
- /* 32 bits --> 16 bits + carry */
- x = (x & 0xffff) + (x >> 16);
- /* 16 bits + carry --> 16 bits including carry */
- x = (x & 0xffff) + (x >> 16);
- return (unsigned short)x;
+ x += ror32(x, 16);
+ return (unsigned short)(x >> 16);
}

unsigned int do_csum(const unsigned char *buff, int len)

--
2.34.1


2024-02-22 16:05:05

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH 2/4] parisc: checksum: Use generic implementations

On Wed, Feb 21, 2024 at 06:37:12PM -0800, Charlie Jenkins wrote:
> The generic implementations of the checksum functions
> csum_tcpudp_nofold, csum_fold, and ip_compute_csum are either identical
> or perform better than the parisc ones, so use the generic
> implementations instead.
>
> In order to use the generic implementations of checksum functions,
> do_csum can no longer be static.
>
> Signed-off-by: Charlie Jenkins <[email protected]>

Tested-by: Guenter Roeck <[email protected]>

> ---
> arch/parisc/Kconfig | 3 +++
> arch/parisc/include/asm/checksum.h | 42 ++++++++------------------------------
> arch/parisc/lib/checksum.c | 2 +-
> 3 files changed, 13 insertions(+), 34 deletions(-)
>
> diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
> index d14ccc948a29..1638deb23287 100644
> --- a/arch/parisc/Kconfig
> +++ b/arch/parisc/Kconfig
> @@ -122,6 +122,9 @@ config GENERIC_BUG
> config GENERIC_BUG_RELATIVE_POINTERS
> bool
>
> +config GENERIC_CSUM
> + def_bool y
> +
> config GENERIC_HWEIGHT
> bool
> default y
> diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h
> index 3c43baca7b39..c7847a08ef7c 100644
> --- a/arch/parisc/include/asm/checksum.h
> +++ b/arch/parisc/include/asm/checksum.h
> @@ -17,6 +17,7 @@
> * it's best to have buff aligned on a 32-bit boundary
> */
> extern __wsum csum_partial(const void *, int, __wsum);
> +#define csum_partial csum_partial
>
> /*
> * Optimized for IP headers, which always checksum on 4 octet boundaries.
> @@ -57,20 +58,8 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
> return (__force __sum16)sum;
> }
>
> -/*
> - * Fold a partial checksum
> - */
> -static inline __sum16 csum_fold(__wsum csum)
> -{
> - u32 sum = (__force u32)csum;
> - /* add the swapped two 16-bit halves of sum,
> - a possible carry from adding the two 16-bit halves,
> - will carry from the lower half into the upper half,
> - giving us the correct sum in the upper half. */
> - sum += (sum << 16) + (sum >> 16);
> - return (__force __sum16)(~sum >> 16);
> -}
> -
> +#define ip_fast_csum ip_fast_csum
> +
> static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
> __u32 len, __u8 proto,
> __wsum sum)
> @@ -85,28 +74,15 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
> return sum;
> }
>
> -/*
> - * computes the checksum of the TCP/UDP pseudo-header
> - * returns a 16-bit checksum, already complemented
> - */
> -static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
> - __u32 len, __u8 proto,
> - __wsum sum)
> -{
> - return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
> -}
> -
> -/*
> - * this routine is used for miscellaneous IP-like checksums, mainly
> - * in icmp.c
> - */
> -static inline __sum16 ip_compute_csum(const void *buf, int len)
> -{
> - return csum_fold (csum_partial(buf, len, 0));
> -}
> +#define csum_tcpudp_nofold csum_tcpudp_nofold
>
> +extern unsigned int do_csum(const unsigned char *buff, int len);
> +#define do_csum do_csum
>
> #define _HAVE_ARCH_IPV6_CSUM
> +
> +#include <asm-generic/checksum.h>
> +
> static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
> const struct in6_addr *daddr,
> __u32 len, __u8 proto,
> diff --git a/arch/parisc/lib/checksum.c b/arch/parisc/lib/checksum.c
> index 4818f3db84a5..05f5ca4b2f96 100644
> --- a/arch/parisc/lib/checksum.c
> +++ b/arch/parisc/lib/checksum.c
> @@ -34,7 +34,7 @@ static inline unsigned short from32to16(unsigned int x)
> return (unsigned short)x;
> }
>
> -static inline unsigned int do_csum(const unsigned char * buff, int len)
> +unsigned int do_csum(const unsigned char *buff, int len)
> {
> int odd, count;
> unsigned int result = 0;
>
> --
> 2.34.1
>

2024-02-22 16:07:02

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH 4/4] parisc: checksum: Optimize from32to16

On Wed, Feb 21, 2024 at 06:37:14PM -0800, Charlie Jenkins wrote:
> Replace the shifting and masking of x with a rotation. This generates
> better assembly.
>
> Signed-off-by: Charlie Jenkins <[email protected]>

Tested-by: Guenter Roeck <[email protected]>

> ---
> arch/parisc/lib/checksum.c | 7 ++-----
> 1 file changed, 2 insertions(+), 5 deletions(-)
>
> diff --git a/arch/parisc/lib/checksum.c b/arch/parisc/lib/checksum.c
> index eaa660491e24..1ae8cc730d13 100644
> --- a/arch/parisc/lib/checksum.c
> +++ b/arch/parisc/lib/checksum.c
> @@ -27,11 +27,8 @@
>
> static inline unsigned short from32to16(unsigned int x)
> {
> - /* 32 bits --> 16 bits + carry */
> - x = (x & 0xffff) + (x >> 16);
> - /* 16 bits + carry --> 16 bits including carry */
> - x = (x & 0xffff) + (x >> 16);
> - return (unsigned short)x;
> + x += ror32(x, 16);
> + return (unsigned short)(x >> 16);
> }
>
> unsigned int do_csum(const unsigned char *buff, int len)
>
> --
> 2.34.1
>

2024-02-22 16:21:32

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH 1/4] asm-generic headers: Allow csum_partial arch override

On Wed, Feb 21, 2024 at 06:37:11PM -0800, Charlie Jenkins wrote:
> Arches can have more a efficient implementation of csum_partial.
>
> Signed-off-by: Charlie Jenkins <[email protected]>

Tested-by: Guenter Roeck <[email protected]>

> ---
> include/asm-generic/checksum.h | 2 ++
> lib/checksum.c | 2 ++
> 2 files changed, 4 insertions(+)
>
> diff --git a/include/asm-generic/checksum.h b/include/asm-generic/checksum.h
> index ad928cce268b..3309830ba2cb 100644
> --- a/include/asm-generic/checksum.h
> +++ b/include/asm-generic/checksum.h
> @@ -4,6 +4,7 @@
>
> #include <linux/bitops.h>
>
> +#ifndef csum_partial
> /*
> * computes the checksum of a memory block at buff, length len,
> * and adds in "sum" (32-bit)
> @@ -17,6 +18,7 @@
> * it's best to have buff aligned on a 32-bit boundary
> */
> extern __wsum csum_partial(const void *buff, int len, __wsum sum);
> +#endif
>
> #ifndef ip_fast_csum
> /*
> diff --git a/lib/checksum.c b/lib/checksum.c
> index 6860d6b05a17..c115a9ac71d9 100644
> --- a/lib/checksum.c
> +++ b/lib/checksum.c
> @@ -110,6 +110,7 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
> EXPORT_SYMBOL(ip_fast_csum);
> #endif
>
> +#ifndef csum_partial
> /*
> * computes the checksum of a memory block at buff, length len,
> * and adds in "sum" (32-bit)
> @@ -134,6 +135,7 @@ __wsum csum_partial(const void *buff, int len, __wsum wsum)
> return (__force __wsum)result;
> }
> EXPORT_SYMBOL(csum_partial);
> +#endif
>
> /*
> * this routine is used for miscellaneous IP-like checksums, mainly
>
> --
> 2.34.1
>

2024-02-22 16:23:05

by Guenter Roeck

[permalink] [raw]
Subject: Re: [PATCH 3/4] parisc: checksum: Remove folding from csum_partial

On Wed, Feb 21, 2024 at 06:37:13PM -0800, Charlie Jenkins wrote:
> The parisc implementation of csum_partial previously folded the result
> into 16 bits instead of returning all 32 bits and letting consumers like
> ip_compute_csum do the folding. Since ip_compute_csum no longer depends
> on this requirement, remove the folding so that the parisc
> implementation operates the same as other architectures.
>
> Signed-off-by: Charlie Jenkins <[email protected]>

Tested-by: Guenter Roeck <[email protected]>

> ---
> arch/parisc/lib/checksum.c | 5 +----
> 1 file changed, 1 insertion(+), 4 deletions(-)
>
> diff --git a/arch/parisc/lib/checksum.c b/arch/parisc/lib/checksum.c
> index 05f5ca4b2f96..eaa660491e24 100644
> --- a/arch/parisc/lib/checksum.c
> +++ b/arch/parisc/lib/checksum.c
> @@ -95,14 +95,11 @@ unsigned int do_csum(const unsigned char *buff, int len)
> /*
> * computes a partial checksum, e.g. for TCP/UDP fragments
> */
> -/*
> - * why bother folding?
> - */
> __wsum csum_partial(const void *buff, int len, __wsum sum)
> {
> unsigned int result = do_csum(buff, len);
> addc(result, sum);
> - return (__force __wsum)from32to16(result);
> + return (__force __wsum)result;
> }
>
> EXPORT_SYMBOL(csum_partial);
>
> --
> 2.34.1
>