Message-ID: <1f6cabc8b183056546571b391770e1eea8524fd3.camel@perches.com>
Subject: Re: [PATCH net v3] net: Force inlining of checksum functions in net/checksum.h
From: Joe Perches <joe@perches.com>
To: Christophe Leroy, "David S. Miller", Jakub Kicinski
Cc: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	netdev@vger.kernel.org, Masahiro Yamada
Date: Thu, 17 Feb 2022 07:42:04 -0800
In-Reply-To: <978951d76d8cb84bab347c7623bc163e9a038452.1645100305.git.christophe.leroy@csgroup.eu>
References: <978951d76d8cb84bab347c7623bc163e9a038452.1645100305.git.christophe.leroy@csgroup.eu>

On Thu, 2022-02-17 at 13:19 +0100, Christophe Leroy wrote:
> All functions defined as static inline in net/checksum.h are
> meant to be inlined for performance reason.
> 
> But since commit ac7c3e4ff401 ("compiler: enable
> CONFIG_OPTIMIZE_INLINING forcibly") the compiler is allowed to
> uninline functions when it wants.
> 
> Fair enough in the general case, but for tiny performance critical
> checksum helpers that's counter-productive.

Thanks.  Trivial style notes:

> diff --git a/include/net/checksum.h b/include/net/checksum.h
[]
> @@ -22,7 +22,7 @@
>  #include <asm/checksum.h>
>  
>  #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
> -static inline
> +static __always_inline
>  __wsum csum_and_copy_from_user (const void __user *src, void *dst,
>  				      int len)
>  {

__wsum might be better placed on the previous line.

[]
> @@ -45,7 +45,7 @@ static __inline__ __wsum csum_and_copy_to_user
>  #endif
>  
>  #ifndef _HAVE_ARCH_CSUM_AND_COPY
> -static inline __wsum
> +static __always_inline __wsum
>  csum_partial_copy_nocheck(const void *src, void *dst, int len)

To be consistent with the location of the __wsum return value when
splitting the function definitions across multiple lines.
(like the below)

> @@ -88,42 +88,43 @@ static inline __wsum csum_shift(__wsum sum, int offset)
>  	return sum;
>  }
>  
> -static inline __wsum
> +static __always_inline __wsum
>  csum_block_add(__wsum csum, __wsum csum2, int offset)
>  {
>  	return csum_add(csum, csum_shift(csum2, offset));
>  }
>  
> -static inline __wsum
> +static __always_inline __wsum
>  csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len)
>  {
>  	return csum_block_add(csum, csum2, offset);
>  }
>  
> -static inline __wsum
> +static __always_inline __wsum
>  csum_block_sub(__wsum csum, __wsum csum2, int offset)
>  {
>  	return csum_block_add(csum, ~csum2, offset);
>  }
>  
> -static inline __wsum csum_unfold(__sum16 n)
> +static __always_inline __wsum csum_unfold(__sum16 n)
>  {
>  	return (__force __wsum)n;
>  }
>  
[]
> -static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum)
> +static __always_inline
> +__wsum csum_partial_ext(const void *buff, int len, __wsum sum)
>  {
>  	return csum_partial(buff, len, sum);
>  }

And this __wsum could be moved too.
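As a self-contained illustration of the two layouts (hypothetical
helper names and a local stand-in for the kernel's __wsum typedef,
not part of the patch):

/* Sketch only: __wsum here stands in for the kernel's bitwise typedef. */
typedef unsigned int __wsum;
#define __always_inline inline __attribute__((__always_inline__))

/* Layout used by most of the patch: return type after the qualifiers. */
static __always_inline __wsum
csum_example_one(__wsum csum, int offset)
{
	return csum + (__wsum)offset;
}

/* Layout in csum_and_copy_from_user(): return type starting the next line. */
static __always_inline
__wsum csum_example_two(__wsum csum, int offset)
{
	return csum + (__wsum)offset;
}

Both compile identically; the notes here are only about picking one
style and using it consistently.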
> @@ -150,15 +151,15 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
[]
> -static inline __wsum remcsum_adjust(void *ptr, __wsum csum,
> +static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum,
>  				     int start, int offset)
>  {
>  	__sum16 *psum = (__sum16 *)(ptr + offset);

And this one could be split like the above

static __always_inline __wsum
remcsum_adjust(void *ptr, __wsum csum, int start, int offset)
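For reference, a minimal userspace sketch of what the patch is about:
__always_inline is the always_inline function attribute (the kernel
defines it this way in include/linux/compiler_attributes.h), and unlike
plain "inline" it does not leave the compiler the latitude that commit
ac7c3e4ff401 opened up:

#include <stdio.h>

/* Same expansion the kernel uses for __always_inline. */
#define __always_inline inline __attribute__((__always_inline__))

/* Plain "inline" is only a hint; the compiler may still emit an
 * out-of-line copy (e.g. at -O0, or in-kernel under
 * CONFIG_OPTIMIZE_INLINING). */
static inline unsigned int add_plain(unsigned int a, unsigned int b)
{
	return a + b;
}

/* The attribute forces the body into every caller regardless of the
 * optimization level. */
static __always_inline unsigned int add_forced(unsigned int a, unsigned int b)
{
	return a + b;
}

int main(void)
{
	printf("%u %u\n", add_plain(1, 2), add_forced(3, 4));
	return 0;
}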