Change the return types of bitops functions (ffs, fls, and fns) from
long to int. The expected return values are in the range [0, 64], for
which int is sufficient.

Additionally, int aligns well with the return types of the corresponding
__builtin_* functions, potentially reducing overall type conversions.

Many of the existing bitops functions already return an int and don't
need to be changed. The bitops functions in arch/ should be considered
separately.

Adjust some return variables to match the function return types.

With GCC 13 and defconfig, these changes reduced the size of a test
kernel image by 5,432 bytes on arm64 and by 248 bytes on riscv; there
were no changes in size on x86_64, powerpc, or m68k.

Signed-off-by: Thorsten Blum <[email protected]>
---
include/asm-generic/bitops/__ffs.h | 4 ++--
include/asm-generic/bitops/__fls.h | 4 ++--
include/asm-generic/bitops/builtin-__ffs.h | 2 +-
include/asm-generic/bitops/builtin-__fls.h | 2 +-
include/linux/bitops.h | 6 +++---
tools/include/asm-generic/bitops/__ffs.h | 4 ++--
tools/include/asm-generic/bitops/__fls.h | 4 ++--
tools/include/linux/bitops.h | 2 +-
8 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/include/asm-generic/bitops/__ffs.h b/include/asm-generic/bitops/__ffs.h
index 446fea6dda78..2d08c750c8a7 100644
--- a/include/asm-generic/bitops/__ffs.h
+++ b/include/asm-generic/bitops/__ffs.h
@@ -10,9 +10,9 @@
*
* Undefined if no bit exists, so code should check against 0 first.
*/
-static __always_inline unsigned long generic___ffs(unsigned long word)
+static __always_inline unsigned int generic___ffs(unsigned long word)
{
- int num = 0;
+ unsigned int num = 0;
#if BITS_PER_LONG == 64
if ((word & 0xffffffff) == 0) {
diff --git a/include/asm-generic/bitops/__fls.h b/include/asm-generic/bitops/__fls.h
index 54ccccf96e21..e974ec932ec1 100644
--- a/include/asm-generic/bitops/__fls.h
+++ b/include/asm-generic/bitops/__fls.h
@@ -10,9 +10,9 @@
*
* Undefined if no set bit exists, so code should check against 0 first.
*/
-static __always_inline unsigned long generic___fls(unsigned long word)
+static __always_inline unsigned int generic___fls(unsigned long word)
{
- int num = BITS_PER_LONG - 1;
+ unsigned int num = BITS_PER_LONG - 1;
#if BITS_PER_LONG == 64
if (!(word & (~0ul << 32))) {
diff --git a/include/asm-generic/bitops/builtin-__ffs.h b/include/asm-generic/bitops/builtin-__ffs.h
index 87024da44d10..cf4b3d33bf96 100644
--- a/include/asm-generic/bitops/builtin-__ffs.h
+++ b/include/asm-generic/bitops/builtin-__ffs.h
@@ -8,7 +8,7 @@
*
* Undefined if no bit exists, so code should check against 0 first.
*/
-static __always_inline unsigned long __ffs(unsigned long word)
+static __always_inline unsigned int __ffs(unsigned long word)
{
return __builtin_ctzl(word);
}
diff --git a/include/asm-generic/bitops/builtin-__fls.h b/include/asm-generic/bitops/builtin-__fls.h
index 43a5aa9afbdb..6d72fc8a5259 100644
--- a/include/asm-generic/bitops/builtin-__fls.h
+++ b/include/asm-generic/bitops/builtin-__fls.h
@@ -8,7 +8,7 @@
*
* Undefined if no set bit exists, so code should check against 0 first.
*/
-static __always_inline unsigned long __fls(unsigned long word)
+static __always_inline unsigned int __fls(unsigned long word)
{
return (sizeof(word) * 8) - 1 - __builtin_clzl(word);
}
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 2ba557e067fe..f60220f119e2 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -200,7 +200,7 @@ static __always_inline __s64 sign_extend64(__u64 value, int index)
return (__s64)(value << shift) >> shift;
}
-static inline unsigned fls_long(unsigned long l)
+static inline unsigned int fls_long(unsigned long l)
{
if (sizeof(l) == 4)
return fls(l);
@@ -236,7 +236,7 @@ static inline int get_count_order_long(unsigned long l)
* The result is not defined if no bits are set, so check that @word
* is non-zero before calling this.
*/
-static inline unsigned long __ffs64(u64 word)
+static inline unsigned int __ffs64(u64 word)
{
#if BITS_PER_LONG == 32
if (((u32)word) == 0UL)
@@ -252,7 +252,7 @@ static inline unsigned long __ffs64(u64 word)
* @word: The word to search
* @n: Bit to find
*/
-static inline unsigned long fns(unsigned long word, unsigned int n)
+static inline unsigned int fns(unsigned long word, unsigned int n)
{
unsigned int bit;
diff --git a/tools/include/asm-generic/bitops/__ffs.h b/tools/include/asm-generic/bitops/__ffs.h
index 9d1310519497..2d94c1e9b2f3 100644
--- a/tools/include/asm-generic/bitops/__ffs.h
+++ b/tools/include/asm-generic/bitops/__ffs.h
@@ -11,9 +11,9 @@
*
* Undefined if no bit exists, so code should check against 0 first.
*/
-static __always_inline unsigned long __ffs(unsigned long word)
+static __always_inline unsigned int __ffs(unsigned long word)
{
- int num = 0;
+ unsigned int num = 0;
#if __BITS_PER_LONG == 64
if ((word & 0xffffffff) == 0) {
diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h
index 54ccccf96e21..e974ec932ec1 100644
--- a/tools/include/asm-generic/bitops/__fls.h
+++ b/tools/include/asm-generic/bitops/__fls.h
@@ -10,9 +10,9 @@
*
* Undefined if no set bit exists, so code should check against 0 first.
*/
-static __always_inline unsigned long generic___fls(unsigned long word)
+static __always_inline unsigned int generic___fls(unsigned long word)
{
- int num = BITS_PER_LONG - 1;
+ unsigned int num = BITS_PER_LONG - 1;
#if BITS_PER_LONG == 64
if (!(word & (~0ul << 32))) {
diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h
index 7319f6ced108..8e073a111d2a 100644
--- a/tools/include/linux/bitops.h
+++ b/tools/include/linux/bitops.h
@@ -70,7 +70,7 @@ static inline unsigned long hweight_long(unsigned long w)
return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
}
-static inline unsigned fls_long(unsigned long l)
+static inline unsigned int fls_long(unsigned long l)
{
if (sizeof(l) == 4)
return fls(l);
--
2.39.2
> -----Original Message-----
> From: Thorsten Blum <[email protected]>
> Sent: Sunday, April 21, 2024 6:39 AM
> To: Arnd Bergmann <[email protected]>
> Cc: Wang, Xiao W <[email protected]>; Palmer Dabbelt <[email protected]>;
> Charlie Jenkins <[email protected]>; Namhyung Kim <[email protected]>;
> Huacai Chen <[email protected]>; Youling Tang <[email protected]>;
> Tiezhu Yang <[email protected]>; Jinyang He <[email protected]>;
> [email protected]; [email protected];
> Thorsten Blum <[email protected]>
> Subject: [PATCH] bitops: Change function return types from long to int
>
> Change the return types of bitops functions (ffs, fls, and fns) from
> long to int. The expected return values are in the range [0, 64], for
> which int is sufficient.
>
> Additionally, int aligns well with the return types of the corresponding
> __builtin_* functions, potentially reducing overall type conversions.
Could we change the return types to "int", instead of "unsigned int"?
https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html says that these __builtin_*
functions return "int".
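For reference, the linked page documents the relevant builtins with int
return types (both are undefined for a zero argument):

    int __builtin_ctzl (unsigned long);  /* count trailing zeros */
    int __builtin_clzl (unsigned long);  /* count leading zeros */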
>
> Many of the existing bitops functions already return an int and don't
> need to be changed. The bitops functions in arch/ should be considered
> separately.
>
> Adjust some return variables to match the function return types.
>
> With GCC 13 and defconfig, these changes reduced the size of a test
> kernel image by 5,432 bytes on arm64 and by 248 bytes on riscv; there
> were no changes in size on x86_64, powerpc, or m68k.
I guess your test is based on a 64-bit arch, right?
BRs,
Xiao
On 4/21/2024 12:38 AM, Thorsten Blum wrote:
> Change the return types of bitops functions (ffs, fls, and fns) from
> long to int. The expected return values are in the range [0, 64], for
> which int is sufficient.
>
> Additionally, int aligns well with the return types of the corresponding
> __builtin_* functions, potentially reducing overall type conversions.
>
> Many of the existing bitops functions already return an int and don't
> need to be changed. The bitops functions in arch/ should be considered
> separately.
>
> Adjust some return variables to match the function return types.
>
> With GCC 13 and defconfig, these changes reduced the size of a test
> kernel image by 5,432 bytes on arm64 and by 248 bytes on riscv; there
> were no changes in size on x86_64, powerpc, or m68k.
>
> Signed-off-by: Thorsten Blum <[email protected]>
> ---
> include/asm-generic/bitops/__ffs.h | 4 ++--
> include/asm-generic/bitops/__fls.h | 4 ++--
> include/asm-generic/bitops/builtin-__ffs.h | 2 +-
> include/asm-generic/bitops/builtin-__fls.h | 2 +-
> include/linux/bitops.h | 6 +++---
> tools/include/asm-generic/bitops/__ffs.h | 4 ++--
> tools/include/asm-generic/bitops/__fls.h | 4 ++--
> tools/include/linux/bitops.h | 2 +-
> 8 files changed, 14 insertions(+), 14 deletions(-)
I don't mind the idea, but in the past I've sent some patches trying to
align some arch-specific implementations with the asm-generic ones. Now
you are changing only the asm-generic implementations and leaving the
arch-specific ones untouched (that's probably why you see no size change
on some of them).
For example on x86, there is:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/include/asm/bitops.h?id=ed30a4a51bb196781c8058073ea720133a65596f#n293
and you probably need to check all architectures for those implementations.
Thanks,
Amadeusz
On 22. Apr 2024, at 07:24, Wang, Xiao W <[email protected]> wrote:
>
> Could we change the return types to "int", instead of "unsigned int"?
> https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html says that these __builtin_*
> functions return "int".
We could, but changing the signedness breaks assertions in other modules and
drivers (e.g., where min() and max() are used) and has quite a few side effects.
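For illustration, here is a minimal user-space sketch of the kind of type
check this would trip. The macro below is only a simplified stand-in for
the kernel's min(), and the helper name is made up for the example:

/*
 * Simplified stand-in for the kernel's type-checking min(); the real
 * macro is more elaborate, this only shows the failure mode.
 */
#include <stdio.h>

#define my_min(x, y) ({                                      \
	typeof(x) _x = (x);                                  \
	typeof(y) _y = (y);                                  \
	(void)(&_x == &_y); /* warns if the types differ */  \
	_x < _y ? _x : _y; })

/* Made-up helper mirroring the unsigned int return type of __ffs(). */
static unsigned int first_bit(unsigned long word)
{
	return (unsigned int)__builtin_ctzl(word);
}

int main(void)
{
	unsigned int nbits = 64;

	/* Both operands are unsigned int, so this compiles cleanly. */
	printf("%u\n", my_min(first_bit(0xf0UL), nbits));

	/*
	 * If first_bit() returned a plain int instead, the pointer
	 * comparison in my_min() would warn about distinct pointer
	 * types, which is the kind of breakage meant above.
	 */
	return 0;
}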
>> With GCC 13 and defconfig, these changes reduced the size of a test
>> kernel image by 5,432 bytes on arm64 and by 248 bytes on riscv; there
>> were no changes in size on x86_64, powerpc, or m68k.
>
> I guess your test is based on a 64-bit arch, right?
Yes, except for m68k.
On Mon, Apr 22, 2024, at 16:30, Thorsten Blum wrote:
> On 22. Apr 2024, at 09:44, Amadeusz Sławiński
> <[email protected]> wrote:
>>
>> I don't mind the idea, but in the past I've sent some patches trying to align some arch-specific implementations with the asm-generic ones. Now you are changing only the asm-generic implementations and leaving the arch-specific ones untouched (that's probably why you see no size change on some of them).
>
> I would submit architecture-specific changes in another patch set to keep it
> simple and to be able to review each arch separately.
I can generally merge such a series with architecture specific
changes through the asm-generic tree, with the appropriate Acks
from the maintainers.
Arnd
On 22. Apr 2024, at 09:44, Amadeusz Sławiński <[email protected]> wrote:
>
> I don't mind the idea, but in the past I've sent some patches trying to align some arch-specific implementations with the asm-generic ones. Now you are changing only the asm-generic implementations and leaving the arch-specific ones untouched (that's probably why you see no size change on some of them).
I would submit architecture-specific changes in another patch set to keep it
simple and to be able to review each arch separately.
On 22. Apr 2024, at 17:55, Arnd Bergmann <[email protected]> wrote:
> On Mon, Apr 22, 2024, at 16:30, Thorsten Blum wrote:
>> On 22. Apr 2024, at 09:44, Amadeusz Sławiński
>> <[email protected]> wrote:
>>>
>>> I don't mind the idea, but in the past I've sent some patches trying to align some arch-specific implementations with the asm-generic ones. Now you are changing only the asm-generic implementations and leaving the arch-specific ones untouched (that's probably why you see no size change on some of them).
>>
>> I would submit architecture-specific changes in another patch set to keep it
>> simple and to be able to review each arch separately.
>
> I can generally merge such a series with architecture specific
> changes through the asm-generic tree, with the appropriate Acks
> from the maintainers.
Ok.
I would still prefer to keep this patch free from arch-specific changes, if
possible. The patch improves architectures that use the generic bitops
functions (e.g., arm64) and doesn't impact any arch-specific implementations,
unless I'm missing something.
Thanks,
Thorsten
On 22. Apr 2024, at 17:55, Arnd Bergmann <[email protected]> wrote:
>>
>> I can generally merge such a series with architecture specific
>> changes through the asm-generic tree, with the appropriate Acks
>> from the maintainers.
What would it take for this patch (with only generic type changes) to be
applied?
I did some further investigations and disassembled my test kernel images.
The patch reduced the number of ARM instructions by 872 with GCC 13 and by
2,354 with GCC 14. Other architectures that rely on the generic bitops
functions will most likely also benefit from this patch.
Tests were done with base commit 9d1ddab261f3e2af7c384dc02238784ce0cf9f98.
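To give an idea of where such savings can come from, here is a standalone
sketch, unrelated to the kernel sources: on arm64 and most other 64-bit
targets, converting a signed int of unknown value to unsigned long needs
an explicit sign extension, while an unsigned int result used as such
needs no extra instruction. The exact savings of course depend on the
surrounding code and on how much the compiler can prove about the value.

/* Widening a signed int requires a sign extension, e.g. sxtw on arm64. */
unsigned long widen_signed(int bit_index)
{
	return bit_index;
}

/* A 32-bit unsigned result kept as unsigned int needs no conversion. */
unsigned int keep_narrow(unsigned int bit_index)
{
	return bit_index;
}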
Thanks,
Thorsten
On Fri, May 3, 2024, at 16:49, Thorsten Blum wrote:
> On 22. Apr 2024, at 17:55, Arnd Bergmann <[email protected]> wrote:
>>>
>>> I can generally merge such a series with architecture specific
>>> changes through the asm-generic tree, with the appropriate Acks
>>> from the maintainers.
>
> What would it take for this patch (with only generic type changes) to be
> applied?
>
> I did some further investigations and disassembled my test kernel images.
> The patch reduced the number of ARM instructions by 872 with GCC 13 and by
> 2,354 with GCC 14. Other architectures that rely on the generic bitops
> functions will most likely also benefit from this patch.
>
> Tests were done with base commit 9d1ddab261f3e2af7c384dc02238784ce0cf9f98.
Sorry for failing to follow up earlier. I think this is OK, and thanks
for your thorough work on this. I've applied it to the asm-generic
tree now.
Arnd