From: Markus Elfring <[email protected]>
Date: Wed, 9 Oct 2019 13:53:59 +0200
Several functions return values with which useful data processing
should be performed. These values must not be ignored then.
Thus use the annotation “__must_check” in the shown function declarations.
Add also corresponding parameter names for adjusted functions.
Signed-off-by: Markus Elfring <[email protected]>
---
include/linux/string.h | 75 ++++++++++++++++++++++--------------------
1 file changed, 40 insertions(+), 35 deletions(-)
diff --git a/include/linux/string.h b/include/linux/string.h
index 3cf684db4bc6..5cece3a91434 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -9,10 +9,10 @@
#include <stdarg.h>
#include <uapi/linux/string.h>
-extern char *strndup_user(const char __user *, long);
-extern void *memdup_user(const void __user *, size_t);
-extern void *vmemdup_user(const void __user *, size_t);
-extern void *memdup_user_nul(const void __user *, size_t);
+extern char * __must_check strndup_user(const char __user *src, long n);
+extern void * __must_check memdup_user(const void __user *src, size_t len);
+extern void * __must_check vmemdup_user(const void __user *src, size_t len);
+extern void * __must_check memdup_user_nul(const void __user *src, size_t len);
/*
* Include machine specific inline routines
@@ -90,28 +90,29 @@ extern char * strncat(char *, const char *, __kernel_size_t);
extern size_t strlcat(char *, const char *, __kernel_size_t);
#endif
#ifndef __HAVE_ARCH_STRCMP
-extern int strcmp(const char *,const char *);
+extern int __must_check strcmp(const char *s1, const char *s2);
#endif
#ifndef __HAVE_ARCH_STRNCMP
-extern int strncmp(const char *,const char *,__kernel_size_t);
+extern int __must_check strncmp(const char *s1, const char *s2,
+ __kernel_size_t n);
#endif
#ifndef __HAVE_ARCH_STRCASECMP
-extern int strcasecmp(const char *s1, const char *s2);
+extern int __must_check strcasecmp(const char *s1, const char *s2);
#endif
#ifndef __HAVE_ARCH_STRNCASECMP
-extern int strncasecmp(const char *s1, const char *s2, size_t n);
+extern int __must_check strncasecmp(const char *s1, const char *s2, size_t n);
#endif
#ifndef __HAVE_ARCH_STRCHR
-extern char * strchr(const char *,int);
+extern char * __must_check strchr(const char *s, int c);
#endif
#ifndef __HAVE_ARCH_STRCHRNUL
-extern char * strchrnul(const char *,int);
+extern char * __must_check strchrnul(const char *s, int c);
#endif
#ifndef __HAVE_ARCH_STRNCHR
-extern char * strnchr(const char *, size_t, int);
+extern char * __must_check strnchr(const char *s, size_t n, int c);
#endif
#ifndef __HAVE_ARCH_STRRCHR
-extern char * strrchr(const char *,int);
+extern char * __must_check strrchr(const char *s, int c);
#endif
extern char * __must_check skip_spaces(const char *);
@@ -123,10 +124,10 @@ static inline __must_check char *strstrip(char *str)
}
#ifndef __HAVE_ARCH_STRSTR
-extern char * strstr(const char *, const char *);
+extern char * __must_check strstr(const char *s1, const char *s2);
#endif
#ifndef __HAVE_ARCH_STRNSTR
-extern char * strnstr(const char *, const char *, size_t);
+extern char * __must_check strnstr(const char *s1, const char *s2, size_t n);
#endif
#ifndef __HAVE_ARCH_STRLEN
extern __kernel_size_t strlen(const char *);
@@ -135,16 +136,16 @@ extern __kernel_size_t strlen(const char *);
extern __kernel_size_t strnlen(const char *,__kernel_size_t);
#endif
#ifndef __HAVE_ARCH_STRPBRK
-extern char * strpbrk(const char *,const char *);
+extern char * __must_check strpbrk(const char *s, const char *c);
#endif
#ifndef __HAVE_ARCH_STRSEP
extern char * strsep(char **,const char *);
#endif
#ifndef __HAVE_ARCH_STRSPN
-extern __kernel_size_t strspn(const char *,const char *);
+extern __kernel_size_t __must_check strspn(const char *s, const char *a);
#endif
#ifndef __HAVE_ARCH_STRCSPN
-extern __kernel_size_t strcspn(const char *,const char *);
+extern __kernel_size_t __must_check strcspn(const char *s, const char *r);
#endif
#ifndef __HAVE_ARCH_MEMSET
@@ -197,13 +198,13 @@ extern void * memmove(void *,const void *,__kernel_size_t);
extern void * memscan(void *,int,__kernel_size_t);
#endif
#ifndef __HAVE_ARCH_MEMCMP
-extern int memcmp(const void *,const void *,__kernel_size_t);
+extern int __must_check memcmp(const void *a, const void *b, __kernel_size_t n);
#endif
#ifndef __HAVE_ARCH_BCMP
-extern int bcmp(const void *,const void *,__kernel_size_t);
+extern int __must_check bcmp(const void *a, const void *b, __kernel_size_t n);
#endif
#ifndef __HAVE_ARCH_MEMCHR
-extern void * memchr(const void *,int,__kernel_size_t);
+extern void * __must_check memchr(const void *s, int c, __kernel_size_t n);
#endif
#ifndef __HAVE_ARCH_MEMCPY_MCSAFE
static inline __must_check unsigned long memcpy_mcsafe(void *dst,
@@ -219,29 +220,31 @@ static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt)
memcpy(dst, src, cnt);
}
#endif
-void *memchr_inv(const void *s, int c, size_t n);
+void * __must_check memchr_inv(const void *s, int c, size_t n);
char *strreplace(char *s, char old, char new);
extern void kfree_const(const void *x);
-extern char *kstrdup(const char *s, gfp_t gfp) __malloc;
-extern const char *kstrdup_const(const char *s, gfp_t gfp);
-extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
-extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
-extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp);
+extern char * __must_check kstrdup(const char *s, gfp_t gfp) __malloc;
+extern const char * __must_check kstrdup_const(const char *s, gfp_t gfp);
+extern char * __must_check kstrndup(const char *s, size_t len, gfp_t gfp);
+extern void * __must_check kmemdup(const void *src, size_t len, gfp_t gfp);
+extern char * __must_check kmemdup_nul(const char *s, size_t len, gfp_t gfp);
-extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
+extern char ** __must_check argv_split(gfp_t gfp, const char *str, int *argcp);
extern void argv_free(char **argv);
-extern bool sysfs_streq(const char *s1, const char *s2);
-extern int kstrtobool(const char *s, bool *res);
-static inline int strtobool(const char *s, bool *res)
+extern bool __must_check sysfs_streq(const char *s1, const char *s2);
+extern int __must_check kstrtobool(const char *s, bool *res);
+static inline int __must_check strtobool(const char *s, bool *res)
{
return kstrtobool(s, res);
}
-int match_string(const char * const *array, size_t n, const char *string);
-int __sysfs_match_string(const char * const *array, size_t n, const char *s);
+int __must_check match_string(const char * const *array,
+ size_t n, const char *string);
+int __must_check __sysfs_match_string(const char * const *array,
+ size_t n, const char *s);
/**
* sysfs_match_string - matches given string in an array
@@ -258,8 +261,10 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf);
int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4);
#endif
-extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
- const void *from, size_t available);
+extern ssize_t __must_check memory_read_from_buffer(void *to, size_t count,
+ loff_t *ppos,
+ const void *from,
+ size_t available);
/**
* strstarts - does @str start with @prefix?
@@ -271,7 +276,7 @@ static inline bool strstarts(const char *str, const char *prefix)
return strncmp(str, prefix, strlen(prefix)) == 0;
}
-size_t memweight(const void *ptr, size_t bytes);
+size_t __must_check memweight(const void *ptr, size_t bytes);
void memzero_explicit(void *s, size_t count);
/**
--
2.23.0
On 09/10/2019 14.14, Markus Elfring wrote:
> From: Markus Elfring <[email protected]>
> Date: Wed, 9 Oct 2019 13:53:59 +0200
>
> Several functions return values with which useful data processing
> should be performed. These values must not be ignored then.
> Thus use the annotation “__must_check” in the shown function declarations.
This _might_ make sense for those that are basically kmalloc() wrappers
in one way or another [1]. But what's the point of annotating pure
functions such as strchr, strstr, memchr etc? Nobody is calling those
for their side effects (they don't have any...), so obviously the return
value is used. If somebody does a strcmp() without using the result, so
what? OK, it's odd code that might be worth flagging, but I don't think
that's the kind of thing one accidentally adds. You're also not
consistent - strlen() is not annotated. And, for the standard C
functions, -Wall already seems to warn about an unused call:
#include <string.h>
int f(const char *s)
{
strlen(s);
return 3;
}
$ gcc -Wall -o a.o -c a.c
a.c: In function ‘f’:
a.c:5:2: warning: statement with no effect [-Wunused-value]
strlen(s);
^~~~~~~~~
[1] Just might. The problem is the __must_check does not mean that the
return value must be followed by a comparison to NULL and bailing out
(that can't really be checked), it simply ensures the return value is
assigned somewhere or used in an if(). So foo->bar = kstrdup() not
followed by a check of foo->bar won't warn. So one would essentially
only catch instant-leaks. __must_check is much better suited for
functions that mutate a passed-in or global object, e.g.
start_engine(engine).
Rasmus
[ I haven't reviewed the original patch ]
On Wed, Oct 09, 2019 at 03:26:18PM +0200, Rasmus Villemoes wrote:
> On 09/10/2019 14.14, Markus Elfring wrote:
> > From: Markus Elfring <[email protected]>
> > Date: Wed, 9 Oct 2019 13:53:59 +0200
> >
> > Several functions return values with which useful data processing
> > should be performed. These values must not be ignored then.
> > Thus use the annotation “__must_check” in the shown function declarations.
>
> This _might_ make sense for those that are basically kmalloc() wrappers
> in one way or another [1]. But what's the point of annotating pure
> functions such as strchr, strstr, memchr etc? Nobody is calling those
> for their side effects (they don't have any...), so obviously the return
> value is used. If somebody does a strcmp() without using the result, so
> what? OK, it's odd code that might be worth flagging, but I don't think
> that's the kind of thing one accidentally adds.
if (ret) {
-EINVAL;
}
People do occasionally make mistakes like this. It can't hurt to
warn them as early as possible about nonsense code.
> You're also not consistent - strlen() is not annotated. And, for the
> standard C functions, -Wall already seems to warn about an unused
> call:
>
> #include <string.h>
> int f(const char *s)
> {
> strlen(s);
> return 3;
> }
> $ gcc -Wall -o a.o -c a.c
> a.c: In function ‘f’:
> a.c:5:2: warning: statement with no effect [-Wunused-value]
> strlen(s);
> ^~~~~~~~~
That's because glibc strlen is annotated with __attribute_pure__ which
means it has no side effects.
regards,
dan carpenter
On 09/10/2019 15.56, Dan Carpenter wrote:
> [ I haven't reviewed the original patch ]
>
> On Wed, Oct 09, 2019 at 03:26:18PM +0200, Rasmus Villemoes wrote:
>> On 09/10/2019 14.14, Markus Elfring wrote:
>>> From: Markus Elfring <[email protected]>
>>> Date: Wed, 9 Oct 2019 13:53:59 +0200
>>>
>>> Several functions return values with which useful data processing
>>> should be performed. These values must not be ignored then.
>>> Thus use the annotation “__must_check” in the shown function declarations.
>>
>> This _might_ make sense for those that are basically kmalloc() wrappers
>> in one way or another [1]. But what's the point of annotating pure
>> functions such as strchr, strstr, memchr etc? Nobody is calling those
>> for their side effects (they don't have any...), so obviously the return
>> value is used. If somebody does a strcmp() without using the result, so
>> what? OK, it's odd code that might be worth flagging, but I don't think
>> that's the kind of thing one accidentally adds.
>
>
> if (ret) {
> -EINVAL;
> }
>
> People do occasionally make mistakes like this. It can't hurt to
> warn them as early as possible about nonsense code.
In that case, ret (which I guess comes from one of these functions) is
indeed used. And gcc should already complain about that "statement with
no effect" for the -EINVAL; line. So I don't see how adding these
annotations would change anything.
>> And, for the
>> standard C functions, -Wall already seems to warn about an unused
>> call:
>>
>> #include <string.h>
>> int f(const char *s)
>> {
>> strlen(s);
>> return 3;
>> }
>> $ gcc -Wall -o a.o -c a.c
>> a.c: In function ‘f’:
>> a.c:5:2: warning: statement with no effect [-Wunused-value]
>> strlen(s);
>> ^~~~~~~~~
>
> That's because glibc strlen is annotated with __attribute_pure__ which
> means it has no side effects.
I know, except it has nothing to do with glibc headers. Just try the
same thing in the kernel. gcc itself knows this about __builtin_strlen()
etc. If anything, we could annotate some of our non-standard functions
(say, memchr_inv) with __pure - then we'd both get the Wunused-value in
the nonsense cases, and allow gcc to optimize or reorder the calls.
Rasmus
On Wed, Oct 09, 2019 at 04:21:20PM +0200, Rasmus Villemoes wrote:
> On 09/10/2019 15.56, Dan Carpenter wrote:
> > That's because glibc strlen is annotated with __attribute_pure__ which
> > means it has no side effects.
>
> I know, except it has nothing to do with glibc headers. Just try the
> same thing in the kernel. gcc itself knows this about __builtin_strlen()
> etc. If anything, we could annotate some of our non-standard functions
> (say, memchr_inv) with __pure - then we'd both get the Wunused-value in
> the nonsense cases, and allow gcc to optimize or reorder the calls.
Huh. You're right. GCC already knows. So this patch is pointless like
you say.
regards,
dan carpenter
On Wed, 9 Oct 2019 14:14:28 +0200
Markus Elfring <[email protected]> wrote:
> From: Markus Elfring <[email protected]>
> Date: Wed, 9 Oct 2019 13:53:59 +0200
>
> Several functions return values with which useful data processing
> should be performed. These values must not be ignored then.
> Thus use the annotation “__must_check” in the shown function declarations.
>
> Add also corresponding parameter names for adjusted functions.
>
> Signed-off-by: Markus Elfring <[email protected]>
>
I'm curious. How many warnings showed up when you applied this patch?
-- Steve
On Wed, Oct 9, 2019 at 8:09 AM Steven Rostedt <[email protected]> wrote:
>
> On Wed, 9 Oct 2019 14:14:28 +0200
> Markus Elfring <[email protected]> wrote:
>
> > From: Markus Elfring <[email protected]>
> > Date: Wed, 9 Oct 2019 13:53:59 +0200
> >
> > Several functions return values with which useful data processing
> > should be performed. These values must not be ignored then.
> > Thus use the annotation “__must_check” in the shown function declarations.
> >
> > Add also corresponding parameter names for adjusted functions.
> >
> > Signed-off-by: Markus Elfring <[email protected]>
> >
>
> I'm curious. How many warnings showed up when you applied this patch?
I got zero for x86_64 and arm64 defconfig builds of linux-next with
this applied. Hopefully that's not an argument against the more
liberal application of it? I view __must_check as a good thing, and
encourage its application, unless someone can show that a certain
function would be useful to call without it.
--
Thanks,
~Nick Desaulniers
On Wed, 9 Oct 2019 09:13:17 -0700
Nick Desaulniers <[email protected]> wrote:
> On Wed, Oct 9, 2019 at 8:09 AM Steven Rostedt <[email protected]> wrote:
> >
> > On Wed, 9 Oct 2019 14:14:28 +0200
> > Markus Elfring <[email protected]> wrote:
> >
> > > From: Markus Elfring <[email protected]>
> > > Date: Wed, 9 Oct 2019 13:53:59 +0200
> > >
> > > Several functions return values with which useful data processing
> > > should be performed. These values must not be ignored then.
> > > Thus use the annotation “__must_check” in the shown function declarations.
> > >
> > > Add also corresponding parameter names for adjusted functions.
> > >
> > > Signed-off-by: Markus Elfring <[email protected]>
> > >
> >
> > I'm curious. How many warnings showed up when you applied this patch?
>
> I got zero for x86_64 and arm64 defconfig builds of linux-next with
> this applied. Hopefully that's not an argument against the more
> liberal application of it? I view __must_check as a good thing, and
> encourage its application, unless someone can show that a certain
> function would be useful to call without it.
Not at all, I was just curious, because I would have expected patches
to fix possible bugs with it.
-- Steve
On Wed, Oct 9, 2019 at 7:30 AM Dan Carpenter <[email protected]> wrote:
>
> On Wed, Oct 09, 2019 at 04:21:20PM +0200, Rasmus Villemoes wrote:
> > On 09/10/2019 15.56, Dan Carpenter wrote:
> > > That's because glibc strlen is annotated with __attribute_pure__ which
> > > means it has no side effects.
> >
> > I know, except it has nothing to do with glibc headers. Just try the
> > same thing in the kernel. gcc itself knows this about __builtin_strlen()
> > etc. If anything, we could annotate some of our non-standard functions
> > (say, memchr_inv) with __pure - then we'd both get the Wunused-value in
> > the nonsense cases, and allow gcc to optimize or reorder the calls.
>
> Huh. You're right. GCC already knows. So this patch is pointless like
> you say.
Is it? None of the functions in include/linux/string.h are currently
marked __pure today. (Side note, I'm surprised that any function that
accepts a pointer could be considered pure. I could reassign pointed
to value without changing the pointers value. I can see strlen being
"pure" for string literals, but not for char[]. This is something
I'll play with more, I've already spotted one missed optimization in
LLVM: https://bugs.llvm.org/show_bug.cgi?id=43624).
I think it would be an interesting study to see how often functions
that have return codes are ok to not check vs aren't ok (in a large
production codebase like the Linux kernel), similar to how 97% of
cases fallthrough is unintentional (which to me sounds like maybe the
default behavior of the language is incorrect).
--
Thanks,
~Nick Desaulniers
On Wed, Oct 9, 2019 at 6:26 AM Rasmus Villemoes
<[email protected]> wrote:
>
> On 09/10/2019 14.14, Markus Elfring wrote:
> > From: Markus Elfring <[email protected]>
> > Date: Wed, 9 Oct 2019 13:53:59 +0200
> >
> > Several functions return values with which useful data processing
> > should be performed. These values must not be ignored then.
> > Thus use the annotation “__must_check” in the shown function declarations.
>
> This _might_ make sense for those that are basically kmalloc() wrappers
> in one way or another [1]. But what's the point of annotating pure
> functions such as strchr, strstr, memchr etc? Nobody is calling those
> for their side effects (they don't have any...), so obviously the return
> value is used. If somebody does a strcmp() without using the result, so
> what? OK, it's odd code that might be worth flagging, but I don't think
> that's the kind of thing one accidentally adds. You're also not
Just seeing the amount of trivial errors that folks push that 0day bot
spots, I don't think this would hurt. "No true Scotsman" writes C
code without properly checking their return types (today), but if
anything it would help cut down on silly trivial mistakes before they
reach code review (assuming the code was compile tested before sent,
which a lot of it is not, as per the many many many 0day bot emails I
ignore because it's obvious folks didn't even try compiling their
code).
> consistent - strlen() is not annotated. And, for the standard C
> functions, -Wall already seems to warn about an unused call:
>
> #include <string.h>
> int f(const char *s)
> {
> strlen(s);
> return 3;
> }
> $ gcc -Wall -o a.o -c a.c
> a.c: In function ‘f’:
> a.c:5:2: warning: statement with no effect [-Wunused-value]
> strlen(s);
> ^~~~~~~~~
>
> [1] Just might. The problem is the __must_check does not mean that the
> return value must be followed by a comparison to NULL and bailing out
> (that can't really be checked), it simply ensures the return value is
> assigned somewhere or used in an if(). So foo->bar = kstrdup() not
Which is better than nothing, IMO.
> followed by a check of foo->bar won't warn. So one would essentially
> only catch instant-leaks. __must_check is much better suited for
> functions that mutate a passed-in or global object, e.g.
> start_engine(engine).
>
> Rasmus
--
Thanks,
~Nick Desaulniers
On Wed, 2019-10-09 at 09:13 -0700, Nick Desaulniers wrote:
> On Wed, Oct 9, 2019 at 8:09 AM Steven Rostedt <[email protected]> wrote:
> > On Wed, 9 Oct 2019 14:14:28 +0200 Markus Elfring <[email protected]> wrote:
[]
> > > Several functions return values with which useful data processing
> > > should be performed. These values must not be ignored then.
> > > Thus use the annotation “__must_check” in the shown function declarations.
[]
> > I'm curious. How many warnings showed up when you applied this patch?
>
> I got zero for x86_64 and arm64 defconfig builds of linux-next with
> this applied. Hopefully that's not an argument against the more
> liberal application of it? I view __must_check as a good thing, and
> encourage its application, unless someone can show that a certain
> function would be useful to call without it.
stylistic trivia, neither agreeing nor disagreeing with the patch
as I generally avoid reading Markus' patches.
I believe __must_check is best placed before the return type as
that makes grep for function return type easier to parse.
i.e. prefer
[static inline] __must_check <type> <function>(<args...>);
over
[static inline] <type> __must_check <function>(<args...>);
On Wed, Oct 9, 2019 at 9:27 AM Steven Rostedt <[email protected]> wrote:
>
> On Wed, 9 Oct 2019 09:13:17 -0700
> Nick Desaulniers <[email protected]> wrote:
>
> > On Wed, Oct 9, 2019 at 8:09 AM Steven Rostedt <[email protected]> wrote:
> > >
> > > I'm curious. How many warnings showed up when you applied this patch?
> >
> > I got zero for x86_64 and arm64 defconfig builds of linux-next with
> > this applied. Hopefully that's not an argument against the more
> > liberal application of it? I view __must_check as a good thing, and
> > encourage its application, unless someone can show that a certain
> > function would be useful to call without it.
>
> Not at all, I was just curious, because I would have expected patches
> to fix possible bugs with it.
Ah, granted, I was surprised, too. Maybe would be helpful to mention
that in the commit message.
--
Thanks,
~Nick Desaulniers
> You're also not consistent - strlen() is not annotated.
Would you like to integrate any additional function annotations?
> And, for the standard C functions, -Wall already seems to warn about
> an unused call:
This detail is nice, isn't it?
> a.c:5:2: warning: statement with no effect [-Wunused-value]
> strlen(s);
Are there any differences to consider for the Linux function variant?
> The problem is the __must_check does not mean that the
> return value must be followed by a comparison to NULL and bailing out
> (that can't really be checked), it simply ensures the return value is
> assigned somewhere or used in an if(). So foo->bar = kstrdup() not
> followed by a check of foo->bar won't warn. So one would essentially
> only catch instant-leaks.
How do you think about to improve the source code analysis support
any further?
Regards,
Markus
> Ah, granted, I was surprised, too.
Thanks for this view.
> Maybe would be helpful to mention that in the commit message.
My Linux software build resources might be too limited to take
more system configuration variations safely into account
for this issue.
Would you like to achieve further checks here?
Regards,
Markus
On Wed, Oct 9, 2019 at 9:38 AM Joe Perches <[email protected]> wrote:
>
> On Wed, 2019-10-09 at 09:13 -0700, Nick Desaulniers wrote:
> > On Wed, Oct 9, 2019 at 8:09 AM Steven Rostedt <[email protected]> wrote:
> > > On Wed, 9 Oct 2019 14:14:28 +0200 Markus Elfring <[email protected]> wrote:
> []
> > > > Several functions return values with which useful data processing
> > > > should be performed. These values must not be ignored then.
> > > > Thus use the annotation “__must_check” in the shown function declarations.
> []
> > > I'm curious. How many warnings showed up when you applied this patch?
> >
> > I got zero for x86_64 and arm64 defconfig builds of linux-next with
> > this applied. Hopefully that's not an argument against the more
> > liberal application of it? I view __must_check as a good thing, and
> > encourage its application, unless someone can show that a certain
> > function would be useful to call without it.
>
> stylistic trivia, neither agreeing nor disagreeing with the patch
> as I generally avoid reading Markus' patches.
>
> I believe __must_check is best placed before the return type as
> that makes grep for function return type easier to parse.
>
> i.e. prefer
> [static inline] __must_check <type> <function>(<args...>);
> over
> [static inline] <type> __must_check <function>(<args...>);
>
+ Miguel
So I just checked `__cold`, and `__cold` is all over the board in
style. I see it:
1. before anything fs/btrfs/super.c#L101
2. after static before return type (what you recommend) fs/btrfs/super.c#L2318
3. after return type fs/btrfs/inode.c#L9426
Can we pick a style and enforce it via checkpatch? (It's probably not
fun to check for each function attribute in
include/linux/compiler_attributes.h).
--
Thanks,
~Nick Desaulniers
On Wed, Oct 9, 2019 at 10:04 AM Markus Elfring <[email protected]> wrote:
>
> > Ah, granted, I was surprised, too.
>
> Thanks for this view.
I mean, it's a good thing that we don't have any issues that this
patch would catch today. Seems Steven and I were surprised
(pessimistic?).
>
>
> > Maybe would be helpful to mention that in the commit message.
>
> My Linux software build resources might be too limited to take
> more system configuration variations safely into account
> for this issue.
That's understandable. I think if the patch bakes in linux-next, it
might flush out some problematic cases in other ARCH's.
> Would you like to achieve further checks here?
I reviewed the functions here and believe the ones you added checks
for all look good. I value Rasmus' feedback, so I'd like to hear what
he thinks about my earlier comments. I have no comment if we should
go further/annotate more, other than that that can be done in a follow
up patch. Though Joe's comment on the relative order of where the
annotation appears in the function declarations should be addressed in
a V2 IMO.
--
Thanks,
~Nick Desaulniers
> I reviewed the functions here and believe the ones you added checks
> for all look good.
Thanks for your positive feedback.
> Though Joe's comment on the relative order of where the
> annotation appears in the function declarations should be addressed in
> a V2 IMO.
Would you be looking for a subsequent change also by the means of
the semantic patch language with which the (function) attributes
can be adjusted to the ordering that you would prefer finally?
Regards,
Markus
On Wed, Oct 09, 2019 at 09:31:41AM -0700, Nick Desaulniers wrote:
> On Wed, Oct 9, 2019 at 7:30 AM Dan Carpenter <[email protected]> wrote:
> >
> > On Wed, Oct 09, 2019 at 04:21:20PM +0200, Rasmus Villemoes wrote:
> > > On 09/10/2019 15.56, Dan Carpenter wrote:
> > > > That's because glibc strlen is annotated with __attribute_pure__ which
> > > > means it has no side effects.
> > >
> > > I know, except it has nothing to do with glibc headers. Just try the
> > > same thing in the kernel. gcc itself knows this about __builtin_strlen()
> > > etc. If anything, we could annotate some of our non-standard functions
> > > (say, memchr_inv) with __pure - then we'd both get the Wunused-value in
> > > the nonsense cases, and allow gcc to optimize or reorder the calls.
> >
> > Huh. You're right. GCC already knows. So this patch is pointless like
> > you say.
>
> Is it? None of the functions in include/linux/string.h are currently
> marked __pure today.
I've already embarrassed myself with my ignorance once so I may as well
keep talking now... GCC did complain about the unused result even
though we don't declare them as __pure. So GCC rule must have this rule
built in.
We were discussing in a different thread that standard says that
memcpy() pointers can't be NULL (even when we're copying zero bytes) so
GCC will assume that's true. If you have:
memcpy(foo, bar, 0);
if (foo)
*foo = 0;
GCC will sometimes remove the condition. This doesn't affect the kernel
because we use -fno-delete-null-pointer-checks.
https://groups.google.com/forum/#!msg/syzkaller-netbsd-bugs/8B4CIKN0Xz8/wRvIUWxiAgAJ
Weird, huh?
regards,
dan carpenter
> I'm curious. How many warnings showed up when you applied this patch?
I suggest to take another look at six places in a specific source file
(for example).
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/test_kasan.c?id=b92a953cb7f727c42a15ac2ea59bf3cf9c39370d#n595
https://elixir.bootlin.com/linux/v5.4-rc2/source/lib/test_kasan.c#L595
Regards,
Markus
On Wed, Oct 9, 2019 at 11:11 PM Markus Elfring <[email protected]> wrote:
>
> > I'm curious. How many warnings showed up when you applied this patch?
>
> I suggest to take another look at six places in a specific source file
> (for example).
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/test_kasan.c?id=b92a953cb7f727c42a15ac2ea59bf3cf9c39370d#n595
The *test* word must have given you a clue that the code you a looking
at is not an ordinary one.
--
With Best Regards,
Andy Shevchenko
On 09/10/2019 18.31, Nick Desaulniers wrote:
> On Wed, Oct 9, 2019 at 7:30 AM Dan Carpenter <[email protected]> wrote:
>>
>> On Wed, Oct 09, 2019 at 04:21:20PM +0200, Rasmus Villemoes wrote:
>>> On 09/10/2019 15.56, Dan Carpenter wrote:
>>>> That's because glibc strlen is annotated with __attribute_pure__ which
>>>> means it has no side effects.
>>>
>>> I know, except it has nothing to do with glibc headers. Just try the
>>> same thing in the kernel. gcc itself knows this about __builtin_strlen()
>>> etc. If anything, we could annotate some of our non-standard functions
>>> (say, memchr_inv) with __pure - then we'd both get the Wunused-value in
>>> the nonsense cases, and allow gcc to optimize or reorder the calls.
>>
>> Huh. You're right. GCC already knows. So this patch is pointless like
>> you say.
>
> Is it? None of the functions in include/linux/string.h are currently
> marked __pure today.
As I said, gcc knows this about the standard C functions, the ones it
has a __builtin_* version of. So it doesn't matter if the user adds a
__pure annotation or not (be it in libc or kernel headers). For example,
here's a line from gcc's builtins.def
DEF_LIB_BUILTIN (BUILT_IN_STRCMP, "strcmp",
BT_FN_INT_CONST_STRING_CONST_STRING, ATTR_PURE_NOTHROW_NONNULL_LEAF)
so gcc knows that __builtin_strcmp is not just pure, but its argument
can also be assumed to be non-NULL, etc. (OK, the kernel disables the
use of that knowledge, but this is irrelevant to this discussion).
In constrast, e.g. abs and most libm functions is
DEF_LIB_BUILTIN (BUILT_IN_ABS, "abs", BT_FN_INT_INT,
ATTR_CONST_NOTHROW_LEAF_LIST)
i.e. "attribute(__const__)" (se below).
(Side note, I'm surprised that any function that
> accepts a pointer could be considered pure. I could reassign pointed
> to value without changing the pointers value.
Probably depends on your definition of pure. What matters is gcc's, which is
'const'
Many functions do not examine any values except their arguments,
and have no effects except the return value. Basically this is
just slightly more strict class than the 'pure' attribute below,
since function is not allowed to read global memory.
Note that a function that has pointer arguments and examines the
data pointed to must _not_ be declared 'const'. Likewise, a
function that calls a non-'const' function usually must not be
'const'. It does not make sense for a 'const' function to return
'void'.
'pure'
Many functions have no effects except the return value and their
return value depends only on the parameters and/or global
variables. Such a function can be subject to common subexpression
elimination and loop optimization just as an arithmetic operator
would be. These functions should be declared with the attribute
'pure'. For example,
int square (int) __attribute__ ((pure));
says that the hypothetical function 'square' is safe to call fewer
times than the program says.
Some common examples of pure functions are 'strlen' or 'memcmp'.
Interesting non-pure functions are functions with infinite loops or
those depending on volatile memory or other system resource, that
may change between two consecutive calls (such as 'feof' in a
multithreading environment).
And yes, gcc knows perfectly well that more or less any write to memory
(except in the very few cases where it can prove no aliasing)
"invalidates" the result of a pure function. In some very simple cases,
that means it can hoist a strlen() call out of a badly written loop like
for (i = 0; i < strlen(s); ++i)
but the loop body can't be very complicated before that fails. Example:
https://godbolt.org/z/f0PPRt . I gave up trying to decipher what clang
produced.
(BTW, the pure example in the docs is bad, because that function is
likely to be const and not just pure.)
I can see strlen being
> "pure" for string literals, but not for char[]. This is something
> I'll play with more, I've already spotted one missed optimization in
> LLVM: https://bugs.llvm.org/show_bug.cgi?id=43624).
I see that compiler_attributes unconditionally #defines __pure, but
there's no reference to clang docs. Do they exist?
> I think it would be an interesting study to see how often functions
> that have return codes are ok to not check vs aren't ok (in a large
> production codebase like the Linux kernel), similar to how 97% of
> cases fallthrough is unintentional (which to me sounds like maybe the
> default behavior of the language is incorrect).
Again, _please_ do not confuse the name __must_check with what it
actually does and means. It only means you get a warning if you throw
away the result completely, it does not mean the return value must
soonish be subject to some if-condition (because how'd you define or
implement that...).
To reiterate:
(1) for __pure functions, it makes no sense to add __must_check, because
gcc already warns
(2) all the standard-C string functions are already __pure, even if
linux/string.h does not declare them that way - so one shouldn't expect
any new warnings at least for those functions
(3) for the non-standard string functions (memchr_inv), I'd rather mark
them __pure than __must_check, since the former is effectively a
superset of the latter. We can then bikeshed whether the standard ones
should be marked __pure as well for consistency (and perhaps benefit of
clang, in case clang does not know __builtin_strlen is pure).
(4) The kmalloc() wrappers (kstrdup etc.) are a whole different story.
We're not marking kmalloc() itself, only krealloc(), so I don't see why
kstrdup() should be __must_check. Yes, it would catch a silly
instant-leak like
kstrdup(foo);
but I really don't think that ever happens in practice. And again,
there's no point making kstrdup() __must_check unless we also decide
that it's worth it for kmalloc() and friends.
Rasmus
>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/lib/test_kasan.c?id=b92a953cb7f727c42a15ac2ea59bf3cf9c39370d#n595
>
> The *test* word must have given you a clue that the code you a looking
> at is not an ordinary one.
The proposed extension of function annotations can be tested also together
with this source file example, can't it?
Other system configuration variations might become more interesting
for further software components.
Regards,
Markus
On Wed, Oct 09, 2019 at 10:33:45AM -0700, Nick Desaulniers wrote:
> On Wed, Oct 9, 2019 at 9:38 AM Joe Perches <[email protected]> wrote:
> >
> > On Wed, 2019-10-09 at 09:13 -0700, Nick Desaulniers wrote:
> > > On Wed, Oct 9, 2019 at 8:09 AM Steven Rostedt <[email protected]> wrote:
> > > > On Wed, 9 Oct 2019 14:14:28 +0200 Markus Elfring <[email protected]> wrote:
> > []
> > > > > Several functions return values with which useful data processing
> > > > > should be performed. These values must not be ignored then.
> > > > > Thus use the annotation “__must_check” in the shown function declarations.
> > []
> > > > I'm curious. How many warnings showed up when you applied this patch?
> > >
> > > I got zero for x86_64 and arm64 defconfig builds of linux-next with
> > > this applied. Hopefully that's not an argument against the more
> > > liberal application of it? I view __must_check as a good thing, and
> > > encourage its application, unless someone can show that a certain
> > > function would be useful to call without it.
> >
> > stylistic trivia, neither agreeing nor disagreeing with the patch
> > as I generally avoid reading Markus' patches.
> >
> > I believe __must_check is best placed before the return type as
> > that makes grep for function return type easier to parse.
> >
> > i.e. prefer
> > [static inline] __must_check <type> <function>(<args...>);
> > over
> > [static inline] <type> __must_check <function>(<args...>);
> >
>
> + Miguel
> So I just checked `__cold`, and `__cold` is all over the board in
> style. I see it:
> 1. before anything fs/btrfs/super.c#L101
> 2. after static before return type (what you recommend) fs/btrfs/super.c#L2318
> 3. after return type fs/btrfs/inode.c#L9426
As you can see in the git history, case 1 is from 2015 and the newer
changes put the attribute between type and name - that's my "current"
but hopefully final preference.
> Can we pick a style and enforce it via checkpatch? (It's probably not
> fun to check for each function attribute in
> include/linux/compiler_attributes.h).
Anything that has the return type, attributes and function name on one
line works for me, but I know that there are other style preferences
that put function name as the first word on a separate line. My reasons
are for better search results, ie.
extent_map.c:void __cold extent_map_exit(void)
extent_map.h:void __cold extent_map_exit(void);
file.c:void __cold btrfs_auto_defrag_exit(void)
inode.c:void __cold btrfs_destroy_cachep(void)
ordered-data.c:void __cold ordered_data_exit(void)
ordered-data.h:void __cold ordered_data_exit(void);
is better than
send.c:__cold
super.c:__cold
super.c:__cold
super.c:__cold
which I might get to fix eventually.
On Thu, 2019-10-10 at 16:27 +0200, David Sterba wrote:
> On Wed, Oct 09, 2019 at 10:33:45AM -0700, Nick Desaulniers wrote:
> > On Wed, Oct 9, 2019 at 9:38 AM Joe Perches <[email protected]> wrote:
> > > I believe __must_check is best placed before the return type as
> > > that makes grep for function return type easier to parse.
> > >
> > > i.e. prefer
> > > [static inline] __must_check <type> <function>(<args...>);
> > > over
> > > [static inline] <type> __must_check <function>(<args...>);
> > >
> >
> > + Miguel
> > So I just checked `__cold`, and `__cold` is all over the board in
> > style. I see it:
> > 1. before anything fs/btrfs/super.c#L101
> > 2. after static before return type (what you recommend) fs/btrfs/super.c#L2318
> > 3. after return type fs/btrfs/inode.c#L9426
>
> As you can see in the git history, case 1 is from 2015 and the newer
> changes put the attribute between type and name - that's my "current"
> but hopefully final preference.
>
> > Can we pick a style and enforce it via checkpatch? (It's probably not
> > fun to check for each function attribute in
> > include/linux/compiler_attributes.h).
>
> Anything that has the return type, attributes and function name on one
> line works for me, but I know that there are other style preferences
> that put function name as the first word on a separate line. My reasons
> are for better search results, ie.
>
> extent_map.c:void __cold extent_map_exit(void)
> extent_map.h:void __cold extent_map_exit(void);
> file.c:void __cold btrfs_auto_defrag_exit(void)
> inode.c:void __cold btrfs_destroy_cachep(void)
> ordered-data.c:void __cold ordered_data_exit(void)
> ordered-data.h:void __cold ordered_data_exit(void);
>
> is better than
>
> send.c:__cold
> super.c:__cold
> super.c:__cold
> super.c:__cold
>
> which I might get to fix eventually.
When your examples have no function arguments, line
length isn't much of an issue. But most functions
take arguments and line length might matter there.
Here's a possible checkpatch test for location of
various __<attributes> that are not particularly
standardized.
---
scripts/checkpatch.pl | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index cf7543a9d1b2..ed7e6319e061 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -390,6 +390,19 @@ our $Attribute = qr{
____cacheline_internodealigned_in_smp|
__weak
}x;
+
+our $PositionalAttribute = qr{
+ __must_check|
+ __printf|
+ __scanf|
+ __pure|
+ __cold|
+ __hot|
+ __visible|
+ __weak|
+ noinline
+ }x;
+
our $Modifier;
our $Inline = qr{inline|__always_inline|noinline|__inline|__inline__};
our $Member = qr{->$Ident|\.$Ident|\[[^]]*\]};
@@ -3773,6 +3786,17 @@ sub process {
"Avoid multiple line dereference - prefer '$ref'\n" . $hereprev);
}
+# check for declarations like __must_check ($PositionalAttribute) after the type
+ if ($line =~ /\b($Declare)\s+($PositionalAttribute)\b/) {
+ if (WARN("ATTRIBUTE_POSITION",
+ "Prefer position of attribute '$2' before '$1'\n" . $herecurr) &&
+ $fix) {
+ $fixed[$fixlinenr] =~ s@\b($Declare)(\s+)($PositionalAttribute)\b@$3$2$1@;
+ # 'static void noinline' becomes 'noinline static void', so fix noinline position if necessary
+ $fixed[$fixlinenr] =~ s@\bnoinline(\s+)static\b@static${1}noinline@;
+ }
+ }
+
# check for declarations of signed or unsigned without int
while ($line =~ m{\b($Declare)\s*(?!char\b|short\b|int\b|long\b)\s*($Ident)?\s*[=,;\[\)\(]}g) {
my $type = $1;
From: David Sterba
> Sent: 10 October 2019 15:28
...
> > Can we pick a style and enforce it via checkpatch? (It's probably not
> > fun to check for each function attribute in
> > include/linux/compiler_attributes.h).
>
> Anything that has the return type, attributes and function name on one
> line works for me, but I know that there are other style preferences
> that put function name as the first word on a separate line.
Having the function name first makes it much easier to find the function itself.
In the linux kernel tree searching for 'EXPORT.*function_name' works most f the time.
> My reasons are for better search results, ie.
>
> extent_map.c:void __cold extent_map_exit(void)
> extent_map.h:void __cold extent_map_exit(void);
> file.c:void __cold btrfs_auto_defrag_exit(void)
> inode.c:void __cold btrfs_destroy_cachep(void)
> ordered-data.c:void __cold ordered_data_exit(void)
> ordered-data.h:void __cold ordered_data_exit(void);
>
> is better than
>
> send.c:__cold
> super.c:__cold
> super.c:__cold
> super.c:__cold
>
> which I might get to fix eventually.
That is what -A1 in for :-)
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
> +our $PositionalAttribute = qr{
> + __must_check|
> + __printf|
I suggest to put all key words which share the leading underscores
into another alternation for an improved regular expression.
Regards,
Markus
> The problem is the __must_check does not mean that the
> return value must be followed by a comparison to NULL and bailing out
> (that can't really be checked), it simply ensures the return value is
> assigned somewhere or used in an if(). So foo->bar = kstrdup() not
> followed by a check of foo->bar won't warn.
Higher level source code analysis tools like the semantic patch language
(Coccinelle software) can help to find such missing checks.
Would you like to point any additional development tools out
for this purpose?
Regards,
Markus
> Several functions return values with which useful data processing
> should be performed. These values must not be ignored then.
> Thus use the annotation “__must_check” in the shown function declarations.
Will such a software development topic be clarified any further?
Would you like to continue the discussion?
Regards,
Markus