2020-08-24 22:24:44

by Alex Dewar

[permalink] [raw]
Subject: [PATCH] usb: atm: don't use snprintf() for sysfs attrs

kernel/cpu.c: don't use snprintf() for sysfs attrs

As per the documentation (Documentation/filesystems/sysfs.rst),
snprintf() should not be used for formatting values returned by sysfs.

In all of these cases, sprintf() suffices as we know that the formatted
strings will be less than PAGE_SIZE in length.

Issue identified by Coccinelle.

Signed-off-by: Alex Dewar <[email protected]>
---
drivers/usb/atm/cxacru.c | 27 +++++++++++++--------------
1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c
index ea66f8f385bae..e62a770a5d3bf 100644
--- a/drivers/usb/atm/cxacru.c
+++ b/drivers/usb/atm/cxacru.c
@@ -230,12 +230,12 @@ CXACRU__ATTR_INIT(_name)

static ssize_t cxacru_sysfs_showattr_u32(u32 value, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%u\n", value);
+ return sprintf(buf, "%u\n", value);
}

static ssize_t cxacru_sysfs_showattr_s8(s8 value, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%d\n", value);
+ return sprintf(buf, "%d\n", value);
}

static ssize_t cxacru_sysfs_showattr_dB(s16 value, char *buf)
@@ -255,8 +255,8 @@ static ssize_t cxacru_sysfs_showattr_bool(u32 value, char *buf)
static char *str[] = { "no", "yes" };

if (unlikely(value >= ARRAY_SIZE(str)))
- return snprintf(buf, PAGE_SIZE, "%u\n", value);
- return snprintf(buf, PAGE_SIZE, "%s\n", str[value]);
+ return sprintf(buf, "%u\n", value);
+ return sprintf(buf, "%s\n", str[value]);
}

static ssize_t cxacru_sysfs_showattr_LINK(u32 value, char *buf)
@@ -264,8 +264,8 @@ static ssize_t cxacru_sysfs_showattr_LINK(u32 value, char *buf)
static char *str[] = { NULL, "not connected", "connected", "lost" };

if (unlikely(value >= ARRAY_SIZE(str) || str[value] == NULL))
- return snprintf(buf, PAGE_SIZE, "%u\n", value);
- return snprintf(buf, PAGE_SIZE, "%s\n", str[value]);
+ return sprintf(buf, "%u\n", value);
+ return sprintf(buf, "%s\n", str[value]);
}

static ssize_t cxacru_sysfs_showattr_LINE(u32 value, char *buf)
@@ -275,8 +275,8 @@ static ssize_t cxacru_sysfs_showattr_LINE(u32 value, char *buf)
"waiting", "initialising"
};
if (unlikely(value >= ARRAY_SIZE(str)))
- return snprintf(buf, PAGE_SIZE, "%u\n", value);
- return snprintf(buf, PAGE_SIZE, "%s\n", str[value]);
+ return sprintf(buf, "%u\n", value);
+ return sprintf(buf, "%s\n", str[value]);
}

static ssize_t cxacru_sysfs_showattr_MODU(u32 value, char *buf)
@@ -288,8 +288,8 @@ static ssize_t cxacru_sysfs_showattr_MODU(u32 value, char *buf)
"ITU-T G.992.2 (G.LITE)"
};
if (unlikely(value >= ARRAY_SIZE(str)))
- return snprintf(buf, PAGE_SIZE, "%u\n", value);
- return snprintf(buf, PAGE_SIZE, "%s\n", str[value]);
+ return sprintf(buf, "%u\n", value);
+ return sprintf(buf, "%s\n", str[value]);
}

/*
@@ -309,8 +309,7 @@ static ssize_t mac_address_show(struct device *dev,
if (instance == NULL || instance->usbatm->atm_dev == NULL)
return -ENODEV;

- return snprintf(buf, PAGE_SIZE, "%pM\n",
- instance->usbatm->atm_dev->esi);
+ return sprintf(buf, "%pM\n", instance->usbatm->atm_dev->esi);
}

static ssize_t adsl_state_show(struct device *dev,
@@ -326,8 +325,8 @@ static ssize_t adsl_state_show(struct device *dev,

value = instance->card_info[CXINF_LINE_STARTABLE];
if (unlikely(value >= ARRAY_SIZE(str)))
- return snprintf(buf, PAGE_SIZE, "%u\n", value);
- return snprintf(buf, PAGE_SIZE, "%s\n", str[value]);
+ return sprintf(buf, "%u\n", value);
+ return sprintf(buf, "%s\n", str[value]);
}

static ssize_t adsl_state_store(struct device *dev,
--
2.28.0


2020-08-25 08:15:35

by David Laight

[permalink] [raw]
Subject: RE: [PATCH] usb: atm: don't use snprintf() for sysfs attrs

From: Alex Dewar
> Sent: 24 August 2020 23:23
> kernel/cpu.c: don't use snprintf() for sysfs attrs
>
> As per the documentation (Documentation/filesystems/sysfs.rst),
> snprintf() should not be used for formatting values returned by sysfs.
>
> In all of these cases, sprintf() suffices as we know that the formatted
> strings will be less than PAGE_SIZE in length.

Hmmmm....
I much prefer to see bounded string ops.
sysfs really ought to be passing through the buffer length.
The buffer size should probably be SYSFS_BUF_LEN not PAGE_SIZE
(even it happens to typically be the same).
If PAGE_SIZE is big (or small) passing a 4k buffer may be
more appropriate than a PAGE_SIZE one.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

2020-08-25 09:01:29

by Alex Dewar

[permalink] [raw]
Subject: Re: [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Tue, Aug 25, 2020 at 08:12:05AM +0000, David Laight wrote:
> From: Alex Dewar
> > Sent: 24 August 2020 23:23
> > kernel/cpu.c: don't use snprintf() for sysfs attrs
> >
> > As per the documentation (Documentation/filesystems/sysfs.rst),
> > snprintf() should not be used for formatting values returned by sysfs.
> >
> > In all of these cases, sprintf() suffices as we know that the formatted
> > strings will be less than PAGE_SIZE in length.
>
> Hmmmm....
> I much prefer to see bounded string ops.
> sysfs really ought to be passing through the buffer length.
> The buffer size should probably be SYSFS_BUF_LEN not PAGE_SIZE
> (even it happens to typically be the same).
> If PAGE_SIZE is big (or small) passing a 4k buffer may be
> more appropriate than a PAGE_SIZE one.
>
> David

We could use scnprintf() instead I guess. But an expression like:
return sprintf(buf, "%d\n", value);
will never overflow if buf is PAGE_SIZE, right...?

>
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
>

2020-08-25 09:02:45

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Tue, Aug 25, 2020 at 08:12:05AM +0000, David Laight wrote:
> From: Alex Dewar
> > Sent: 24 August 2020 23:23
> > kernel/cpu.c: don't use snprintf() for sysfs attrs
> >
> > As per the documentation (Documentation/filesystems/sysfs.rst),
> > snprintf() should not be used for formatting values returned by sysfs.
> >
> > In all of these cases, sprintf() suffices as we know that the formatted
> > strings will be less than PAGE_SIZE in length.
>
> Hmmmm....
> I much prefer to see bounded string ops.
> sysfs really ought to be passing through the buffer length.

No.

> The buffer size should probably be SYSFS_BUF_LEN not PAGE_SIZE
> (even it happens to typically be the same).

Nope.

> If PAGE_SIZE is big (or small) passing a 4k buffer may be
> more appropriate than a PAGE_SIZE one.

Nope, sysfs is supposed to be "one value per file", and the buffer size
was specifically not passed in because you should _NEVER_ need to care
about it because all you are doing is printing out a single value.

If you really want to print more than just a single value, you should
not use sysfs.

So this is designed this way on purpose, you shouldn't have to worry
about any of this, and that way, you don't have to "program
defensively", it all just works in a simple manner.

thanks,

greg k-h

2020-08-25 12:12:39

by David Laight

[permalink] [raw]
Subject: RE: [PATCH] usb: atm: don't use snprintf() for sysfs attrs

From: Alex Dewar
> Sent: 25 August 2020 09:18
> On Tue, Aug 25, 2020 at 08:12:05AM +0000, David Laight wrote:
> > From: Alex Dewar
> > > Sent: 24 August 2020 23:23
> > > kernel/cpu.c: don't use snprintf() for sysfs attrs
> > >
> > > As per the documentation (Documentation/filesystems/sysfs.rst),
> > > snprintf() should not be used for formatting values returned by sysfs.
> > >
> > > In all of these cases, sprintf() suffices as we know that the formatted
> > > strings will be less than PAGE_SIZE in length.
> >
> > Hmmmm....
> > I much prefer to see bounded string ops.
> > sysfs really ought to be passing through the buffer length.
> > The buffer size should probably be SYSFS_BUF_LEN not PAGE_SIZE
> > (even it happens to typically be the same).
> > If PAGE_SIZE is big (or small) passing a 4k buffer may be
> > more appropriate than a PAGE_SIZE one.
> >
> > David
>
> We could use scnprintf() instead I guess. But an expression like:
> return sprintf(buf, "%d\n", value);
> will never overflow if buf is PAGE_SIZE, right...?

Certainly the return value from snprintf() isn't what you
want here (it almost never is) - so scnprintf() is much better.

A simple "%d" or "%u" wont overflow, but a "%s" might (even
if it is really expected that it shouldn't).
Even a "%*d" can go horribly wrong.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

2020-08-27 06:43:21

by Rasmus Villemoes

[permalink] [raw]
Subject: Re: [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On 25/08/2020 00.23, Alex Dewar wrote:
> kernel/cpu.c: don't use snprintf() for sysfs attrs
>
> As per the documentation (Documentation/filesystems/sysfs.rst),
> snprintf() should not be used for formatting values returned by sysfs.
>

Sure. But then the security guys come along and send a patch saying
"sprintf is evil, always pass a buffer bound". Perhaps with a side of
"this code could get copy-pasted, better not promote the use of sprintf
more than strictly necessary".

Can we have a sysfs_sprintf() (could just be a macro that does sprintf)
to make it clear to the next reader that we know we're in a sysfs show
method? It would make auditing uses of sprintf() much easier.

> static ssize_t cxacru_sysfs_showattr_LINE(u32 value, char *buf)
> @@ -275,8 +275,8 @@ static ssize_t cxacru_sysfs_showattr_LINE(u32 value, char *buf)
> "waiting", "initialising"
> };
> if (unlikely(value >= ARRAY_SIZE(str)))
> - return snprintf(buf, PAGE_SIZE, "%u\n", value);
> - return snprintf(buf, PAGE_SIZE, "%s\n", str[value]);
> + return sprintf(buf, "%u\n", value);
> + return sprintf(buf, "%s\n", str[value]);
> }

Not this patch in particular, but in some cases the string being printed
is not immediately adjacent to the sprintf call, making it rather hard
to subsequently verify that yes, that string is certainly reasonably
bounded. If you really want to save the few bytes of code that is the
practical effect of eliding the PAGE_SIZE argument, how about a
sysfs_print_string(buf, str) helper that prints the string and appends a
newline; that's another argument saved. And again it would make it
obvious to a reader that that particular helper is only to be used in
sysfs show methods.

Rasmus

2020-08-27 07:16:27

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Thu, Aug 27, 2020 at 08:42:06AM +0200, Rasmus Villemoes wrote:
> On 25/08/2020 00.23, Alex Dewar wrote:
> > kernel/cpu.c: don't use snprintf() for sysfs attrs
> >
> > As per the documentation (Documentation/filesystems/sysfs.rst),
> > snprintf() should not be used for formatting values returned by sysfs.
> >
>
> Sure. But then the security guys come along and send a patch saying
> "sprintf is evil, always pass a buffer bound". Perhaps with a side of
> "this code could get copy-pasted, better not promote the use of sprintf
> more than strictly necessary".
>
> Can we have a sysfs_sprintf() (could just be a macro that does sprintf)
> to make it clear to the next reader that we know we're in a sysfs show
> method? It would make auditing uses of sprintf() much easier.

Code churn to keep code checkers quiet for pointless reasons? What
could go wrong with that...

It should be pretty obvious to any reader that you are in a sysfs show
method, as almost all of them are trivially tiny and obvious. Yes, it
doesn't help with those that make blanket statements like "sprintf is
evil", but I think that kind of just shows them that they shouldn't be
making foolish blanket statements like that :)

Anyway, we've had this for 20 years, if sysfs calls are the only left
remaining user of sprintf(), then I'll be glad to consider using a
"wrapper" function or macro.

thanks,

greg k-h

2020-08-27 14:51:03

by Alex Dewar

[permalink] [raw]
Subject: Re: [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Thu, Aug 27, 2020 at 03:41:06PM +0200, Rasmus Villemoes wrote:
> On 27/08/2020 15.18, Alex Dewar wrote:
> > On Thu, Aug 27, 2020 at 09:15:37AM +0200, Greg Kroah-Hartman wrote:
> >> On Thu, Aug 27, 2020 at 08:42:06AM +0200, Rasmus Villemoes wrote:
> >>> On 25/08/2020 00.23, Alex Dewar wrote:
> >>>> kernel/cpu.c: don't use snprintf() for sysfs attrs
> >>>>
> >>>> As per the documentation (Documentation/filesystems/sysfs.rst),
> >>>> snprintf() should not be used for formatting values returned by sysfs.
> >>>>
> >>>
> >>> Can we have a sysfs_sprintf() (could just be a macro that does sprintf)
> >>> to make it clear to the next reader that we know we're in a sysfs show
> >>> method? It would make auditing uses of sprintf() much easier.
> >>
> >> Code churn to keep code checkers quiet for pointless reasons? What
> >> could go wrong with that...
>
> I did not (mean to) suggest replacing existing sprintf() calls in sysfs
> show methods. But when changes _are_ being made, such as when replacing
> snprintf() calls for whatever reasons, can we please not make it harder
> for people doing manual audits (those are "code checkers" as well, I
> suppose, but they do tend to only make noise when finding something).
>
> >> It should be pretty obvious to any reader that you are in a sysfs show
> >> method, as almost all of them are trivially tiny and obvious.
>
> git grep doesn't immediately show that, not even with a suitable -C
> argument, as you can't really know the potential callers unless you open
> the file and see that the function is only assigned as a .show method.
> And even that can be a pain because it's all hidden behind five levels
> of magic macros that build identifiers with ##.
>
> > Perhaps I should have mentioned this in the commit message, but the problem
> > is that snprintf() doesn't return the number of bytes written to the
> > destination buffer,
>
> I'm perfectly well aware of that, TYVM (you may want to 'git log
> --author Villemoes lib/vsprintf.c').
>
> but the number of bytes that *would have been written if
> > they fitted*, which may be more than the bounds specified [1]. So "return
> > snprintf(...)" for sysfs attributes is an antipattern. If you need bounded
> > string ops, scnprintf() is the way to go. Using snprintf() can give a
> > false sense of security, because it isn't necessarily safe.
>
> Huh? This all seems utterly irrelevant WRT a change that replaces
> PAGE_SIZE by INT_MAX (because that's what sprintf() is going to pretend
> you passed). You get the same return value.
>
> But I'm not at all concerned about whether one passes the proper buffer
> size or not in sysfs show methods; with my embedded hat on, I'm all for
> saving a few bytes of .text here and there. The problem, as far as I'm
> concerned, is merely that adding sprintf() callers makes it harder to
> find the problematic sprintf() instances.
>

Apologies, I think I might have expressed myself poorly, being a kernel noob
;-). I know that this is a stylistic change rather than a functional
one -- I meant that I was hoping that it would be helpful to get rid of bad
uses of snprintf().

I really like your idea of helper methods though :-). If in show()
methods we could have something like:
return sysfs_itoa(buf, i);
in place of:
return sprintf(buf, "%d\n", i);

... then we wouldn't be introducing any new calls to sprintf() as you
say, but we'd still be removing a call to snprintf() (which also may be
problematic). Plus we'd have type checking on the argument.

For returning strings, we could have a bounded and unbounded variant of
the function. As it seems like only single values should be returned via
sysfs, if we did things this way then it would only be these
string-returning functions which could cause buffer overflow problems
and kernel devs could focus their attention accordingly...

What do people think? I'm happy to have a crack, provided this is
actually a sensible thing to do! I'm looking for a newbie-level project
to get started with.

Thanks for the feedback all,
Alex
>
> >> Anyway, we've had this for 20 years,
>
> My bad, for a moment I forgot that code and patterns of that vintage
> cannot have bugs.
>
> Rasmus

2020-08-27 14:53:01

by Rasmus Villemoes

[permalink] [raw]
Subject: Re: [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On 27/08/2020 15.18, Alex Dewar wrote:
> On Thu, Aug 27, 2020 at 09:15:37AM +0200, Greg Kroah-Hartman wrote:
>> On Thu, Aug 27, 2020 at 08:42:06AM +0200, Rasmus Villemoes wrote:
>>> On 25/08/2020 00.23, Alex Dewar wrote:
>>>> kernel/cpu.c: don't use snprintf() for sysfs attrs
>>>>
>>>> As per the documentation (Documentation/filesystems/sysfs.rst),
>>>> snprintf() should not be used for formatting values returned by sysfs.
>>>>
>>>
>>> Can we have a sysfs_sprintf() (could just be a macro that does sprintf)
>>> to make it clear to the next reader that we know we're in a sysfs show
>>> method? It would make auditing uses of sprintf() much easier.
>>
>> Code churn to keep code checkers quiet for pointless reasons? What
>> could go wrong with that...

I did not (mean to) suggest replacing existing sprintf() calls in sysfs
show methods. But when changes _are_ being made, such as when replacing
snprintf() calls for whatever reasons, can we please not make it harder
for people doing manual audits (those are "code checkers" as well, I
suppose, but they do tend to only make noise when finding something).

>> It should be pretty obvious to any reader that you are in a sysfs show
>> method, as almost all of them are trivially tiny and obvious.

git grep doesn't immediately show that, not even with a suitable -C
argument, as you can't really know the potential callers unless you open
the file and see that the function is only assigned as a .show method.
And even that can be a pain because it's all hidden behind five levels
of magic macros that build identifiers with ##.

> Perhaps I should have mentioned this in the commit message, but the problem
> is that snprintf() doesn't return the number of bytes written to the
> destination buffer,

I'm perfectly well aware of that, TYVM (you may want to 'git log
--author Villemoes lib/vsprintf.c').

but the number of bytes that *would have been written if
> they fitted*, which may be more than the bounds specified [1]. So "return
> snprintf(...)" for sysfs attributes is an antipattern. If you need bounded
> string ops, scnprintf() is the way to go. Using snprintf() can give a
> false sense of security, because it isn't necessarily safe.

Huh? This all seems utterly irrelevant WRT a change that replaces
PAGE_SIZE by INT_MAX (because that's what sprintf() is going to pretend
you passed). You get the same return value.

But I'm not at all concerned about whether one passes the proper buffer
size or not in sysfs show methods; with my embedded hat on, I'm all for
saving a few bytes of .text here and there. The problem, as far as I'm
concerned, is merely that adding sprintf() callers makes it harder to
find the problematic sprintf() instances.


>> Anyway, we've had this for 20 years,

My bad, for a moment I forgot that code and patterns of that vintage
cannot have bugs.

Rasmus

2020-08-27 14:54:09

by Alex Dewar

[permalink] [raw]
Subject: Re: [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Thu, Aug 27, 2020 at 09:15:37AM +0200, Greg Kroah-Hartman wrote:
> On Thu, Aug 27, 2020 at 08:42:06AM +0200, Rasmus Villemoes wrote:
> > On 25/08/2020 00.23, Alex Dewar wrote:
> > > kernel/cpu.c: don't use snprintf() for sysfs attrs
> > >
> > > As per the documentation (Documentation/filesystems/sysfs.rst),
> > > snprintf() should not be used for formatting values returned by sysfs.
> > >
> >
> > Sure. But then the security guys come along and send a patch saying
> > "sprintf is evil, always pass a buffer bound". Perhaps with a side of
> > "this code could get copy-pasted, better not promote the use of sprintf
> > more than strictly necessary".
> >
> > Can we have a sysfs_sprintf() (could just be a macro that does sprintf)
> > to make it clear to the next reader that we know we're in a sysfs show
> > method? It would make auditing uses of sprintf() much easier.
>
> Code churn to keep code checkers quiet for pointless reasons? What
> could go wrong with that...
>
> It should be pretty obvious to any reader that you are in a sysfs show
> method, as almost all of them are trivially tiny and obvious. Yes, it
> doesn't help with those that make blanket statements like "sprintf is
> evil", but I think that kind of just shows them that they shouldn't be
> making foolish blanket statements like that :)

Perhaps I should have mentioned this in the commit message, but the problem
is that snprintf() doesn't return the number of bytes written to the
destination buffer, but the number of bytes that *would have been written if
they fitted*, which may be more than the bounds specified [1]. So "return
snprintf(...)" for sysfs attributes is an antipattern. If you need bounded
string ops, scnprintf() is the way to go. Using snprintf() can give a
false sense of security, because it isn't necessarily safe.

[1] https://github.com/KSPP/linux/issues/105

>
> Anyway, we've had this for 20 years, if sysfs calls are the only left
> remaining user of sprintf(), then I'll be glad to consider using a
> "wrapper" function or macro.
>
> thanks,
>
> greg k-h

2020-08-27 16:52:17

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Tue, Aug 25, 2020 at 10:24:06AM +0200, Greg Kroah-Hartman wrote:
> On Tue, Aug 25, 2020 at 08:12:05AM +0000, David Laight wrote:
> > From: Alex Dewar
> > > Sent: 24 August 2020 23:23
> > > kernel/cpu.c: don't use snprintf() for sysfs attrs
> > >
> > > As per the documentation (Documentation/filesystems/sysfs.rst),
> > > snprintf() should not be used for formatting values returned by sysfs.
> > >
> > > In all of these cases, sprintf() suffices as we know that the formatted
> > > strings will be less than PAGE_SIZE in length.
> >
> > Hmmmm....
> > I much prefer to see bounded string ops.
> > sysfs really ought to be passing through the buffer length.
>
> No.

It really should, though. I _just_ got burned by this due to having
a binattr sysfs reachable through splice[1]. Most sysfs things aren't
binattr, but I've always considered this to be a weird fragility in the
sysfs implementation.

> So this is designed this way on purpose, you shouldn't have to worry
> about any of this, and that way, you don't have to "program
> defensively", it all just works in a simple manner.

Later in this thread there's a suggestion to alter the API to avoid
individual calls to sprintf(), which seems like a reasonable first step.

-Kees

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=11990a5bd7e558e9203c1070fc52fb6f0488e75b

--
Kees Cook

2020-08-27 16:59:58

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Thu, 2020-08-27 at 15:48 +0100, Alex Dewar wrote:
> On Thu, Aug 27, 2020 at 03:41:06PM +0200, Rasmus Villemoes wrote:
> > On 27/08/2020 15.18, Alex Dewar wrote:
> > > On Thu, Aug 27, 2020 at 09:15:37AM +0200, Greg Kroah-Hartman wrote:
> > > > On Thu, Aug 27, 2020 at 08:42:06AM +0200, Rasmus Villemoes wrote:
> > > > > On 25/08/2020 00.23, Alex Dewar wrote:
> > > > > > kernel/cpu.c: don't use snprintf() for sysfs attrs
> > > > > >
> > > > > > As per the documentation (Documentation/filesystems/sysfs.rst),
> > > > > > snprintf() should not be used for formatting values returned by sysfs.
> > > > > >
> > > > >
> > > > > Can we have a sysfs_sprintf() (could just be a macro that does sprintf)
> > > > > to make it clear to the next reader that we know we're in a sysfs show
> > > > > method? It would make auditing uses of sprintf() much easier.
> > > >
> > > > Code churn to keep code checkers quiet for pointless reasons? What
> > > > could go wrong with that...
> >
> > I did not (mean to) suggest replacing existing sprintf() calls in sysfs
> > show methods. But when changes _are_ being made, such as when replacing
> > snprintf() calls for whatever reasons, can we please not make it harder
> > for people doing manual audits (those are "code checkers" as well, I
> > suppose, but they do tend to only make noise when finding something).
> >
> > > > It should be pretty obvious to any reader that you are in a sysfs show
> > > > method, as almost all of them are trivially tiny and obvious.
> >
> > git grep doesn't immediately show that, not even with a suitable -C
> > argument, as you can't really know the potential callers unless you open
> > the file and see that the function is only assigned as a .show method.
> > And even that can be a pain because it's all hidden behind five levels
> > of magic macros that build identifiers with ##.
> >
> > > Perhaps I should have mentioned this in the commit message, but the problem
> > > is that snprintf() doesn't return the number of bytes written to the
> > > destination buffer,
> >
> > I'm perfectly well aware of that, TYVM (you may want to 'git log
> > --author Villemoes lib/vsprintf.c').
> >
> > but the number of bytes that *would have been written if
> > > they fitted*, which may be more than the bounds specified [1]. So "return
> > > snprintf(...)" for sysfs attributes is an antipattern. If you need bounded
> > > string ops, scnprintf() is the way to go. Using snprintf() can give a
> > > false sense of security, because it isn't necessarily safe.
> >
> > Huh? This all seems utterly irrelevant WRT a change that replaces
> > PAGE_SIZE by INT_MAX (because that's what sprintf() is going to pretend
> > you passed). You get the same return value.
> >
> > But I'm not at all concerned about whether one passes the proper buffer
> > size or not in sysfs show methods; with my embedded hat on, I'm all for
> > saving a few bytes of .text here and there. The problem, as far as I'm
> > concerned, is merely that adding sprintf() callers makes it harder to
> > find the problematic sprintf() instances.
> >
>
> Apologies, I think I might have expressed myself poorly, being a kernel noob
> ;-). I know that this is a stylistic change rather than a functional
> one -- I meant that I was hoping that it would be helpful to get rid of bad
> uses of snprintf().
>
> I really like your idea of helper methods though :-). If in show()
> methods we could have something like:
> return sysfs_itoa(buf, i);
> in place of:
> return sprintf(buf, "%d\n", i);
>
> ... then we wouldn't be introducing any new calls to sprintf() as you
> say, but we'd still be removing a call to snprintf() (which also may be
> problematic). Plus we'd have type checking on the argument.
>
> For returning strings, we could have a bounded and unbounded variant of
> the function. As it seems like only single values should be returned via
> sysfs, if we did things this way then it would only be these
> string-returning functions which could cause buffer overflow problems
> and kernel devs could focus their attention accordingly...
>
> What do people think? I'm happy to have a crack, provided this is
> actually a sensible thing to do! I'm looking for a newbie-level project
> to get started with.

Not a bad idea.

Coccinelle should be able to transform the various .show
methods to something sysfs_ prefixed in a fairly automated
way.




2020-08-27 17:45:59

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Thu, Aug 27, 2020 at 09:49:04AM -0700, Kees Cook wrote:
> On Tue, Aug 25, 2020 at 10:24:06AM +0200, Greg Kroah-Hartman wrote:
> > On Tue, Aug 25, 2020 at 08:12:05AM +0000, David Laight wrote:
> > > From: Alex Dewar
> > > > Sent: 24 August 2020 23:23
> > > > kernel/cpu.c: don't use snprintf() for sysfs attrs
> > > >
> > > > As per the documentation (Documentation/filesystems/sysfs.rst),
> > > > snprintf() should not be used for formatting values returned by sysfs.
> > > >
> > > > In all of these cases, sprintf() suffices as we know that the formatted
> > > > strings will be less than PAGE_SIZE in length.
> > >
> > > Hmmmm....
> > > I much prefer to see bounded string ops.
> > > sysfs really ought to be passing through the buffer length.
> >
> > No.
>
> It really should, though. I _just_ got burned by this due to having
> a binattr sysfs reachable through splice[1]. Most sysfs things aren't
> binattr, but I've always considered this to be a weird fragility in the
> sysfs implementation.

binattr attributes do have the buffer size passed to it, for that very
reason :)

> > So this is designed this way on purpose, you shouldn't have to worry
> > about any of this, and that way, you don't have to "program
> > defensively", it all just works in a simple manner.
>
> Later in this thread there's a suggestion to alter the API to avoid
> individual calls to sprintf(), which seems like a reasonable first step.

I always review any patches submitted, so if someone feels like tackling
this, wonderful!

thanks,

greg k-h

2020-08-27 19:44:34

by Julia Lawall

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs



On Thu, 27 Aug 2020, Joe Perches wrote:

> On Thu, 2020-08-27 at 15:48 +0100, Alex Dewar wrote:
> > On Thu, Aug 27, 2020 at 03:41:06PM +0200, Rasmus Villemoes wrote:
> > > On 27/08/2020 15.18, Alex Dewar wrote:
> > > > On Thu, Aug 27, 2020 at 09:15:37AM +0200, Greg Kroah-Hartman wrote:
> > > > > On Thu, Aug 27, 2020 at 08:42:06AM +0200, Rasmus Villemoes wrote:
> > > > > > On 25/08/2020 00.23, Alex Dewar wrote:
> > > > > > > kernel/cpu.c: don't use snprintf() for sysfs attrs
> > > > > > >
> > > > > > > As per the documentation (Documentation/filesystems/sysfs.rst),
> > > > > > > snprintf() should not be used for formatting values returned by sysfs.
> > > > > > >
> > > > > >
> > > > > > Can we have a sysfs_sprintf() (could just be a macro that does sprintf)
> > > > > > to make it clear to the next reader that we know we're in a sysfs show
> > > > > > method? It would make auditing uses of sprintf() much easier.
> > > > >
> > > > > Code churn to keep code checkers quiet for pointless reasons? What
> > > > > could go wrong with that...
> > >
> > > I did not (mean to) suggest replacing existing sprintf() calls in sysfs
> > > show methods. But when changes _are_ being made, such as when replacing
> > > snprintf() calls for whatever reasons, can we please not make it harder
> > > for people doing manual audits (those are "code checkers" as well, I
> > > suppose, but they do tend to only make noise when finding something).
> > >
> > > > > It should be pretty obvious to any reader that you are in a sysfs show
> > > > > method, as almost all of them are trivially tiny and obvious.
> > >
> > > git grep doesn't immediately show that, not even with a suitable -C
> > > argument, as you can't really know the potential callers unless you open
> > > the file and see that the function is only assigned as a .show method.
> > > And even that can be a pain because it's all hidden behind five levels
> > > of magic macros that build identifiers with ##.
> > >
> > > > Perhaps I should have mentioned this in the commit message, but the problem
> > > > is that snprintf() doesn't return the number of bytes written to the
> > > > destination buffer,
> > >
> > > I'm perfectly well aware of that, TYVM (you may want to 'git log
> > > --author Villemoes lib/vsprintf.c').
> > >
> > > but the number of bytes that *would have been written if
> > > > they fitted*, which may be more than the bounds specified [1]. So "return
> > > > snprintf(...)" for sysfs attributes is an antipattern. If you need bounded
> > > > string ops, scnprintf() is the way to go. Using snprintf() can give a
> > > > false sense of security, because it isn't necessarily safe.
> > >
> > > Huh? This all seems utterly irrelevant WRT a change that replaces
> > > PAGE_SIZE by INT_MAX (because that's what sprintf() is going to pretend
> > > you passed). You get the same return value.
> > >
> > > But I'm not at all concerned about whether one passes the proper buffer
> > > size or not in sysfs show methods; with my embedded hat on, I'm all for
> > > saving a few bytes of .text here and there. The problem, as far as I'm
> > > concerned, is merely that adding sprintf() callers makes it harder to
> > > find the problematic sprintf() instances.
> > >
> >
> > Apologies, I think I might have expressed myself poorly, being a kernel noob
> > ;-). I know that this is a stylistic change rather than a functional
> > one -- I meant that I was hoping that it would be helpful to get rid of bad
> > uses of snprintf().
> >
> > I really like your idea of helper methods though :-). If in show()
> > methods we could have something like:
> > return sysfs_itoa(buf, i);
> > in place of:
> > return sprintf(buf, "%d\n", i);
> >
> > ... then we wouldn't be introducing any new calls to sprintf() as you
> > say, but we'd still be removing a call to snprintf() (which also may be
> > problematic). Plus we'd have type checking on the argument.
> >
> > For returning strings, we could have a bounded and unbounded variant of
> > the function. As it seems like only single values should be returned via
> > sysfs, if we did things this way then it would only be these
> > string-returning functions which could cause buffer overflow problems
> > and kernel devs could focus their attention accordingly...
> >
> > What do people think? I'm happy to have a crack, provided this is
> > actually a sensible thing to do! I'm looking for a newbie-level project
> > to get started with.
>
> Not a bad idea.
>
> Coccinelle should be able to transform the various .show
> methods to something sysfs_ prefixed in a fairly automated
> way.

Something like

identifier f;
fresh identifier = "sysfs" ## f;

may be useful. Let me know if further help is needed.

julia



>
>
>
>
> _______________________________________________
> Cocci mailing list
> [email protected]
> https://systeme.lip6.fr/mailman/listinfo/cocci
>

2020-08-27 20:30:50

by Joe Perches

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Thu, 2020-08-27 at 21:42 +0200, Julia Lawall wrote:
>
> On Thu, 27 Aug 2020, Joe Perches wrote:
>
> > On Thu, 2020-08-27 at 15:48 +0100, Alex Dewar wrote:
> > > On Thu, Aug 27, 2020 at 03:41:06PM +0200, Rasmus Villemoes wrote:
> > > > On 27/08/2020 15.18, Alex Dewar wrote:
> > > > > On Thu, Aug 27, 2020 at 09:15:37AM +0200, Greg Kroah-Hartman wrote:
> > > > > > On Thu, Aug 27, 2020 at 08:42:06AM +0200, Rasmus Villemoes wrote:
> > > > > > > On 25/08/2020 00.23, Alex Dewar wrote:
> > > > > > > > kernel/cpu.c: don't use snprintf() for sysfs attrs
> > > > > > > >
> > > > > > > > As per the documentation (Documentation/filesystems/sysfs.rst),
> > > > > > > > snprintf() should not be used for formatting values returned by sysfs.
> > > > > > > >
> > > > > > >
> > > > > > > Can we have a sysfs_sprintf() (could just be a macro that does sprintf)
> > > > > > > to make it clear to the next reader that we know we're in a sysfs show
> > > > > > > method? It would make auditing uses of sprintf() much easier.
> > > > > >
> > > > > > Code churn to keep code checkers quiet for pointless reasons? What
> > > > > > could go wrong with that...
> > > >
> > > > I did not (mean to) suggest replacing existing sprintf() calls in sysfs
> > > > show methods. But when changes _are_ being made, such as when replacing
> > > > snprintf() calls for whatever reasons, can we please not make it harder
> > > > for people doing manual audits (those are "code checkers" as well, I
> > > > suppose, but they do tend to only make noise when finding something).
> > > >
> > > > > > It should be pretty obvious to any reader that you are in a sysfs show
> > > > > > method, as almost all of them are trivially tiny and obvious.
> > > >
> > > > git grep doesn't immediately show that, not even with a suitable -C
> > > > argument, as you can't really know the potential callers unless you open
> > > > the file and see that the function is only assigned as a .show method.
> > > > And even that can be a pain because it's all hidden behind five levels
> > > > of magic macros that build identifiers with ##.
> > > >
> > > > > Perhaps I should have mentioned this in the commit message, but the problem
> > > > > is that snprintf() doesn't return the number of bytes written to the
> > > > > destination buffer,
> > > >
> > > > I'm perfectly well aware of that, TYVM (you may want to 'git log
> > > > --author Villemoes lib/vsprintf.c').
> > > >
> > > > but the number of bytes that *would have been written if
> > > > > they fitted*, which may be more than the bounds specified [1]. So "return
> > > > > snprintf(...)" for sysfs attributes is an antipattern. If you need bounded
> > > > > string ops, scnprintf() is the way to go. Using snprintf() can give a
> > > > > false sense of security, because it isn't necessarily safe.
> > > >
> > > > Huh? This all seems utterly irrelevant WRT a change that replaces
> > > > PAGE_SIZE by INT_MAX (because that's what sprintf() is going to pretend
> > > > you passed). You get the same return value.
> > > >
> > > > But I'm not at all concerned about whether one passes the proper buffer
> > > > size or not in sysfs show methods; with my embedded hat on, I'm all for
> > > > saving a few bytes of .text here and there. The problem, as far as I'm
> > > > concerned, is merely that adding sprintf() callers makes it harder to
> > > > find the problematic sprintf() instances.
> > > >
> > >
> > > Apologies, I think I might have expressed myself poorly, being a kernel noob
> > > ;-). I know that this is a stylistic change rather than a functional
> > > one -- I meant that I was hoping that it would be helpful to get rid of bad
> > > uses of snprintf().
> > >
> > > I really like your idea of helper methods though :-). If in show()
> > > methods we could have something like:
> > > return sysfs_itoa(buf, i);
> > > in place of:
> > > return sprintf(buf, "%d\n", i);
> > >
> > > ... then we wouldn't be introducing any new calls to sprintf() as you
> > > say, but we'd still be removing a call to snprintf() (which also may be
> > > problematic). Plus we'd have type checking on the argument.
> > >
> > > For returning strings, we could have a bounded and unbounded variant of
> > > the function. As it seems like only single values should be returned via
> > > sysfs, if we did things this way then it would only be these
> > > string-returning functions which could cause buffer overflow problems
> > > and kernel devs could focus their attention accordingly...
> > >
> > > What do people think? I'm happy to have a crack, provided this is
> > > actually a sensible thing to do! I'm looking for a newbie-level project
> > > to get started with.
> >
> > Not a bad idea.
> >
> > Coccinelle should be able to transform the various .show
> > methods to something sysfs_ prefixed in a fairly automated
> > way.
>
> Something like
>
> identifier f;
> fresh identifier = "sysfs" ## f;
>
> may be useful. Let me know if further help is needed.

Perhaps it's a bit more complicated.

Perhaps what's necessary is to find any
appropriate .show function and change
any use of strcpy/sprintf within those
function to some other name.

For instance:

drivers/isdn/mISDN/core.c-static ssize_t name_show(struct device *dev,
drivers/isdn/mISDN/core.c- struct device_attribute *attr, char *buf)
drivers/isdn/mISDN/core.c-{
drivers/isdn/mISDN/core.c: strcpy(buf, dev_name(dev));
drivers/isdn/mISDN/core.c- return strlen(buf);
drivers/isdn/mISDN/core.c-}
drivers/isdn/mISDN/core.c-static DEVICE_ATTR_RO(name);

and macroized uses like:

drivers/base/node.c-#define CACHE_ATTR(name, fmt) \
drivers/base/node.c-static ssize_t name##_show(struct device *dev, \
drivers/base/node.c- struct device_attribute *attr, \
drivers/base/node.c- char *buf) \
drivers/base/node.c-{ \
drivers/base/node.c- return sprintf(buf, fmt "\n", to_cache_info(dev)->cache_attrs.name);\
drivers/base/node.c-} \
drivers/base/node.c:DEVICE_ATTR_RO(name);
drivers/base/node.c-
drivers/base/node.c-CACHE_ATTR(size, "%llu")
drivers/base/node.c-CACHE_ATTR(line_size, "%u")
drivers/base/node.c-CACHE_ATTR(indexing, "%u")
drivers/base/node.c-CACHE_ATTR(write_policy, "%u")

2020-08-27 21:02:30

by Joe Perches

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Thu, 2020-08-27 at 13:29 -0700, Joe Perches wrote:
> On Thu, 2020-08-27 at 21:42 +0200, Julia Lawall wrote:
> > On Thu, 27 Aug 2020, Joe Perches wrote:
> >
> > > On Thu, 2020-08-27 at 15:48 +0100, Alex Dewar wrote:
> > > > On Thu, Aug 27, 2020 at 03:41:06PM +0200, Rasmus Villemoes wrote:
> > > > > On 27/08/2020 15.18, Alex Dewar wrote:
> > > > > > On Thu, Aug 27, 2020 at 09:15:37AM +0200, Greg Kroah-Hartman wrote:
> > > > > > > On Thu, Aug 27, 2020 at 08:42:06AM +0200, Rasmus Villemoes wrote:
> > > > > > > > On 25/08/2020 00.23, Alex Dewar wrote:
> > > > > > > > > kernel/cpu.c: don't use snprintf() for sysfs attrs
> > > > > > > > >
> > > > > > > > > As per the documentation (Documentation/filesystems/sysfs.rst),
> > > > > > > > > snprintf() should not be used for formatting values returned by sysfs.
> > > > > > > > >
> > > > > > > >
> > > > > > > > Can we have a sysfs_sprintf() (could just be a macro that does sprintf)
> > > > > > > > to make it clear to the next reader that we know we're in a sysfs show
> > > > > > > > method? It would make auditing uses of sprintf() much easier.
> > > > > > >
> > > > > > > Code churn to keep code checkers quiet for pointless reasons? What
> > > > > > > could go wrong with that...
> > > > >
> > > > > I did not (mean to) suggest replacing existing sprintf() calls in sysfs
> > > > > show methods. But when changes _are_ being made, such as when replacing
> > > > > snprintf() calls for whatever reasons, can we please not make it harder
> > > > > for people doing manual audits (those are "code checkers" as well, I
> > > > > suppose, but they do tend to only make noise when finding something).
> > > > >
> > > > > > > It should be pretty obvious to any reader that you are in a sysfs show
> > > > > > > method, as almost all of them are trivially tiny and obvious.
> > > > >
> > > > > git grep doesn't immediately show that, not even with a suitable -C
> > > > > argument, as you can't really know the potential callers unless you open
> > > > > the file and see that the function is only assigned as a .show method.
> > > > > And even that can be a pain because it's all hidden behind five levels
> > > > > of magic macros that build identifiers with ##.
> > > > >
> > > > > > Perhaps I should have mentioned this in the commit message, but the problem
> > > > > > is that snprintf() doesn't return the number of bytes written to the
> > > > > > destination buffer,
> > > > >
> > > > > I'm perfectly well aware of that, TYVM (you may want to 'git log
> > > > > --author Villemoes lib/vsprintf.c').
> > > > >
> > > > > but the number of bytes that *would have been written if
> > > > > > they fitted*, which may be more than the bounds specified [1]. So "return
> > > > > > snprintf(...)" for sysfs attributes is an antipattern. If you need bounded
> > > > > > string ops, scnprintf() is the way to go. Using snprintf() can give a
> > > > > > false sense of security, because it isn't necessarily safe.
> > > > >
> > > > > Huh? This all seems utterly irrelevant WRT a change that replaces
> > > > > PAGE_SIZE by INT_MAX (because that's what sprintf() is going to pretend
> > > > > you passed). You get the same return value.
> > > > >
> > > > > But I'm not at all concerned about whether one passes the proper buffer
> > > > > size or not in sysfs show methods; with my embedded hat on, I'm all for
> > > > > saving a few bytes of .text here and there. The problem, as far as I'm
> > > > > concerned, is merely that adding sprintf() callers makes it harder to
> > > > > find the problematic sprintf() instances.
> > > > >
> > > >
> > > > Apologies, I think I might have expressed myself poorly, being a kernel noob
> > > > ;-). I know that this is a stylistic change rather than a functional
> > > > one -- I meant that I was hoping that it would be helpful to get rid of bad
> > > > uses of snprintf().
> > > >
> > > > I really like your idea of helper methods though :-). If in show()
> > > > methods we could have something like:
> > > > return sysfs_itoa(buf, i);
> > > > in place of:
> > > > return sprintf(buf, "%d\n", i);
> > > >
> > > > ... then we wouldn't be introducing any new calls to sprintf() as you
> > > > say, but we'd still be removing a call to snprintf() (which also may be
> > > > problematic). Plus we'd have type checking on the argument.
> > > >
> > > > For returning strings, we could have a bounded and unbounded variant of
> > > > the function. As it seems like only single values should be returned via
> > > > sysfs, if we did things this way then it would only be these
> > > > string-returning functions which could cause buffer overflow problems
> > > > and kernel devs could focus their attention accordingly...
> > > >
> > > > What do people think? I'm happy to have a crack, provided this is
> > > > actually a sensible thing to do! I'm looking for a newbie-level project
> > > > to get started with.
> > >
> > > Not a bad idea.
> > >
> > > Coccinelle should be able to transform the various .show
> > > methods to something sysfs_ prefixed in a fairly automated
> > > way.
> >
> > Something like
> >
> > identifier f;
> > fresh identifier = "sysfs" ## f;
> >
> > may be useful. Let me know if further help is needed.

cocci syntax eludes me, but I imagine something like:

@@
identifier f_show =~ "^.*_show$";
@@
ssize_t f_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
- sprintf
+ kobj_sprintf
(...);


2020-08-27 21:03:31

by Denis Efremov (Oracle)

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs

Hi all,

On 8/27/20 10:42 PM, Julia Lawall wrote:
>
>
> On Thu, 27 Aug 2020, Joe Perches wrote:
>
>> On Thu, 2020-08-27 at 15:48 +0100, Alex Dewar wrote:
>>> On Thu, Aug 27, 2020 at 03:41:06PM +0200, Rasmus Villemoes wrote:
>>>> On 27/08/2020 15.18, Alex Dewar wrote:
>>>>> On Thu, Aug 27, 2020 at 09:15:37AM +0200, Greg Kroah-Hartman wrote:
>>>>>> On Thu, Aug 27, 2020 at 08:42:06AM +0200, Rasmus Villemoes wrote:
>>>>>>> On 25/08/2020 00.23, Alex Dewar wrote:
>>>>>>>> kernel/cpu.c: don't use snprintf() for sysfs attrs
>>>>>>>>
>>>>>>>> As per the documentation (Documentation/filesystems/sysfs.rst),
>>>>>>>> snprintf() should not be used for formatting values returned by sysfs.

Just FYI, I've send an addition to the device_attr_show.cocci script[1] to turn
simple cases of snprintf (e.g. "%i") to sprintf. Looks like many developers would
like it more than changing snprintf to scnprintf. As for me, I don't like the idea
of automated altering of the original logic from bounded snprint to unbouded one
with sprintf.

[1] https://lkml.org/lkml/2020/8/13/786

Regarding current device_attr_show.cocci implementation, it detects the functions
by declaration:
ssize_t any_name(struct device *dev, struct device_attribute *attr, char *buf)

and I limited the check to:
"return snprintf"
pattern because there are already too many warnings.

Actually, it looks more correct to check for:
ssize_t show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
* snprintf@p(...);
...>
}

This pattern should also highlight the snprintf calls there we save returned
value in a var, e.g.:

ret += snprintf(...);
...
ret += snprintf(...);
...
ret += snprintf(...);

return ret;

>
> Something like
>
> identifier f;
> fresh identifier = "sysfs" ## f;
>
> may be useful. Let me know if further help is needed.

Initially, I wrote the rule to search for DEVICE_ATTR(..., ..., func_name, ...)
functions. However, it looks like matching function prototype is enough. At least,
I failed to find false positives. I rejected the initial DEVICE_ATTR() searching
because I thought that it's impossible to handle DEVICE_ATTR_RO()/DEVICE_ATTR_RW()
macroses with coccinelle as they "generate" function names internally with
"##". "fresh identifier" should really help here, but now I doubt it's required in
device_attr_show.cocci, function prototype is enough.

Thanks,
Denis

2020-08-27 21:32:59

by Julia Lawall

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs



On Thu, 27 Aug 2020, Joe Perches wrote:

> On Thu, 2020-08-27 at 21:42 +0200, Julia Lawall wrote:
> >
> > On Thu, 27 Aug 2020, Joe Perches wrote:
> >
> > > On Thu, 2020-08-27 at 15:48 +0100, Alex Dewar wrote:
> > > > On Thu, Aug 27, 2020 at 03:41:06PM +0200, Rasmus Villemoes wrote:
> > > > > On 27/08/2020 15.18, Alex Dewar wrote:
> > > > > > On Thu, Aug 27, 2020 at 09:15:37AM +0200, Greg Kroah-Hartman wrote:
> > > > > > > On Thu, Aug 27, 2020 at 08:42:06AM +0200, Rasmus Villemoes wrote:
> > > > > > > > On 25/08/2020 00.23, Alex Dewar wrote:
> > > > > > > > > kernel/cpu.c: don't use snprintf() for sysfs attrs
> > > > > > > > >
> > > > > > > > > As per the documentation (Documentation/filesystems/sysfs.rst),
> > > > > > > > > snprintf() should not be used for formatting values returned by sysfs.
> > > > > > > > >
> > > > > > > >
> > > > > > > > Can we have a sysfs_sprintf() (could just be a macro that does sprintf)
> > > > > > > > to make it clear to the next reader that we know we're in a sysfs show
> > > > > > > > method? It would make auditing uses of sprintf() much easier.
> > > > > > >
> > > > > > > Code churn to keep code checkers quiet for pointless reasons? What
> > > > > > > could go wrong with that...
> > > > >
> > > > > I did not (mean to) suggest replacing existing sprintf() calls in sysfs
> > > > > show methods. But when changes _are_ being made, such as when replacing
> > > > > snprintf() calls for whatever reasons, can we please not make it harder
> > > > > for people doing manual audits (those are "code checkers" as well, I
> > > > > suppose, but they do tend to only make noise when finding something).
> > > > >
> > > > > > > It should be pretty obvious to any reader that you are in a sysfs show
> > > > > > > method, as almost all of them are trivially tiny and obvious.
> > > > >
> > > > > git grep doesn't immediately show that, not even with a suitable -C
> > > > > argument, as you can't really know the potential callers unless you open
> > > > > the file and see that the function is only assigned as a .show method.
> > > > > And even that can be a pain because it's all hidden behind five levels
> > > > > of magic macros that build identifiers with ##.
> > > > >
> > > > > > Perhaps I should have mentioned this in the commit message, but the problem
> > > > > > is that snprintf() doesn't return the number of bytes written to the
> > > > > > destination buffer,
> > > > >
> > > > > I'm perfectly well aware of that, TYVM (you may want to 'git log
> > > > > --author Villemoes lib/vsprintf.c').
> > > > >
> > > > > but the number of bytes that *would have been written if
> > > > > > they fitted*, which may be more than the bounds specified [1]. So "return
> > > > > > snprintf(...)" for sysfs attributes is an antipattern. If you need bounded
> > > > > > string ops, scnprintf() is the way to go. Using snprintf() can give a
> > > > > > false sense of security, because it isn't necessarily safe.
> > > > >
> > > > > Huh? This all seems utterly irrelevant WRT a change that replaces
> > > > > PAGE_SIZE by INT_MAX (because that's what sprintf() is going to pretend
> > > > > you passed). You get the same return value.
> > > > >
> > > > > But I'm not at all concerned about whether one passes the proper buffer
> > > > > size or not in sysfs show methods; with my embedded hat on, I'm all for
> > > > > saving a few bytes of .text here and there. The problem, as far as I'm
> > > > > concerned, is merely that adding sprintf() callers makes it harder to
> > > > > find the problematic sprintf() instances.
> > > > >
> > > >
> > > > Apologies, I think I might have expressed myself poorly, being a kernel noob
> > > > ;-). I know that this is a stylistic change rather than a functional
> > > > one -- I meant that I was hoping that it would be helpful to get rid of bad
> > > > uses of snprintf().
> > > >
> > > > I really like your idea of helper methods though :-). If in show()
> > > > methods we could have something like:
> > > > return sysfs_itoa(buf, i);
> > > > in place of:
> > > > return sprintf(buf, "%d\n", i);
> > > >
> > > > ... then we wouldn't be introducing any new calls to sprintf() as you
> > > > say, but we'd still be removing a call to snprintf() (which also may be
> > > > problematic). Plus we'd have type checking on the argument.
> > > >
> > > > For returning strings, we could have a bounded and unbounded variant of
> > > > the function. As it seems like only single values should be returned via
> > > > sysfs, if we did things this way then it would only be these
> > > > string-returning functions which could cause buffer overflow problems
> > > > and kernel devs could focus their attention accordingly...
> > > >
> > > > What do people think? I'm happy to have a crack, provided this is
> > > > actually a sensible thing to do! I'm looking for a newbie-level project
> > > > to get started with.
> > >
> > > Not a bad idea.
> > >
> > > Coccinelle should be able to transform the various .show
> > > methods to something sysfs_ prefixed in a fairly automated
> > > way.
> >
> > Something like
> >
> > identifier f;
> > fresh identifier = "sysfs" ## f;
> >
> > may be useful. Let me know if further help is needed.
>
> Perhaps it's a bit more complicated.
>
> Perhaps what's necessary is to find any
> appropriate .show function and change
> any use of strcpy/sprintf within those
> function to some other name.
>
> For instance:
>
> drivers/isdn/mISDN/core.c-static ssize_t name_show(struct device *dev,
> drivers/isdn/mISDN/core.c- struct device_attribute *attr, char *buf)
> drivers/isdn/mISDN/core.c-{
> drivers/isdn/mISDN/core.c: strcpy(buf, dev_name(dev));
> drivers/isdn/mISDN/core.c- return strlen(buf);
> drivers/isdn/mISDN/core.c-}
> drivers/isdn/mISDN/core.c-static DEVICE_ATTR_RO(name);
>
> and macroized uses like:
>
> drivers/base/node.c-#define CACHE_ATTR(name, fmt) \
> drivers/base/node.c-static ssize_t name##_show(struct device *dev, \
> drivers/base/node.c- struct device_attribute *attr, \
> drivers/base/node.c- char *buf) \
> drivers/base/node.c-{ \
> drivers/base/node.c- return sprintf(buf, fmt "\n", to_cache_info(dev)->cache_attrs.name);\
> drivers/base/node.c-} \
> drivers/base/node.c:DEVICE_ATTR_RO(name);
> drivers/base/node.c-
> drivers/base/node.c-CACHE_ATTR(size, "%llu")
> drivers/base/node.c-CACHE_ATTR(line_size, "%u")
> drivers/base/node.c-CACHE_ATTR(indexing, "%u")
> drivers/base/node.c-CACHE_ATTR(write_policy, "%u")

Coccinelle would fail on these.

julia

2020-08-27 21:37:41

by Julia Lawall

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs



On Fri, 28 Aug 2020, Denis Efremov wrote:

> Hi all,
>
> On 8/27/20 10:42 PM, Julia Lawall wrote:
> >
> >
> > On Thu, 27 Aug 2020, Joe Perches wrote:
> >
> >> On Thu, 2020-08-27 at 15:48 +0100, Alex Dewar wrote:
> >>> On Thu, Aug 27, 2020 at 03:41:06PM +0200, Rasmus Villemoes wrote:
> >>>> On 27/08/2020 15.18, Alex Dewar wrote:
> >>>>> On Thu, Aug 27, 2020 at 09:15:37AM +0200, Greg Kroah-Hartman wrote:
> >>>>>> On Thu, Aug 27, 2020 at 08:42:06AM +0200, Rasmus Villemoes wrote:
> >>>>>>> On 25/08/2020 00.23, Alex Dewar wrote:
> >>>>>>>> kernel/cpu.c: don't use snprintf() for sysfs attrs
> >>>>>>>>
> >>>>>>>> As per the documentation (Documentation/filesystems/sysfs.rst),
> >>>>>>>> snprintf() should not be used for formatting values returned by sysfs.
>
> Just FYI, I've send an addition to the device_attr_show.cocci script[1] to turn
> simple cases of snprintf (e.g. "%i") to sprintf. Looks like many developers would
> like it more than changing snprintf to scnprintf. As for me, I don't like the idea
> of automated altering of the original logic from bounded snprint to unbouded one
> with sprintf.
>
> [1] https://lkml.org/lkml/2020/8/13/786
>
> Regarding current device_attr_show.cocci implementation, it detects the functions
> by declaration:
> ssize_t any_name(struct device *dev, struct device_attribute *attr, char *buf)
>
> and I limited the check to:
> "return snprintf"
> pattern because there are already too many warnings.
>
> Actually, it looks more correct to check for:
> ssize_t show(struct device *dev, struct device_attribute *attr, char *buf)
> {
> <...
> * snprintf@p(...);
> ...>
> }
>
> This pattern should also highlight the snprintf calls there we save returned
> value in a var, e.g.:
>
> ret += snprintf(...);
> ...
> ret += snprintf(...);
> ...
> ret += snprintf(...);
>
> return ret;
>
> >
> > Something like
> >
> > identifier f;
> > fresh identifier = "sysfs" ## f;
> >
> > may be useful. Let me know if further help is needed.
>
> Initially, I wrote the rule to search for DEVICE_ATTR(..., ..., func_name, ...)

This is what I would have expected.

> functions. However, it looks like matching function prototype is enough. At least,
> I failed to find false positives. I rejected the initial DEVICE_ATTR() searching
> because I thought that it's impossible to handle DEVICE_ATTR_RO()/DEVICE_ATTR_RW()
> macroses with coccinelle as they "generate" function names internally with
> "##". "fresh identifier" should really help here, but now I doubt it's required in
> device_attr_show.cocci, function prototype is enough.

It's true that it is probably unique enough.

julia

2020-08-27 21:48:48

by Joe Perches

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Thu, 2020-08-27 at 23:36 +0200, Julia Lawall wrote:
> On Fri, 28 Aug 2020, Denis Efremov wrote:
[]
> Regarding current device_attr_show.cocci implementation, it detects the functions
> > by declaration:
> > ssize_t any_name(struct device *dev, struct device_attribute *attr, char *buf)
> >
> > and I limited the check to:
> > "return snprintf"
> > pattern because there are already too many warnings.
> >
> > Actually, it looks more correct to check for:
> > ssize_t show(struct device *dev, struct device_attribute *attr, char *buf)
> > {
> > <...
> > * snprintf@p(...);
> > ...>
> > }
> >
> > This pattern should also highlight the snprintf calls there we save returned
> > value in a var, e.g.:
> >
> > ret += snprintf(...);
> > ...
> > ret += snprintf(...);
> > ...
> > ret += snprintf(...);
> >
> > return ret;
> >
> > > Something like
> > >
> > > identifier f;
> > > fresh identifier = "sysfs" ## f;
> > >
> > > may be useful. Let me know if further help is needed.
> >
> > Initially, I wrote the rule to search for DEVICE_ATTR(..., ..., func_name, ...)
>
> This is what I would have expected.
>
> > functions. However, it looks like matching function prototype is enough. At least,
> > I failed to find false positives. I rejected the initial DEVICE_ATTR() searching
> > because I thought that it's impossible to handle DEVICE_ATTR_RO()/DEVICE_ATTR_RW()
> > macroses with coccinelle as they "generate" function names internally with
> > "##". "fresh identifier" should really help here, but now I doubt it's required in
> > device_attr_show.cocci, function prototype is enough.
>
> It's true that it is probably unique enough.

I tried:
@@
identifier f_show =~ "^.*_show$";
identifier dev, attr, buf;
const char *chr;
@@
ssize_t f_show(struct device *dev, struct device_attribute *attr, char
*buf)
{
<...
(
- sprintf
+ sysfs_sprintf
(...);
|
- snprintf(buf, PAGE_SIZE,
+ sysfs_sprintf(buf,
...);
|
- scnprintf(buf, PAGE_SIZE,
+ sysfs_sprintf(buf,
...);
|
strcpy(buf, chr);
sysfs_strcpy(buf, chr);
)
...>
}

which finds direct statements without an assign
but that doesn't find

arch/arm/common/dmabounce.c:static ssize_t dmabounce_show(struct device *dev, struct device_attribute *attr, char *buf)
arch/arm/common/dmabounce.c-{
arch/arm/common/dmabounce.c- struct dmabounce_device_info *device_info = dev->archdata.dmabounce;
arch/arm/common/dmabounce.c- return sprintf(buf, "%lu %lu %lu %lu %lu %lu\n",
arch/arm/common/dmabounce.c- device_info->small.allocs,
arch/arm/common/dmabounce.c- device_info->large.allocs,
arch/arm/common/dmabounce.c- device_info->total_allocs - device_info->small.allocs -
arch/arm/common/dmabounce.c- device_info->large.allocs,
arch/arm/common/dmabounce.c- device_info->total_allocs,
arch/arm/common/dmabounce.c- device_info->map_op_count,
arch/arm/common/dmabounce.c- device_info->bounce_count);
arch/arm/common/dmabounce.c-}


2020-08-27 21:56:02

by David Laight

[permalink] [raw]
Subject: RE: [PATCH] usb: atm: don't use snprintf() for sysfs attrs

From: Joe Perches
> Sent: 27 August 2020 17:59
> To: Alex Dewar <[email protected]>; Rasmus Villemoes <[email protected]>; cocci
> <[email protected]>
> Cc: Greg Kroah-Hartman <[email protected]>; Kees Cook <[email protected]>; Gustavo A. R.
> Silva <[email protected]>; [email protected]; [email protected];
> [email protected]
> Subject: Re: [PATCH] usb: atm: don't use snprintf() for sysfs attrs
>
> On Thu, 2020-08-27 at 15:48 +0100, Alex Dewar wrote:
> > On Thu, Aug 27, 2020 at 03:41:06PM +0200, Rasmus Villemoes wrote:
> > > On 27/08/2020 15.18, Alex Dewar wrote:
> > > > On Thu, Aug 27, 2020 at 09:15:37AM +0200, Greg Kroah-Hartman wrote:
> > > > > On Thu, Aug 27, 2020 at 08:42:06AM +0200, Rasmus Villemoes wrote:
> > > > > > On 25/08/2020 00.23, Alex Dewar wrote:
> > > > > > > kernel/cpu.c: don't use snprintf() for sysfs attrs
> > > > > > >
> > > > > > > As per the documentation (Documentation/filesystems/sysfs.rst),
> > > > > > > snprintf() should not be used for formatting values returned by sysfs.
> > > > > > >
> > > > > >
> > > > > > Can we have a sysfs_sprintf() (could just be a macro that does sprintf)
> > > > > > to make it clear to the next reader that we know we're in a sysfs show
> > > > > > method? It would make auditing uses of sprintf() much easier.
> > > > >
> > > > > Code churn to keep code checkers quiet for pointless reasons? What
> > > > > could go wrong with that...
> > >
> > > I did not (mean to) suggest replacing existing sprintf() calls in sysfs
> > > show methods. But when changes _are_ being made, such as when replacing
> > > snprintf() calls for whatever reasons, can we please not make it harder
> > > for people doing manual audits (those are "code checkers" as well, I
> > > suppose, but they do tend to only make noise when finding something).
> > >
> > > > > It should be pretty obvious to any reader that you are in a sysfs show
> > > > > method, as almost all of them are trivially tiny and obvious.
> > >
> > > git grep doesn't immediately show that, not even with a suitable -C
> > > argument, as you can't really know the potential callers unless you open
> > > the file and see that the function is only assigned as a .show method.
> > > And even that can be a pain because it's all hidden behind five levels
> > > of magic macros that build identifiers with ##.
> > >
> > > > Perhaps I should have mentioned this in the commit message, but the problem
> > > > is that snprintf() doesn't return the number of bytes written to the
> > > > destination buffer,
> > >
> > > I'm perfectly well aware of that, TYVM (you may want to 'git log
> > > --author Villemoes lib/vsprintf.c').
> > >
> > > but the number of bytes that *would have been written if
> > > > they fitted*, which may be more than the bounds specified [1]. So "return
> > > > snprintf(...)" for sysfs attributes is an antipattern. If you need bounded
> > > > string ops, scnprintf() is the way to go. Using snprintf() can give a
> > > > false sense of security, because it isn't necessarily safe.
> > >
> > > Huh? This all seems utterly irrelevant WRT a change that replaces
> > > PAGE_SIZE by INT_MAX (because that's what sprintf() is going to pretend
> > > you passed). You get the same return value.
> > >
> > > But I'm not at all concerned about whether one passes the proper buffer
> > > size or not in sysfs show methods; with my embedded hat on, I'm all for
> > > saving a few bytes of .text here and there. The problem, as far as I'm
> > > concerned, is merely that adding sprintf() callers makes it harder to
> > > find the problematic sprintf() instances.
> > >
> >
> > Apologies, I think I might have expressed myself poorly, being a kernel noob
> > ;-). I know that this is a stylistic change rather than a functional
> > one -- I meant that I was hoping that it would be helpful to get rid of bad
> > uses of snprintf().
> >
> > I really like your idea of helper methods though :-). If in show()
> > methods we could have something like:
> > return sysfs_itoa(buf, i);
> > in place of:
> > return sprintf(buf, "%d\n", i);
> >
> > ... then we wouldn't be introducing any new calls to sprintf() as you
> > say, but we'd still be removing a call to snprintf() (which also may be
> > problematic). Plus we'd have type checking on the argument.
> >
> > For returning strings, we could have a bounded and unbounded variant of
> > the function. As it seems like only single values should be returned via
> > sysfs, if we did things this way then it would only be these
> > string-returning functions which could cause buffer overflow problems
> > and kernel devs could focus their attention accordingly...
> >
> > What do people think? I'm happy to have a crack, provided this is
> > actually a sensible thing to do! I'm looking for a newbie-level project
> > to get started with.

The problem with that idea is that is the code needs to
merge the output of two values or split an integer as nnn.nn
then it needs to do something different from the 'normal' code.

If the buffer is always PAGE_SIZE the why not embed it in
a structure.
The generated code will be the same, but it will be absolutely
explicit that the size is PAGE_SIZE if the code filling in the
buffer decides it needs to check.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

2020-08-27 22:06:40

by David Laight

[permalink] [raw]
Subject: RE: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs

From: Joe Perches
> Sent: 27 August 2020 21:30
...
> Perhaps what's necessary is to find any
> appropriate .show function and change
> any use of strcpy/sprintf within those
> function to some other name.
>
> For instance:
>
> drivers/isdn/mISDN/core.c-static ssize_t name_show(struct device *dev,
> drivers/isdn/mISDN/core.c- struct device_attribute *attr, char *buf)
> drivers/isdn/mISDN/core.c-{
> drivers/isdn/mISDN/core.c: strcpy(buf, dev_name(dev));
> drivers/isdn/mISDN/core.c- return strlen(buf);
> drivers/isdn/mISDN/core.c-}
> drivers/isdn/mISDN/core.c-static DEVICE_ATTR_RO(name);

That form ends up calculating the string length twice.
Better would be:
len = strlen(msg);
memcpy(buf, msg, len);
return len;


David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

2020-08-27 22:13:28

by Joe Perches

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Thu, 2020-08-27 at 22:03 +0000, David Laight wrote:
> From: Joe Perches
> > Sent: 27 August 2020 21:30
> ...
> > Perhaps what's necessary is to find any
> > appropriate .show function and change
> > any use of strcpy/sprintf within those
> > function to some other name.
> >
> > For instance:
> >
> > drivers/isdn/mISDN/core.c-static ssize_t name_show(struct device *dev,
> > drivers/isdn/mISDN/core.c- struct device_attribute *attr, char *buf)
> > drivers/isdn/mISDN/core.c-{
> > drivers/isdn/mISDN/core.c: strcpy(buf, dev_name(dev));
> > drivers/isdn/mISDN/core.c- return strlen(buf);
> > drivers/isdn/mISDN/core.c-}
> > drivers/isdn/mISDN/core.c-static DEVICE_ATTR_RO(name);
>
> That form ends up calculating the string length twice.
> Better would be:
> len = strlen(msg);
> memcpy(buf, msg, len);
> return len;

or given clang's requirement for stpcpy

return stpcpy(buf, dev_name(dev)) - buf;

(I do not advocate for this ;)

2020-08-27 22:20:24

by Kees Cook

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Thu, Aug 27, 2020 at 03:11:57PM -0700, Joe Perches wrote:
> On Thu, 2020-08-27 at 22:03 +0000, David Laight wrote:
> > From: Joe Perches
> > > Sent: 27 August 2020 21:30
> > ...
> > > Perhaps what's necessary is to find any
> > > appropriate .show function and change
> > > any use of strcpy/sprintf within those
> > > function to some other name.
> > >
> > > For instance:
> > >
> > > drivers/isdn/mISDN/core.c-static ssize_t name_show(struct device *dev,
> > > drivers/isdn/mISDN/core.c- struct device_attribute *attr, char *buf)
> > > drivers/isdn/mISDN/core.c-{
> > > drivers/isdn/mISDN/core.c: strcpy(buf, dev_name(dev));
> > > drivers/isdn/mISDN/core.c- return strlen(buf);
> > > drivers/isdn/mISDN/core.c-}
> > > drivers/isdn/mISDN/core.c-static DEVICE_ATTR_RO(name);
> >
> > That form ends up calculating the string length twice.
> > Better would be:
> > len = strlen(msg);
> > memcpy(buf, msg, len);
> > return len;
>
> or given clang's requirement for stpcpy
>
> return stpcpy(buf, dev_name(dev)) - buf;
>
> (I do not advocate for this ;)

Heh. And humans aren't allowed to use stpcpy() in the kernel. :)

--
Kees Cook

2020-08-27 22:24:07

by Kees Cook

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Fri, Aug 28, 2020 at 12:01:34AM +0300, Denis Efremov wrote:
> Just FYI, I've send an addition to the device_attr_show.cocci script[1] to turn
> simple cases of snprintf (e.g. "%i") to sprintf. Looks like many developers would
> like it more than changing snprintf to scnprintf. As for me, I don't like the idea
> of automated altering of the original logic from bounded snprint to unbouded one
> with sprintf.

Agreed. This just makes me cringe. If the API design declares that when
a show() callback starts, buf has been allocated with PAGE_SIZE bytes,
then that's how the logic should proceed, and it should be using
scnprintf...

show(...) {
size_t remaining = PAGE_SIZE;

...
remaining -= scnprintf(buf, remaining, "fmt", var args ...);
remaining -= scnprintf(buf, remaining, "fmt", var args ...);
remaining -= scnprintf(buf, remaining, "fmt", var args ...);

return PAGE_SIZE - remaining;
}

--
Kees Cook

2020-08-27 22:41:46

by Denis Efremov (Oracle)

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs

>
> I tried:
> @@
> identifier f_show =~ "^.*_show$";


This will miss this kind of functions:
./drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c:1953:static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version,
./drivers/gpu/drm/amd/amdgpu/df_v3_6.c:266:static DEVICE_ATTR(df_cntr_avail, S_IRUGO, df_v3_6_get_df_cntr_avail, NULL);
./drivers/input/touchscreen/melfas_mip4.c:1348:static DEVICE_ATTR(fw_version, S_IRUGO, mip4_sysfs_read_fw_version, NULL);
./drivers/input/touchscreen/melfas_mip4.c:1373:static DEVICE_ATTR(hw_version, S_IRUGO, mip4_sysfs_read_hw_version, NULL);
./drivers/input/touchscreen/melfas_mip4.c:1392:static DEVICE_ATTR(product_id, S_IRUGO, mip4_sysfs_read_product_id, NULL);
...

> identifier dev, attr, buf;
> const char *chr;
> @@
> ssize_t f_show(struct device *dev, struct device_attribute *attr, char
> *buf)
> {
> <...
> (
> - sprintf
> + sysfs_sprintf
> (...);
> |
> - snprintf(buf, PAGE_SIZE,
> + sysfs_sprintf(buf,
> ...);
> |
> - scnprintf(buf, PAGE_SIZE,
> + sysfs_sprintf(buf,
> ...);
> |
> strcpy(buf, chr);
> sysfs_strcpy(buf, chr);
> )
> ...>
> }
>
> which finds direct statements without an assign
> but that doesn't find
>
> arch/arm/common/dmabounce.c:static ssize_t dmabounce_show(struct device *dev, struct device_attribute *attr, char *buf)
> arch/arm/common/dmabounce.c-{
> arch/arm/common/dmabounce.c- struct dmabounce_device_info *device_info = dev->archdata.dmabounce;
> arch/arm/common/dmabounce.c- return sprintf(buf, "%lu %lu %lu %lu %lu %lu\n",
> arch/arm/common/dmabounce.c- device_info->small.allocs,
> arch/arm/common/dmabounce.c- device_info->large.allocs,
> arch/arm/common/dmabounce.c- device_info->total_allocs - device_info->small.allocs -
> arch/arm/common/dmabounce.c- device_info->large.allocs,
> arch/arm/common/dmabounce.c- device_info->total_allocs,
> arch/arm/common/dmabounce.c- device_info->map_op_count,
> arch/arm/common/dmabounce.c- device_info->bounce_count);
> arch/arm/common/dmabounce.c-}
>

This will match it (the difference is in the ';'):
@@
identifier f_show =~ "^.*_show$";
identifier dev, attr, buf;
@@

ssize_t f_show(struct device *dev, struct device_attribute *attr, char *buf)

{

<...
- sprintf
+ sysfs_sprintf
(...)
...>
}

Regards,
Denis

2020-08-27 22:48:31

by Joe Perches

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Thu, 2020-08-27 at 15:20 -0700, Kees Cook wrote:
> On Fri, Aug 28, 2020 at 12:01:34AM +0300, Denis Efremov wrote:
> > Just FYI, I've send an addition to the device_attr_show.cocci script[1] to turn
> > simple cases of snprintf (e.g. "%i") to sprintf. Looks like many developers would
> > like it more than changing snprintf to scnprintf. As for me, I don't like the idea
> > of automated altering of the original logic from bounded snprint to unbouded one
> > with sprintf.
>
> Agreed. This just makes me cringe. If the API design declares that when
> a show() callback starts, buf has been allocated with PAGE_SIZE bytes,
> then that's how the logic should proceed, and it should be using
> scnprintf...
>
> show(...) {
> size_t remaining = PAGE_SIZE;
>
> ...
> remaining -= scnprintf(buf, remaining, "fmt", var args ...);
> remaining -= scnprintf(buf, remaining, "fmt", var args ...);
> remaining -= scnprintf(buf, remaining, "fmt", var args ...);
>
> return PAGE_SIZE - remaining;
> }

It seems likely that coccinelle could do those transform
with any of sprintf/snprintf/scnprint too.

Though my bikeshed would use a single function and have
that function know the maximum output size

Something like:

With single line use:

return sysfs_emit(buf, buf, fmt, ...) - buf;

and multi-line use:

char *pos = buf;

pos = sysfs_emit(buf, pos, fmt1, ...);
pos = sysfs_emit(buf, pos, fmt2, ...);
...

return pos - buf;


2020-08-27 22:50:34

by Joe Perches

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Fri, 2020-08-28 at 01:38 +0300, Denis Efremov wrote:
> > This will match it (the difference is in the ';'):

thanks.


2020-08-28 04:17:04

by Joe Perches

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Thu, 2020-08-27 at 15:45 -0700, Joe Perches wrote:
> On Thu, 2020-08-27 at 15:20 -0700, Kees Cook wrote:
> > On Fri, Aug 28, 2020 at 12:01:34AM +0300, Denis Efremov wrote:
> > > Just FYI, I've send an addition to the device_attr_show.cocci script[1] to turn
> > > simple cases of snprintf (e.g. "%i") to sprintf. Looks like many developers would
> > > like it more than changing snprintf to scnprintf. As for me, I don't like the idea
> > > of automated altering of the original logic from bounded snprint to unbouded one
> > > with sprintf.
> >
> > Agreed. This just makes me cringe. If the API design declares that when
> > a show() callback starts, buf has been allocated with PAGE_SIZE bytes,
> > then that's how the logic should proceed, and it should be using
> > scnprintf...
> >
> > show(...) {
> > size_t remaining = PAGE_SIZE;
> >
> > ...
> > remaining -= scnprintf(buf, remaining, "fmt", var args ...);
> > remaining -= scnprintf(buf, remaining, "fmt", var args ...);
> > remaining -= scnprintf(buf, remaining, "fmt", var args ...);
> >
> > return PAGE_SIZE - remaining;
> > }
>
> It seems likely that coccinelle could do those transform
> with any of sprintf/snprintf/scnprint too.
>
> Though my bikeshed would use a single function and have
> that function know the maximum output size

Perhaps something like the below with a sample conversion
that uses single and multiple sysfs_emit uses.

I believe coccinelle can _mostly_ automated this.

---
fs/sysfs/file.c | 30 ++++++++++++++++++++++++++++++
include/linux/sysfs.h | 8 ++++++++
kernel/power/main.c | 49 ++++++++++++++++++++++++++-----------------------
3 files changed, 64 insertions(+), 23 deletions(-)

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index eb6897ab78e7..c0ff3ba8e373 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -707,3 +707,33 @@ int sysfs_change_owner(struct kobject *kobj, kuid_t kuid, kgid_t kgid)
return 0;
}
EXPORT_SYMBOL_GPL(sysfs_change_owner);
+
+/**
+ * sysfs_emit - scnprintf equivalent, aware of PAGE_SIZE buffer.
+ * @buf: start of PAGE_SIZE buffer.
+ * @pos: current position in buffer
+ * (pos - buf) must always be < PAGE_SIZE
+ * @fmt: format
+ * @...: arguments to format
+ *
+ *
+ * Returns number of characters written at pos.
+ */
+int sysfs_emit(char *buf, char *pos, const char *fmt, ...)
+{
+ int len;
+ va_list args;
+
+ WARN(pos < buf, "pos < buf\n");
+ WARN(pos - buf >= PAGE_SIZE, "pos >= PAGE_SIZE (%tu > %lu)\n",
+ pos - buf, PAGE_SIZE);
+ if (pos < buf || pos - buf >= PAGE_SIZE)
+ return 0;
+
+ va_start(args, fmt);
+ len = vscnprintf(pos, PAGE_SIZE - (pos - buf), fmt, args);
+ va_end(args);
+
+ return len;
+}
+EXPORT_SYMBOL_GPL(sysfs_emit);
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 34e84122f635..5a21d3d30016 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -329,6 +329,8 @@ int sysfs_groups_change_owner(struct kobject *kobj,
int sysfs_group_change_owner(struct kobject *kobj,
const struct attribute_group *groups, kuid_t kuid,
kgid_t kgid);
+__printf(3, 4)
+int sysfs_emit(char *buf, char *pos, const char *fmt, ...);

#else /* CONFIG_SYSFS */

@@ -576,6 +578,12 @@ static inline int sysfs_group_change_owner(struct kobject *kobj,
return 0;
}

+__printf(3, 4)
+static inline int sysfs_emit(char *buf, char *pos, const char *fmt, ...)
+{
+ return 0;
+}
+
#endif /* CONFIG_SYSFS */

static inline int __must_check sysfs_create_file(struct kobject *kobj,
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 40f86ec4ab30..f3fb9f255234 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -100,7 +100,7 @@ int pm_async_enabled = 1;
static ssize_t pm_async_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
- return sprintf(buf, "%d\n", pm_async_enabled);
+ return sysfs_emit(buf, buf, "%d\n", pm_async_enabled);
}

static ssize_t pm_async_store(struct kobject *kobj, struct kobj_attribute *attr,
@@ -124,7 +124,7 @@ power_attr(pm_async);
static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
- char *s = buf;
+ char *pos = buf;
suspend_state_t i;

for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++)
@@ -132,16 +132,16 @@ static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr,
const char *label = mem_sleep_states[i];

if (mem_sleep_current == i)
- s += sprintf(s, "[%s] ", label);
+ pos += sysfs_emit(buf, pos, "[%s] ", label);
else
- s += sprintf(s, "%s ", label);
+ pos += sysfs_emit(buf, pos, "%s ", label);
}

/* Convert the last space to a newline if needed. */
- if (s != buf)
- *(s-1) = '\n';
+ if (pos != buf)
+ *(pos - 1) = '\n';

- return (s - buf);
+ return pos - buf;
}

static suspend_state_t decode_suspend_state(const char *buf, size_t n)
@@ -202,7 +202,7 @@ bool sync_on_suspend_enabled = !IS_ENABLED(CONFIG_SUSPEND_SKIP_SYNC);
static ssize_t sync_on_suspend_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", sync_on_suspend_enabled);
+ return sysfs_emit(buf, buf, "%d\n", sync_on_suspend_enabled);
}

static ssize_t sync_on_suspend_store(struct kobject *kobj,
@@ -336,7 +336,7 @@ static ssize_t last_failed_dev_show(struct kobject *kobj,
index %= REC_FAILED_NUM;
last_failed_dev = suspend_stats.failed_devs[index];

- return sprintf(buf, "%s\n", last_failed_dev);
+ return sysfs_emit(buf, buf, "%s\n", last_failed_dev);
}
static struct kobj_attribute last_failed_dev = __ATTR_RO(last_failed_dev);

@@ -350,7 +350,7 @@ static ssize_t last_failed_errno_show(struct kobject *kobj,
index %= REC_FAILED_NUM;
last_failed_errno = suspend_stats.errno[index];

- return sprintf(buf, "%d\n", last_failed_errno);
+ return sysfs_emit(buf, buf, "%d\n", last_failed_errno);
}
static struct kobj_attribute last_failed_errno = __ATTR_RO(last_failed_errno);

@@ -366,7 +366,7 @@ static ssize_t last_failed_step_show(struct kobject *kobj,
step = suspend_stats.failed_steps[index];
last_failed_step = suspend_step_name(step);

- return sprintf(buf, "%s\n", last_failed_step);
+ return sysfs_emit(buf, buf, "%s\n", last_failed_step);
}
static struct kobj_attribute last_failed_step = __ATTR_RO(last_failed_step);

@@ -474,7 +474,7 @@ bool pm_print_times_enabled;
static ssize_t pm_print_times_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", pm_print_times_enabled);
+ return sysfs_emit(buf, buf, "%d\n", pm_print_times_enabled);
}

static ssize_t pm_print_times_store(struct kobject *kobj,
@@ -504,7 +504,9 @@ static ssize_t pm_wakeup_irq_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buf)
{
- return pm_wakeup_irq ? sprintf(buf, "%u\n", pm_wakeup_irq) : -ENODATA;
+ if (!pm_wakeup_irq)
+ return -ENODATA;
+ return sysfs_emit(buf, buf, "%u\n", pm_wakeup_irq);
}

power_attr_ro(pm_wakeup_irq);
@@ -514,7 +516,7 @@ bool pm_debug_messages_on __read_mostly;
static ssize_t pm_debug_messages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", pm_debug_messages_on);
+ return sysfs_emit(buf, buf, "%d\n", pm_debug_messages_on);
}

static ssize_t pm_debug_messages_store(struct kobject *kobj,
@@ -704,8 +706,9 @@ static ssize_t wakeup_count_show(struct kobject *kobj,
{
unsigned int val;

- return pm_get_wakeup_count(&val, true) ?
- sprintf(buf, "%u\n", val) : -EINTR;
+ if (!pm_get_wakeup_count(&val, true))
+ return -EINTR;
+ return sysfs_emit(buf, buf, "%u\n", val);
}

static ssize_t wakeup_count_store(struct kobject *kobj,
@@ -747,17 +750,17 @@ static ssize_t autosleep_show(struct kobject *kobj,
suspend_state_t state = pm_autosleep_state();

if (state == PM_SUSPEND_ON)
- return sprintf(buf, "off\n");
+ return sysfs_emit(buf, buf, "off\n");

#ifdef CONFIG_SUSPEND
if (state < PM_SUSPEND_MAX)
- return sprintf(buf, "%s\n", pm_states[state] ?
- pm_states[state] : "error");
+ return sysfs_emit(buf, buf, "%s\n",
+ pm_states[state] ?: "error");
#endif
#ifdef CONFIG_HIBERNATION
- return sprintf(buf, "disk\n");
+ return sysfs_emit(buf, buf, "disk\n");
#else
- return sprintf(buf, "error");
+ return sysfs_emit(buf, buf, "error\n");
#endif
}

@@ -826,7 +829,7 @@ int pm_trace_enabled;
static ssize_t pm_trace_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
- return sprintf(buf, "%d\n", pm_trace_enabled);
+ return sysfs_emit(buf, buf, "%d\n", pm_trace_enabled);
}

static ssize_t
@@ -863,7 +866,7 @@ power_attr_ro(pm_trace_dev_match);
static ssize_t pm_freeze_timeout_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
- return sprintf(buf, "%u\n", freeze_timeout_msecs);
+ return sysfs_emit(buf, buf, "%u\n", freeze_timeout_msecs);
}

static ssize_t pm_freeze_timeout_store(struct kobject *kobj,


2020-08-28 07:40:41

by David Laight

[permalink] [raw]
Subject: RE: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs

From: Kees Cook
> Sent: 27 August 2020 23:21
...
>
> Agreed. This just makes me cringe. If the API design declares that when
> a show() callback starts, buf has been allocated with PAGE_SIZE bytes,
> then that's how the logic should proceed, and it should be using
> scnprintf...
>
> show(...) {
> size_t remaining = PAGE_SIZE;
>
> ...
> remaining -= scnprintf(buf, remaining, "fmt", var args ...);
> remaining -= scnprintf(buf, remaining, "fmt", var args ...);
> remaining -= scnprintf(buf, remaining, "fmt", var args ...);

Not quite what you had in mind, maybe:
char *end = buf + PAGE_SIZE;

buf += scnprintf(buf, end - buf, ...);

return PAGE_SIZE - (end - buf);

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

2020-08-28 08:00:08

by Kees Cook

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Thu, Aug 27, 2020 at 09:12:06PM -0700, Joe Perches wrote:
> Perhaps something like the below with a sample conversion
> that uses single and multiple sysfs_emit uses.

On quick review, I like it. :)

> [...]
> +int sysfs_emit(char *buf, char *pos, const char *fmt, ...)
> +{
> + int len;
> + va_list args;
> +
> + WARN(pos < buf, "pos < buf\n");
> + WARN(pos - buf >= PAGE_SIZE, "pos >= PAGE_SIZE (%tu > %lu)\n",
> + pos - buf, PAGE_SIZE);
> + if (pos < buf || pos - buf >= PAGE_SIZE)
> + return 0;

This can be:

if (WARN(pos < buf, "pos < buf\n") ||
WARN(pos - buf >= PAGE_SIZE, "pos >= PAGE_SIZE (%tu > %lu)\n",
pos - buf, PAGE_SIZE))
return 0;

--
Kees Cook

2020-08-28 08:11:22

by Joe Perches

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Fri, 2020-08-28 at 00:58 -0700, Kees Cook wrote:
> On Thu, Aug 27, 2020 at 09:12:06PM -0700, Joe Perches wrote:
> > Perhaps something like the below with a sample conversion
> > that uses single and multiple sysfs_emit uses.
>
> On quick review, I like it. :)
>
> > [...]
> > +int sysfs_emit(char *buf, char *pos, const char *fmt, ...)
> > +{
> > + int len;
> > + va_list args;
> > +
> > + WARN(pos < buf, "pos < buf\n");
> > + WARN(pos - buf >= PAGE_SIZE, "pos >= PAGE_SIZE (%tu > %lu)\n",
> > + pos - buf, PAGE_SIZE);
> > + if (pos < buf || pos - buf >= PAGE_SIZE)
> > + return 0;
>
> This can be:
>
> if (WARN(pos < buf, "pos < buf\n") ||
> WARN(pos - buf >= PAGE_SIZE, "pos >= PAGE_SIZE (%tu > %lu)\n",
> pos - buf, PAGE_SIZE))
> return 0;

I had some vague recollection that WARN could be compiled
away to nothing somehow. True or false?

If false, sure, of course, it'd be faster too.

2020-08-28 08:25:47

by Joe Perches

[permalink] [raw]
Subject: Re: [Cocci] [PATCH] usb: atm: don't use snprintf() for sysfs attrs

On Fri, 2020-08-28 at 01:10 -0700, Joe Perches wrote:
> On Fri, 2020-08-28 at 00:58 -0700, Kees Cook wrote:
> > On Thu, Aug 27, 2020 at 09:12:06PM -0700, Joe Perches wrote:
> > > Perhaps something like the below with a sample conversion
> > > that uses single and multiple sysfs_emit uses.
> >
> > On quick review, I like it. :)
> >
> > > [...]
> > > +int sysfs_emit(char *buf, char *pos, const char *fmt, ...)
> > > +{
> > > + int len;
> > > + va_list args;
> > > +
> > > + WARN(pos < buf, "pos < buf\n");
> > > + WARN(pos - buf >= PAGE_SIZE, "pos >= PAGE_SIZE (%tu > %lu)\n",
> > > + pos - buf, PAGE_SIZE);
> > > + if (pos < buf || pos - buf >= PAGE_SIZE)
> > > + return 0;
> >
> > This can be:
> >
> > if (WARN(pos < buf, "pos < buf\n") ||
> > WARN(pos - buf >= PAGE_SIZE, "pos >= PAGE_SIZE (%tu > %lu)\n",
> > pos - buf, PAGE_SIZE))
> > return 0;
>
> I have some vague recollection that WARN could be compiled
> away to nothing somehow. True or false?
>
> If false, sure, of course, it'd be faster too.

I can't find an instance where WARN doesn't return the
condition.

And likely even faster would be to just show "invalid pos"
instead of specific messages.

if (WARN(pos < buf || (pos - buf) >= PAGE_SIZE,
"Invalid pos\n");
return 0;

or maybe use WARN_ONCE or no WARN at all.