2019-05-23 17:30:05

by Fredrik Noring

[permalink] [raw]
Subject: [PATCH RFC] sscanf: Fix integer overflow with sscanf field width

Hi,

This fixes 53809751ac23 ("sscanf: don't ignore field widths for numeric
conversions"). sscanf overflows integers with simple strings such as dates.
As an example, consider

r = sscanf("20190523123456", "%4d%2d%2d%2d%2d%2d",
&year, &month, &day,
&hour, &minute, &second);

pr_info("%d %04d-%02d-%2d %02d:%02d:%02d\n",
r,
year, month, day,
hour, minute, second);

On a 32-bit machine this prints

6 0000-05-23 12:34:56

where the year is zero, and not 2019 as expected. The reason is that sscanf
attempts to read 20190523123456 as a whole integer and then divide it with
10^10 to obtain 2019, which obviously fails. Of course, 64-bit machines fail
similarly on longer numerical strings.

I'm offering a simple patch to correct this below. The idea is to have a
variant of _parse_integer() called _parse_integer_end(), with the ability
to stop consuming digits. The functions

simple_{strtol,strtoll,strtoul,strtoull}()

now have the corresponding

sscanf_{strtol,strtoll,strtoul,strtoull}()

taking a field width into account. There are some code duplication issues
etc. so one might consider making more extensive changes than these.

What are your thoughts?

Fredrik

diff --git a/lib/kstrtox.c b/lib/kstrtox.c
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -45,14 +45,15 @@ const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
*
* Don't you dare use this function.
*/
-unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
+unsigned int _parse_integer_end(const char *s, const char *e,
+ unsigned int base, unsigned long long *p)
{
unsigned long long res;
unsigned int rv;

res = 0;
rv = 0;
- while (1) {
+ while (!e || s < e) {
unsigned int c = *s;
unsigned int lc = c | 0x20; /* don't tolower() this line */
unsigned int val;
@@ -82,6 +83,11 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
return rv;
}

+unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
+{
+ return _parse_integer_end(s, NULL, base, p);
+}
+
static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
{
unsigned long long _res;
diff --git a/lib/kstrtox.h b/lib/kstrtox.h
--- a/lib/kstrtox.h
+++ b/lib/kstrtox.h
@@ -4,6 +4,8 @@

#define KSTRTOX_OVERFLOW (1U << 31)
const char *_parse_integer_fixup_radix(const char *s, unsigned int *base);
+unsigned int _parse_integer_end(const char *s, const char *e,
+ unsigned int base, unsigned long long *p);
unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *res);

#endif
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -123,6 +123,48 @@ long long simple_strtoll(const char *cp, char **endp, unsigned int base)
}
EXPORT_SYMBOL(simple_strtoll);

+static unsigned long long sscanf_strtoull(const char *cp, int field_width,
+ char **endp, unsigned int base)
+{
+ const char *e = field_width > 0 ? &cp[field_width] : NULL;
+ unsigned long long result;
+ unsigned int rv;
+
+ cp = _parse_integer_fixup_radix(cp, &base);
+ rv = _parse_integer_end(cp, e, base, &result);
+ /* FIXME */
+ cp += (rv & ~KSTRTOX_OVERFLOW);
+
+ if (endp)
+ *endp = (char *)cp;
+
+ return result;
+}
+
+static unsigned long sscanf_strtoul(const char *cp, int field_width,
+ char **endp, unsigned int base)
+{
+ return sscanf_strtoull(cp, field_width, endp, base);
+}
+
+static long sscanf_strtol(const char *cp, int field_width,
+ char **endp, unsigned int base)
+{
+ if (*cp == '-')
+ return -sscanf_strtoul(cp + 1, field_width - 1, endp, base);
+
+ return sscanf_strtoul(cp, field_width, endp, base);
+}
+
+static long long sscanf_strtoll(const char *cp, int field_width,
+ char **endp, unsigned int base)
+{
+ if (*cp == '-')
+ return -sscanf_strtoull(cp + 1, field_width - 1, endp, base);
+
+ return sscanf_strtoull(cp, field_width, endp, base);
+}
+
static noinline_for_stack
int skip_atoi(const char **s)
{
@@ -3330,24 +3372,12 @@ int vsscanf(const char *buf, const char *fmt, va_list args)

if (is_sign)
val.s = qualifier != 'L' ?
- simple_strtol(str, &next, base) :
- simple_strtoll(str, &next, base);
+ sscanf_strtol(str, field_width, &next, base) :
+ sscanf_strtoll(str, field_width, &next, base);
else
val.u = qualifier != 'L' ?
- simple_strtoul(str, &next, base) :
- simple_strtoull(str, &next, base);
-
- if (field_width > 0 && next - str > field_width) {
- if (base == 0)
- _parse_integer_fixup_radix(str, &base);
- while (next - str > field_width) {
- if (is_sign)
- val.s = div_s64(val.s, base);
- else
- val.u = div_u64(val.u, base);
- --next;
- }
- }
+ sscanf_strtoul(str, field_width, &next, base) :
+ sscanf_strtoull(str, field_width, &next, base);

switch (qualifier) {
case 'H': /* that's 'hh' in format */


2019-05-23 18:08:55

by Alexey Dobriyan

[permalink] [raw]
Subject: Re: [PATCH RFC] sscanf: Fix integer overflow with sscanf field width

> What are your thoughts?

Don't use sscanf(3), it is misdesigned.

2019-05-24 07:28:04

by Jan Beulich

[permalink] [raw]
Subject: Re: [PATCH RFC] sscanf: Fix integer overflow with sscanf field width

>>> On 23.05.19 at 19:27, <[email protected]> wrote:
> This fixes 53809751ac23 ("sscanf: don't ignore field widths for numeric
> conversions"). sscanf overflows integers with simple strings such as dates.
> As an example, consider
>
> r = sscanf("20190523123456", "%4d%2d%2d%2d%2d%2d",
> &year, &month, &day,
> &hour, &minute, &second);
>
> pr_info("%d %04d-%02d-%2d %02d:%02d:%02d\n",
> r,
> year, month, day,
> hour, minute, second);
>
> On a 32-bit machine this prints
>
> 6 0000-05-23 12:34:56
>
> where the year is zero, and not 2019 as expected. The reason is that sscanf
> attempts to read 20190523123456 as a whole integer and then divide it with
> 10^10 to obtain 2019, which obviously fails. Of course, 64-bit machines fail
> similarly on longer numerical strings.

Right, and that's also what that commit's description says remains as
non-conforming behavior.

> I'm offering a simple patch to correct this below. The idea is to have a
> variant of _parse_integer() called _parse_integer_end(), with the ability
> to stop consuming digits. The functions
>
> simple_{strtol,strtoll,strtoul,strtoull}()
>
> now have the corresponding
>
> sscanf_{strtol,strtoll,strtoul,strtoull}()
>
> taking a field width into account. There are some code duplication issues
> etc. so one might consider making more extensive changes than these.

I'm not the maintainer here, but to me it looks mostly okay.

> +static long sscanf_strtol(const char *cp, int field_width,
> + char **endp, unsigned int base)
> +{
> + if (*cp == '-')
> + return -sscanf_strtoul(cp + 1, field_width - 1, endp, base);

I'm afraid you may neither convert a field width of zero to -1 here,
nor convert a field width of 1 to zero (unlimited).

I'd also like to note that the 'u' and 'x' format characters also accept
a sign as per the standard, but that's an orthogonal issue which you
may or may not want to address at the same time.

Jan