2024-01-04 13:37:43

by Christophe JAILLET

[permalink] [raw]
Subject: [PATCH] seq_file: Optimize seq_puts()

Most of seq_puts() usages are done with a string literal. In such cases,
the length of the string car be computed at compile time in order to save
a strlen() call at run-time. seq_write() can then be used instead.

This saves a few cycles.

To have an estimation of how often this optimization triggers:
$ git grep seq_puts.*\" | wc -l
3391

Signed-off-by: Christophe JAILLET <[email protected]>
---
Checked by comparing the output of a few .s files.
Here is one of these outputs:

$ diff -u drivers/clk/clk.s.old drivers/clk/clk.s | grep -C6 seq_w

call clk_prepare_unlock #
# drivers/clk/clk.c:3320: seq_puts(s, "}\n");
movq %r12, %rdi # s,
+ movl $2, %edx #,
movq $.LC66, %rsi #,
- call seq_puts #
+ call seq_write #
call __tsan_func_exit #
# drivers/clk/clk.c:3322: }
xorl %eax, %eax #
@@ -34520,6 +34521,7 @@
popq %rbp #
popq %r12 #
--
# drivers/clk/clk.c:3205: seq_puts(s, "-----");
call __sanitizer_cov_trace_pc #
+ movl $5, %edx #,
movq $.LC72, %rsi #,
movq %r13, %rdi # s,
- call seq_puts #
+ call seq_write #
jmp .L2134 #
.L2144:
# drivers/clk/clk.c:1793: return clk_core_get_accuracy_no_lock(core);
@@ -35225,20 +35228,23 @@
leaq 240(%r12), %rdi #, tmp95
call __tsan_read8 #
--
movq %r12, %rdi # s,
+ movq $.LC77, %rsi #,
# drivers/clk/clk.c:3244: struct hlist_head **lists = s->private;
movq 240(%r12), %rbp # s_9(D)->private, lists
# drivers/clk/clk.c:3246: seq_puts(s, " enable prepare protect duty hardware connection\n");
- call seq_puts #
+ call seq_write #
# drivers/clk/clk.c:3247: seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n");
+ movl $142, %edx #,
movq $.LC78, %rsi #,
movq %r12, %rdi # s,
- call seq_puts #
+ call seq_write #
# drivers/clk/clk.c:3248: seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n");
+ movl $142, %edx #,
movq $.LC79, %rsi #,
movq %r12, %rdi # s,
- call seq_puts #
+ call seq_write #
# drivers/clk/clk.c:3251: clk_prepare_lock();
call clk_prepare_lock #
.L2207:
@@ -37511,7 +37517,7 @@
subq $16, %rsp #,
# drivers/clk/clk.c:3082: {
---
fs/seq_file.c | 4 ++--
include/linux/seq_file.h | 10 +++++++++-
2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index f5fdaf3b1572..8ef0a07033ca 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -669,7 +669,7 @@ void seq_putc(struct seq_file *m, char c)
}
EXPORT_SYMBOL(seq_putc);

-void seq_puts(struct seq_file *m, const char *s)
+void __seq_puts(struct seq_file *m, const char *s)
{
int len = strlen(s);

@@ -680,7 +680,7 @@ void seq_puts(struct seq_file *m, const char *s)
memcpy(m->buf + m->count, s, len);
m->count += len;
}
-EXPORT_SYMBOL(seq_puts);
+EXPORT_SYMBOL(__seq_puts);

/**
* seq_put_decimal_ull_width - A helper routine for putting decimal numbers
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 234bcdb1fba4..15abf45d62c5 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -118,7 +118,15 @@ void seq_vprintf(struct seq_file *m, const char *fmt, va_list args);
__printf(2, 3)
void seq_printf(struct seq_file *m, const char *fmt, ...);
void seq_putc(struct seq_file *m, char c);
-void seq_puts(struct seq_file *m, const char *s);
+void __seq_puts(struct seq_file *m, const char *s);
+#define seq_puts(m, s) \
+do { \
+ if (__builtin_constant_p(s)) \
+ seq_write(m, s, __builtin_strlen(s)); \
+ else \
+ __seq_puts(m, s); \
+} while (0)
+
void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
unsigned long long num, unsigned int width);
void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
--
2.34.1



2024-04-15 20:48:26

by Christophe JAILLET

[permalink] [raw]
Subject: Re: [PATCH] seq_file: Optimize seq_puts()

Le 04/01/2024 à 14:29, Christophe JAILLET a écrit :
> Most of seq_puts() usages are done with a string literal. In such cases,
> the length of the string car be computed at compile time in order to save
> a strlen() call at run-time. seq_write() can then be used instead.
>
> This saves a few cycles.
>
> To have an estimation of how often this optimization triggers:
> $ git grep seq_puts.*\" | wc -l
> 3391
>
> Signed-off-by: Christophe JAILLET <[email protected]>

Hi,

any feed-back on this small optimisation of seq_puts()?

Most of its usage would be optimized and a strlen() would be saved in
all the corresponding cases.


$ git grep seq_puts.*\" | wc -l
3436

$ git grep seq_puts | wc -l
3644

CJ

> ---
> Checked by comparing the output of a few .s files.
> Here is one of these outputs:
>
> $ diff -u drivers/clk/clk.s.old drivers/clk/clk.s | grep -C6 seq_w
>
> call clk_prepare_unlock #
> # drivers/clk/clk.c:3320: seq_puts(s, "}\n");
> movq %r12, %rdi # s,
> + movl $2, %edx #,
> movq $.LC66, %rsi #,
> - call seq_puts #
> + call seq_write #
> call __tsan_func_exit #
> # drivers/clk/clk.c:3322: }
> xorl %eax, %eax #
> @@ -34520,6 +34521,7 @@
> popq %rbp #
> popq %r12 #
> --
> # drivers/clk/clk.c:3205: seq_puts(s, "-----");
> call __sanitizer_cov_trace_pc #
> + movl $5, %edx #,
> movq $.LC72, %rsi #,
> movq %r13, %rdi # s,
> - call seq_puts #
> + call seq_write #
> jmp .L2134 #
> .L2144:
> # drivers/clk/clk.c:1793: return clk_core_get_accuracy_no_lock(core);
> @@ -35225,20 +35228,23 @@
> leaq 240(%r12), %rdi #, tmp95
> call __tsan_read8 #
> --
> movq %r12, %rdi # s,
> + movq $.LC77, %rsi #,
> # drivers/clk/clk.c:3244: struct hlist_head **lists = s->private;
> movq 240(%r12), %rbp # s_9(D)->private, lists
> # drivers/clk/clk.c:3246: seq_puts(s, " enable prepare protect duty hardware connection\n");
> - call seq_puts #
> + call seq_write #
> # drivers/clk/clk.c:3247: seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n");
> + movl $142, %edx #,
> movq $.LC78, %rsi #,
> movq %r12, %rdi # s,
> - call seq_puts #
> + call seq_write #
> # drivers/clk/clk.c:3248: seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n");
> + movl $142, %edx #,
> movq $.LC79, %rsi #,
> movq %r12, %rdi # s,
> - call seq_puts #
> + call seq_write #
> # drivers/clk/clk.c:3251: clk_prepare_lock();
> call clk_prepare_lock #
> .L2207:
> @@ -37511,7 +37517,7 @@
> subq $16, %rsp #,
> # drivers/clk/clk.c:3082: {
> ---
> fs/seq_file.c | 4 ++--
> include/linux/seq_file.h | 10 +++++++++-
> 2 files changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/fs/seq_file.c b/fs/seq_file.c
> index f5fdaf3b1572..8ef0a07033ca 100644
> --- a/fs/seq_file.c
> +++ b/fs/seq_file.c
> @@ -669,7 +669,7 @@ void seq_putc(struct seq_file *m, char c)
> }
> EXPORT_SYMBOL(seq_putc);
>
> -void seq_puts(struct seq_file *m, const char *s)
> +void __seq_puts(struct seq_file *m, const char *s)
> {
> int len = strlen(s);
>
> @@ -680,7 +680,7 @@ void seq_puts(struct seq_file *m, const char *s)
> memcpy(m->buf + m->count, s, len);
> m->count += len;
> }
> -EXPORT_SYMBOL(seq_puts);
> +EXPORT_SYMBOL(__seq_puts);
>
> /**
> * seq_put_decimal_ull_width - A helper routine for putting decimal numbers
> diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
> index 234bcdb1fba4..15abf45d62c5 100644
> --- a/include/linux/seq_file.h
> +++ b/include/linux/seq_file.h
> @@ -118,7 +118,15 @@ void seq_vprintf(struct seq_file *m, const char *fmt, va_list args);
> __printf(2, 3)
> void seq_printf(struct seq_file *m, const char *fmt, ...);
> void seq_putc(struct seq_file *m, char c);
> -void seq_puts(struct seq_file *m, const char *s);
> +void __seq_puts(struct seq_file *m, const char *s);
> +#define seq_puts(m, s) \
> +do { \
> + if (__builtin_constant_p(s)) \
> + seq_write(m, s, __builtin_strlen(s)); \
> + else \
> + __seq_puts(m, s); \
> +} while (0)
> +
> void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
> unsigned long long num, unsigned int width);
> void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,


2024-04-15 22:11:34

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH] seq_file: Optimize seq_puts()

On Mon, Apr 15, 2024 at 10:47:59PM +0200, Christophe JAILLET wrote:
> Le 04/01/2024 ? 14:29, Christophe JAILLET a ?crit?:
> > Most of seq_puts() usages are done with a string literal. In such cases,
> > the length of the string car be computed at compile time in order to save
> > a strlen() call at run-time. seq_write() can then be used instead.
> >
> > This saves a few cycles.
> >
> > To have an estimation of how often this optimization triggers:
> > $ git grep seq_puts.*\" | wc -l
> > 3391
> >
> > Signed-off-by: Christophe JAILLET <[email protected]>
>
> Hi,
>
> any feed-back on this small optimisation of seq_puts()?

> > +#define seq_puts(m, s) \
> > +do { \
> > + if (__builtin_constant_p(s)) \
> > + seq_write(m, s, __builtin_strlen(s)); \
> > + else \
> > + __seq_puts(m, s); \
> > +} while (0)
> > +

No need to make it a macro, actually. And I would suggest going
a bit further:

static inline void seq_puts(struct seq_file *m, const char *s)
{
if (!__builtin_constant_p(*s))
__seq_puts(m, s);
else if (s[0] && !s[1])
seq_putc(m, s[0]);
else
seq_write(m, s, __builtin_strlen(s));
}

2024-04-16 20:57:54

by David Laight

[permalink] [raw]
Subject: RE: [PATCH] seq_file: Optimize seq_puts()

From: Al Viro
> Sent: 15 April 2024 22:01
..
> No need to make it a macro, actually. And I would suggest going
> a bit further:
>
> static inline void seq_puts(struct seq_file *m, const char *s)

That probably needs to be 'always_inline'.

> {
> if (!__builtin_constant_p(*s))
> __seq_puts(m, s);
> else if (s[0] && !s[1])
> seq_putc(m, s[0]);
> else
> seq_write(m, s, __builtin_strlen(s));
> }

You missed seq_puts(m, "");

I did wonder about checking sizeof(s) <= 2 in the #define version.
That would pick up the cases where a separator is changed/added
in a loop.

Could you do:
size_t len = __builtin_strlen(s);
if (!__builtin_constant_p(len))
__seq_puts(m, s);
else switch (len){
case 0: break;
case 1: seq_putc(m, s[0]);
default: seq_write(m, s, len);
}

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)


2024-04-17 07:54:23

by Rasmus Villemoes

[permalink] [raw]
Subject: Re: [PATCH] seq_file: Optimize seq_puts()

On 15/04/2024 22.47, Christophe JAILLET wrote:
> Le 04/01/2024 à 14:29, Christophe JAILLET a écrit :
>> Most of seq_puts() usages are done with a string literal. In such cases,
>> the length of the string car be computed at compile time in order to save
>> a strlen() call at run-time. seq_write() can then be used instead.
>>
>> This saves a few cycles.
>>
>> To have an estimation of how often this optimization triggers:
>>     $ git grep seq_puts.*\" | wc -l
>>     3391
>>
>> Signed-off-by: Christophe JAILLET <[email protected]>
>
> Hi,
>
> any feed-back on this small optimisation of seq_puts()?

While you're at it, could you change the implementation of the
out-of-line seq_puts (or __seq_puts if it gets renamed to that) to
simply be seq_write(seq, s, strlen(s)) instead of duplicating the
overflow/memcpy logic.

Rasmus


2024-04-17 08:12:35

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH] seq_file: Optimize seq_puts()

On Tue, Apr 16, 2024 at 08:56:51PM +0000, David Laight wrote:

> > static inline void seq_puts(struct seq_file *m, const char *s)
>
> That probably needs to be 'always_inline'.

What for? If compiler fails to inline it (and I'd be very surprised
if that happened - if s is not a constant string, we get a straight call
of __seq_puts() and for constant strings it boils down to call of
seq_putc(m, constant) or seq_write(m, s, constant)), nothing bad
would happen; we'd still get correct behaviour.

> > {
> > if (!__builtin_constant_p(*s))
> > __seq_puts(m, s);
> > else if (s[0] && !s[1])
> > seq_putc(m, s[0]);
> > else
> > seq_write(m, s, __builtin_strlen(s));
> > }
>
> You missed seq_puts(m, "");

Where have you seen one? And if it gets less than optimal, who cares?

> Could you do:
> size_t len = __builtin_strlen(s);
> if (!__builtin_constant_p(len))
> __seq_puts(m, s);
> else switch (len){
> case 0: break;
> case 1: seq_putc(m, s[0]);
> default: seq_write(m, s, len);
> }

Umm... That's probably OK, but I wonder how useful would that
be...

2024-04-19 18:59:47

by Christophe JAILLET

[permalink] [raw]
Subject: Re: [PATCH] seq_file: Optimize seq_puts()

Le 17/04/2024 à 03:04, Al Viro a écrit :
> On Tue, Apr 16, 2024 at 08:56:51PM +0000, David Laight wrote:
>
>>> static inline void seq_puts(struct seq_file *m, const char *s)
>>
>> That probably needs to be 'always_inline'.
>
> What for? If compiler fails to inline it (and I'd be very surprised
> if that happened - if s is not a constant string, we get a straight call
> of __seq_puts() and for constant strings it boils down to call of
> seq_putc(m, constant) or seq_write(m, s, constant)), nothing bad
> would happen; we'd still get correct behaviour.
>
>>> {
>>> if (!__builtin_constant_p(*s))
>>> __seq_puts(m, s);
>>> else if (s[0] && !s[1])
>>> seq_putc(m, s[0]);
>>> else
>>> seq_write(m, s, __builtin_strlen(s));
>>> }
>>
>> You missed seq_puts(m, "");
>
> Where have you seen one?

Based on results from:
git grep seq_puts.*\"\"

there is no such cases.


> And if it gets less than optimal, who cares?
>
>> Could you do:
>> size_t len = __builtin_strlen(s);
>> if (!__builtin_constant_p(len))
>> __seq_puts(m, s);
>> else switch (len){
>> case 0: break;
>> case 1: seq_putc(m, s[0]);

missing break;

>> default: seq_write(m, s, len);
>> }
>
> Umm... That's probably OK, but I wonder how useful would that
> be...
>

Thanks all for your feedback.

I'll send a v2.

CJ

>


2024-04-19 20:47:49

by Christophe JAILLET

[permalink] [raw]
Subject: Re: [PATCH] seq_file: Optimize seq_puts()

Le 16/04/2024 à 22:56, David Laight a écrit :
> From: Al Viro
>> Sent: 15 April 2024 22:01
> ...
>> No need to make it a macro, actually. And I would suggest going
>> a bit further:
>>
>> static inline void seq_puts(struct seq_file *m, const char *s)
>
> That probably needs to be 'always_inline'.
>
>> {
>> if (!__builtin_constant_p(*s))
>> __seq_puts(m, s);
>> else if (s[0] && !s[1])
>> seq_putc(m, s[0]);
>> else
>> seq_write(m, s, __builtin_strlen(s));
>> }
>
> You missed seq_puts(m, "");
>
> I did wonder about checking sizeof(s) <= 2 in the #define version.

git grep seq_puts.*\"[^\\].\" | wc -l
77

What would you do in this case?
2 seq_putc() in order to save a memcpy(..., 2), that's it?

It would also slightly change the behaviour, as only the 1st char could
be added. Actually, it is all or nothing.

Don't know what is the best.

Any thought?

CJ


> That would pick up the cases where a separator is changed/added
> in a loop.
>
> Could you do:
> size_t len = __builtin_strlen(s);
> if (!__builtin_constant_p(len))
> __seq_puts(m, s);
> else switch (len){
> case 0: break;
> case 1: seq_putc(m, s[0]);
> default: seq_write(m, s, len);
> }
>
> David
>
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
>
>
>


2024-04-20 02:21:48

by Al Viro

[permalink] [raw]
Subject: Re: [PATCH] seq_file: Optimize seq_puts()

On Fri, Apr 19, 2024 at 10:38:15PM +0200, Christophe JAILLET wrote:
> Le 16/04/2024 ? 22:56, David Laight a ?crit?:
> > From: Al Viro
> > > Sent: 15 April 2024 22:01
> > ...
> > > No need to make it a macro, actually. And I would suggest going
> > > a bit further:
> > >
> > > static inline void seq_puts(struct seq_file *m, const char *s)
> >
> > That probably needs to be 'always_inline'.
> >
> > > {
> > > if (!__builtin_constant_p(*s))
> > > __seq_puts(m, s);
> > > else if (s[0] && !s[1])
> > > seq_putc(m, s[0]);
> > > else
> > > seq_write(m, s, __builtin_strlen(s));
> > > }
> >
> > You missed seq_puts(m, "");
> >
> > I did wonder about checking sizeof(s) <= 2 in the #define version.
>
> git grep seq_puts.*\"[^\\].\" | wc -l
> 77
>
> What would you do in this case?
> 2 seq_putc() in order to save a memcpy(..., 2), that's it?

Not a damn thing - just have it call seq_write(). Note that
if (s[0] && !s[1])
which triggers only on single-character strings.

2024-04-21 17:22:32

by David Laight

[permalink] [raw]
Subject: RE: [PATCH] seq_file: Optimize seq_puts()

From: Christophe JAILLET
> Sent: 19 April 2024 21:38
...
> > I did wonder about checking sizeof(s) <= 2 in the #define version.
>
> git grep seq_puts.*\"[^\\].\" | wc -l
> 77
>
> What would you do in this case?
> 2 seq_putc() in order to save a memcpy(..., 2), that's it?
>
> It would also slightly change the behaviour, as only the 1st char could
> be added. Actually, it is all or nothing.

Doing:
if (sizeof(str) == 2 && str[0])
seq_putc(m. str[0]);
else
__seq_puts(m, str);
Would pick up loops that do:
char sep[2] = "";

for (;; sep[0] = ',') {
...
seq_puts(m, sep);
...
}
as well as seq_puts(m, "x");

Whether that is worthwhile is another matter.
But it might be used.

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)