Most of seq_puts() usages are done with a string literal. In such cases,
the length of the string car be computed at compile time in order to save
a strlen() call at run-time. seq_write() can then be used instead.
This saves a few cycles.
To have an estimation of how often this optimization triggers:
$ git grep seq_puts.*\" | wc -l
3391
Signed-off-by: Christophe JAILLET <[email protected]>
---
Checked by comparing the output of a few .s files.
Here is one of these outputs:
$ diff -u drivers/clk/clk.s.old drivers/clk/clk.s | grep -C6 seq_w
call clk_prepare_unlock #
# drivers/clk/clk.c:3320: seq_puts(s, "}\n");
movq %r12, %rdi # s,
+ movl $2, %edx #,
movq $.LC66, %rsi #,
- call seq_puts #
+ call seq_write #
call __tsan_func_exit #
# drivers/clk/clk.c:3322: }
xorl %eax, %eax #
@@ -34520,6 +34521,7 @@
popq %rbp #
popq %r12 #
--
# drivers/clk/clk.c:3205: seq_puts(s, "-----");
call __sanitizer_cov_trace_pc #
+ movl $5, %edx #,
movq $.LC72, %rsi #,
movq %r13, %rdi # s,
- call seq_puts #
+ call seq_write #
jmp .L2134 #
.L2144:
# drivers/clk/clk.c:1793: return clk_core_get_accuracy_no_lock(core);
@@ -35225,20 +35228,23 @@
leaq 240(%r12), %rdi #, tmp95
call __tsan_read8 #
--
movq %r12, %rdi # s,
+ movq $.LC77, %rsi #,
# drivers/clk/clk.c:3244: struct hlist_head **lists = s->private;
movq 240(%r12), %rbp # s_9(D)->private, lists
# drivers/clk/clk.c:3246: seq_puts(s, " enable prepare protect duty hardware connection\n");
- call seq_puts #
+ call seq_write #
# drivers/clk/clk.c:3247: seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n");
+ movl $142, %edx #,
movq $.LC78, %rsi #,
movq %r12, %rdi # s,
- call seq_puts #
+ call seq_write #
# drivers/clk/clk.c:3248: seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n");
+ movl $142, %edx #,
movq $.LC79, %rsi #,
movq %r12, %rdi # s,
- call seq_puts #
+ call seq_write #
# drivers/clk/clk.c:3251: clk_prepare_lock();
call clk_prepare_lock #
.L2207:
@@ -37511,7 +37517,7 @@
subq $16, %rsp #,
# drivers/clk/clk.c:3082: {
---
fs/seq_file.c | 4 ++--
include/linux/seq_file.h | 10 +++++++++-
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/fs/seq_file.c b/fs/seq_file.c
index f5fdaf3b1572..8ef0a07033ca 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -669,7 +669,7 @@ void seq_putc(struct seq_file *m, char c)
}
EXPORT_SYMBOL(seq_putc);
-void seq_puts(struct seq_file *m, const char *s)
+void __seq_puts(struct seq_file *m, const char *s)
{
int len = strlen(s);
@@ -680,7 +680,7 @@ void seq_puts(struct seq_file *m, const char *s)
memcpy(m->buf + m->count, s, len);
m->count += len;
}
-EXPORT_SYMBOL(seq_puts);
+EXPORT_SYMBOL(__seq_puts);
/**
* seq_put_decimal_ull_width - A helper routine for putting decimal numbers
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 234bcdb1fba4..15abf45d62c5 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -118,7 +118,15 @@ void seq_vprintf(struct seq_file *m, const char *fmt, va_list args);
__printf(2, 3)
void seq_printf(struct seq_file *m, const char *fmt, ...);
void seq_putc(struct seq_file *m, char c);
-void seq_puts(struct seq_file *m, const char *s);
+void __seq_puts(struct seq_file *m, const char *s);
+#define seq_puts(m, s) \
+do { \
+ if (__builtin_constant_p(s)) \
+ seq_write(m, s, __builtin_strlen(s)); \
+ else \
+ __seq_puts(m, s); \
+} while (0)
+
void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
unsigned long long num, unsigned int width);
void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
--
2.34.1
Le 04/01/2024 à 14:29, Christophe JAILLET a écrit :
> Most of seq_puts() usages are done with a string literal. In such cases,
> the length of the string car be computed at compile time in order to save
> a strlen() call at run-time. seq_write() can then be used instead.
>
> This saves a few cycles.
>
> To have an estimation of how often this optimization triggers:
> $ git grep seq_puts.*\" | wc -l
> 3391
>
> Signed-off-by: Christophe JAILLET <[email protected]>
Hi,
any feed-back on this small optimisation of seq_puts()?
Most of its usage would be optimized and a strlen() would be saved in
all the corresponding cases.
$ git grep seq_puts.*\" | wc -l
3436
$ git grep seq_puts | wc -l
3644
CJ
> ---
> Checked by comparing the output of a few .s files.
> Here is one of these outputs:
>
> $ diff -u drivers/clk/clk.s.old drivers/clk/clk.s | grep -C6 seq_w
>
> call clk_prepare_unlock #
> # drivers/clk/clk.c:3320: seq_puts(s, "}\n");
> movq %r12, %rdi # s,
> + movl $2, %edx #,
> movq $.LC66, %rsi #,
> - call seq_puts #
> + call seq_write #
> call __tsan_func_exit #
> # drivers/clk/clk.c:3322: }
> xorl %eax, %eax #
> @@ -34520,6 +34521,7 @@
> popq %rbp #
> popq %r12 #
> --
> # drivers/clk/clk.c:3205: seq_puts(s, "-----");
> call __sanitizer_cov_trace_pc #
> + movl $5, %edx #,
> movq $.LC72, %rsi #,
> movq %r13, %rdi # s,
> - call seq_puts #
> + call seq_write #
> jmp .L2134 #
> .L2144:
> # drivers/clk/clk.c:1793: return clk_core_get_accuracy_no_lock(core);
> @@ -35225,20 +35228,23 @@
> leaq 240(%r12), %rdi #, tmp95
> call __tsan_read8 #
> --
> movq %r12, %rdi # s,
> + movq $.LC77, %rsi #,
> # drivers/clk/clk.c:3244: struct hlist_head **lists = s->private;
> movq 240(%r12), %rbp # s_9(D)->private, lists
> # drivers/clk/clk.c:3246: seq_puts(s, " enable prepare protect duty hardware connection\n");
> - call seq_puts #
> + call seq_write #
> # drivers/clk/clk.c:3247: seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n");
> + movl $142, %edx #,
> movq $.LC78, %rsi #,
> movq %r12, %rdi # s,
> - call seq_puts #
> + call seq_write #
> # drivers/clk/clk.c:3248: seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n");
> + movl $142, %edx #,
> movq $.LC79, %rsi #,
> movq %r12, %rdi # s,
> - call seq_puts #
> + call seq_write #
> # drivers/clk/clk.c:3251: clk_prepare_lock();
> call clk_prepare_lock #
> .L2207:
> @@ -37511,7 +37517,7 @@
> subq $16, %rsp #,
> # drivers/clk/clk.c:3082: {
> ---
> fs/seq_file.c | 4 ++--
> include/linux/seq_file.h | 10 +++++++++-
> 2 files changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/fs/seq_file.c b/fs/seq_file.c
> index f5fdaf3b1572..8ef0a07033ca 100644
> --- a/fs/seq_file.c
> +++ b/fs/seq_file.c
> @@ -669,7 +669,7 @@ void seq_putc(struct seq_file *m, char c)
> }
> EXPORT_SYMBOL(seq_putc);
>
> -void seq_puts(struct seq_file *m, const char *s)
> +void __seq_puts(struct seq_file *m, const char *s)
> {
> int len = strlen(s);
>
> @@ -680,7 +680,7 @@ void seq_puts(struct seq_file *m, const char *s)
> memcpy(m->buf + m->count, s, len);
> m->count += len;
> }
> -EXPORT_SYMBOL(seq_puts);
> +EXPORT_SYMBOL(__seq_puts);
>
> /**
> * seq_put_decimal_ull_width - A helper routine for putting decimal numbers
> diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
> index 234bcdb1fba4..15abf45d62c5 100644
> --- a/include/linux/seq_file.h
> +++ b/include/linux/seq_file.h
> @@ -118,7 +118,15 @@ void seq_vprintf(struct seq_file *m, const char *fmt, va_list args);
> __printf(2, 3)
> void seq_printf(struct seq_file *m, const char *fmt, ...);
> void seq_putc(struct seq_file *m, char c);
> -void seq_puts(struct seq_file *m, const char *s);
> +void __seq_puts(struct seq_file *m, const char *s);
> +#define seq_puts(m, s) \
> +do { \
> + if (__builtin_constant_p(s)) \
> + seq_write(m, s, __builtin_strlen(s)); \
> + else \
> + __seq_puts(m, s); \
> +} while (0)
> +
> void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
> unsigned long long num, unsigned int width);
> void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
On Mon, Apr 15, 2024 at 10:47:59PM +0200, Christophe JAILLET wrote:
> Le 04/01/2024 ? 14:29, Christophe JAILLET a ?crit?:
> > Most of seq_puts() usages are done with a string literal. In such cases,
> > the length of the string car be computed at compile time in order to save
> > a strlen() call at run-time. seq_write() can then be used instead.
> >
> > This saves a few cycles.
> >
> > To have an estimation of how often this optimization triggers:
> > $ git grep seq_puts.*\" | wc -l
> > 3391
> >
> > Signed-off-by: Christophe JAILLET <[email protected]>
>
> Hi,
>
> any feed-back on this small optimisation of seq_puts()?
> > +#define seq_puts(m, s) \
> > +do { \
> > + if (__builtin_constant_p(s)) \
> > + seq_write(m, s, __builtin_strlen(s)); \
> > + else \
> > + __seq_puts(m, s); \
> > +} while (0)
> > +
No need to make it a macro, actually. And I would suggest going
a bit further:
static inline void seq_puts(struct seq_file *m, const char *s)
{
if (!__builtin_constant_p(*s))
__seq_puts(m, s);
else if (s[0] && !s[1])
seq_putc(m, s[0]);
else
seq_write(m, s, __builtin_strlen(s));
}
From: Al Viro
> Sent: 15 April 2024 22:01
..
> No need to make it a macro, actually. And I would suggest going
> a bit further:
>
> static inline void seq_puts(struct seq_file *m, const char *s)
That probably needs to be 'always_inline'.
> {
> if (!__builtin_constant_p(*s))
> __seq_puts(m, s);
> else if (s[0] && !s[1])
> seq_putc(m, s[0]);
> else
> seq_write(m, s, __builtin_strlen(s));
> }
You missed seq_puts(m, "");
I did wonder about checking sizeof(s) <= 2 in the #define version.
That would pick up the cases where a separator is changed/added
in a loop.
Could you do:
size_t len = __builtin_strlen(s);
if (!__builtin_constant_p(len))
__seq_puts(m, s);
else switch (len){
case 0: break;
case 1: seq_putc(m, s[0]);
default: seq_write(m, s, len);
}
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
On 15/04/2024 22.47, Christophe JAILLET wrote:
> Le 04/01/2024 à 14:29, Christophe JAILLET a écrit :
>> Most of seq_puts() usages are done with a string literal. In such cases,
>> the length of the string car be computed at compile time in order to save
>> a strlen() call at run-time. seq_write() can then be used instead.
>>
>> This saves a few cycles.
>>
>> To have an estimation of how often this optimization triggers:
>> $ git grep seq_puts.*\" | wc -l
>> 3391
>>
>> Signed-off-by: Christophe JAILLET <[email protected]>
>
> Hi,
>
> any feed-back on this small optimisation of seq_puts()?
While you're at it, could you change the implementation of the
out-of-line seq_puts (or __seq_puts if it gets renamed to that) to
simply be seq_write(seq, s, strlen(s)) instead of duplicating the
overflow/memcpy logic.
Rasmus
On Tue, Apr 16, 2024 at 08:56:51PM +0000, David Laight wrote:
> > static inline void seq_puts(struct seq_file *m, const char *s)
>
> That probably needs to be 'always_inline'.
What for? If compiler fails to inline it (and I'd be very surprised
if that happened - if s is not a constant string, we get a straight call
of __seq_puts() and for constant strings it boils down to call of
seq_putc(m, constant) or seq_write(m, s, constant)), nothing bad
would happen; we'd still get correct behaviour.
> > {
> > if (!__builtin_constant_p(*s))
> > __seq_puts(m, s);
> > else if (s[0] && !s[1])
> > seq_putc(m, s[0]);
> > else
> > seq_write(m, s, __builtin_strlen(s));
> > }
>
> You missed seq_puts(m, "");
Where have you seen one? And if it gets less than optimal, who cares?
> Could you do:
> size_t len = __builtin_strlen(s);
> if (!__builtin_constant_p(len))
> __seq_puts(m, s);
> else switch (len){
> case 0: break;
> case 1: seq_putc(m, s[0]);
> default: seq_write(m, s, len);
> }
Umm... That's probably OK, but I wonder how useful would that
be...
Le 17/04/2024 à 03:04, Al Viro a écrit :
> On Tue, Apr 16, 2024 at 08:56:51PM +0000, David Laight wrote:
>
>>> static inline void seq_puts(struct seq_file *m, const char *s)
>>
>> That probably needs to be 'always_inline'.
>
> What for? If compiler fails to inline it (and I'd be very surprised
> if that happened - if s is not a constant string, we get a straight call
> of __seq_puts() and for constant strings it boils down to call of
> seq_putc(m, constant) or seq_write(m, s, constant)), nothing bad
> would happen; we'd still get correct behaviour.
>
>>> {
>>> if (!__builtin_constant_p(*s))
>>> __seq_puts(m, s);
>>> else if (s[0] && !s[1])
>>> seq_putc(m, s[0]);
>>> else
>>> seq_write(m, s, __builtin_strlen(s));
>>> }
>>
>> You missed seq_puts(m, "");
>
> Where have you seen one?
Based on results from:
git grep seq_puts.*\"\"
there is no such cases.
> And if it gets less than optimal, who cares?
>
>> Could you do:
>> size_t len = __builtin_strlen(s);
>> if (!__builtin_constant_p(len))
>> __seq_puts(m, s);
>> else switch (len){
>> case 0: break;
>> case 1: seq_putc(m, s[0]);
missing break;
>> default: seq_write(m, s, len);
>> }
>
> Umm... That's probably OK, but I wonder how useful would that
> be...
>
Thanks all for your feedback.
I'll send a v2.
CJ
>
Le 16/04/2024 à 22:56, David Laight a écrit :
> From: Al Viro
>> Sent: 15 April 2024 22:01
> ...
>> No need to make it a macro, actually. And I would suggest going
>> a bit further:
>>
>> static inline void seq_puts(struct seq_file *m, const char *s)
>
> That probably needs to be 'always_inline'.
>
>> {
>> if (!__builtin_constant_p(*s))
>> __seq_puts(m, s);
>> else if (s[0] && !s[1])
>> seq_putc(m, s[0]);
>> else
>> seq_write(m, s, __builtin_strlen(s));
>> }
>
> You missed seq_puts(m, "");
>
> I did wonder about checking sizeof(s) <= 2 in the #define version.
git grep seq_puts.*\"[^\\].\" | wc -l
77
What would you do in this case?
2 seq_putc() in order to save a memcpy(..., 2), that's it?
It would also slightly change the behaviour, as only the 1st char could
be added. Actually, it is all or nothing.
Don't know what is the best.
Any thought?
CJ
> That would pick up the cases where a separator is changed/added
> in a loop.
>
> Could you do:
> size_t len = __builtin_strlen(s);
> if (!__builtin_constant_p(len))
> __seq_puts(m, s);
> else switch (len){
> case 0: break;
> case 1: seq_putc(m, s[0]);
> default: seq_write(m, s, len);
> }
>
> David
>
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
>
>
>
On Fri, Apr 19, 2024 at 10:38:15PM +0200, Christophe JAILLET wrote:
> Le 16/04/2024 ? 22:56, David Laight a ?crit?:
> > From: Al Viro
> > > Sent: 15 April 2024 22:01
> > ...
> > > No need to make it a macro, actually. And I would suggest going
> > > a bit further:
> > >
> > > static inline void seq_puts(struct seq_file *m, const char *s)
> >
> > That probably needs to be 'always_inline'.
> >
> > > {
> > > if (!__builtin_constant_p(*s))
> > > __seq_puts(m, s);
> > > else if (s[0] && !s[1])
> > > seq_putc(m, s[0]);
> > > else
> > > seq_write(m, s, __builtin_strlen(s));
> > > }
> >
> > You missed seq_puts(m, "");
> >
> > I did wonder about checking sizeof(s) <= 2 in the #define version.
>
> git grep seq_puts.*\"[^\\].\" | wc -l
> 77
>
> What would you do in this case?
> 2 seq_putc() in order to save a memcpy(..., 2), that's it?
Not a damn thing - just have it call seq_write(). Note that
if (s[0] && !s[1])
which triggers only on single-character strings.
From: Christophe JAILLET
> Sent: 19 April 2024 21:38
...
> > I did wonder about checking sizeof(s) <= 2 in the #define version.
>
> git grep seq_puts.*\"[^\\].\" | wc -l
> 77
>
> What would you do in this case?
> 2 seq_putc() in order to save a memcpy(..., 2), that's it?
>
> It would also slightly change the behaviour, as only the 1st char could
> be added. Actually, it is all or nothing.
Doing:
if (sizeof(str) == 2 && str[0])
seq_putc(m. str[0]);
else
__seq_puts(m, str);
Would pick up loops that do:
char sep[2] = "";
for (;; sep[0] = ',') {
...
seq_puts(m, sep);
...
}
as well as seq_puts(m, "x");
Whether that is worthwhile is another matter.
But it might be used.
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)