2024-04-22 10:25:58

by Christophe JAILLET

[permalink] [raw]
Subject: [PATCH v2 1/2] seq_file: Optimize seq_puts()

Most of seq_puts() usages are done with a string literal. In such cases,
the length of the string car be computed at compile time in order to save
a strlen() call at run-time. seq_putc() or seq_write() can then be used
instead.

This saves a few cycles.

To have an estimation of how often this optimization triggers:
$ git grep seq_puts.*\" | wc -l
3436

$ git grep seq_puts.*\".\" | wc -l
84

Signed-off-by: Christophe JAILLET <[email protected]>
---
Changes in v2:
- Use a function, instead of a macro [Al Viro]
- Handle the case of 1 char only strings, in order to use seq_putc() [Al Viro]
- Use __always_inline [David Laight]

V1: https://lore.kernel.org/all/5c4f7ad7b88f5026940efa9c8be36a58755ec1b3.1704374916.git.christophe.jaillet@wanadoo.fr/


Checked by comparing the output of a few .s files.
Here is one of these outputs:

$ diff -u drivers/clk/clk.s.old drivers/clk/clk.s | grep -C6 seq_w

.L2918:
@@ -46072,10 +46073,11 @@
call clk_prepare_unlock #
# drivers/clk/clk.c:3438: clk_pm_runtime_put_all();
call clk_pm_runtime_put_all #
-# drivers/clk/clk.c:3440: seq_puts(s, "}\n");
+# ./include/linux/seq_file.h:130: seq_write(m, s, __builtin_strlen(s));
+ movl $2, %edx #,
movq $.LC94, %rsi #,
movq %rbp, %rdi # s,
- call seq_puts #
+ call seq_write #
# drivers/clk/clk.c:3441: return 0;
jmp .L2917 #
.size clk_dump_show, .-clk_dump_show
@@ -46200,7 +46202,7 @@
leaq 128(%rbx), %r15 #, _97
movq %r15, %rdi # _97,
--
# drivers/clk/clk.c:1987: __clk_recalc_rates(core, false, 0);
@@ -46480,15 +46482,17 @@
call seq_printf #
jmp .L2950 #
.L2946:
-# drivers/clk/clk.c:3315: seq_puts(s, "-----");
+# ./include/linux/seq_file.h:130: seq_write(m, s, __builtin_strlen(s));
call __sanitizer_cov_trace_pc #
+ movl $5, %edx #,
movq $.LC100, %rsi #,
movq %rbp, %rdi # s,
- call seq_puts #
+ call seq_write #
+# ./include/linux/seq_file.h:131: }
jmp .L2947 #
.L2957:
# drivers/clk/clk.c:1903: return clk_core_get_accuracy_no_lock(core);
- xorl %r14d, %r14d # _34
+ xorl %r14d, %r14d # _35
--
@@ -46736,21 +46740,22 @@
call __sanitizer_cov_trace_pc #
leaq 240(%r12), %rdi #, tmp101
call __tsan_read8 #
-# drivers/clk/clk.c:3355: seq_puts(s, " enable prepare protect duty hardware connection\n");
- movq $.LC104, %rsi #,
+# ./include/linux/seq_file.h:130: seq_write(m, s, __builtin_strlen(s));
+ movl $142, %edx #,
movq %r12, %rdi # s,
+ movq $.LC104, %rsi #,
# drivers/clk/clk.c:3352: struct hlist_head **lists = s->private;
movq 240(%r12), %r13 # s_10(D)->private, lists
-# drivers/clk/clk.c:3355: seq_puts(s, " enable prepare protect duty hardware connection\n");
- call seq_puts #
-# drivers/clk/clk.c:3356: seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n");
+# ./include/linux/seq_file.h:130: seq_write(m, s, __builtin_strlen(s));
+ call seq_write #
+ movl $142, %edx #,
movq $.LC105, %rsi #,
movq %r12, %rdi # s,
- call seq_puts #
-# drivers/clk/clk.c:3357: seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n");
+ call seq_write #
movq $.LC106, %rsi #,
movq %r12, %rdi # s,
- call seq_puts #
+ movl $142, %edx #,
+ call seq_write #
# drivers/clk/clk.c:3359: ret = clk_pm_runtime_get_all();
call clk_pm_runtime_get_all #
# drivers/clk/clk.c:3360: if (ret)
@@ -57943,7 +57948,7 @@
subq $88, %rsp #,
# drivers/clk/clk.c:5338: {


The output for seq_putc() generation has also be checked and works.
---
fs/seq_file.c | 4 ++--
include/linux/seq_file.h | 13 ++++++++++++-
2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index f5fdaf3b1572..8ef0a07033ca 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -669,7 +669,7 @@ void seq_putc(struct seq_file *m, char c)
}
EXPORT_SYMBOL(seq_putc);

-void seq_puts(struct seq_file *m, const char *s)
+void __seq_puts(struct seq_file *m, const char *s)
{
int len = strlen(s);

@@ -680,7 +680,7 @@ void seq_puts(struct seq_file *m, const char *s)
memcpy(m->buf + m->count, s, len);
m->count += len;
}
-EXPORT_SYMBOL(seq_puts);
+EXPORT_SYMBOL(__seq_puts);

/**
* seq_put_decimal_ull_width - A helper routine for putting decimal numbers
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 234bcdb1fba4..8bd4fda6e027 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -118,7 +118,18 @@ void seq_vprintf(struct seq_file *m, const char *fmt, va_list args);
__printf(2, 3)
void seq_printf(struct seq_file *m, const char *fmt, ...);
void seq_putc(struct seq_file *m, char c);
-void seq_puts(struct seq_file *m, const char *s);
+void __seq_puts(struct seq_file *m, const char *s);
+
+static __always_inline void seq_puts(struct seq_file *m, const char *s)
+{
+ if (!__builtin_constant_p(*s))
+ __seq_puts(m, s);
+ else if (s[0] && !s[1])
+ seq_putc(m, s[0]);
+ else
+ seq_write(m, s, __builtin_strlen(s));
+}
+
void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter,
unsigned long long num, unsigned int width);
void seq_put_decimal_ull(struct seq_file *m, const char *delimiter,
--
2.44.0



2024-05-02 14:31:52

by Christian Brauner

[permalink] [raw]
Subject: Re: [PATCH v2 1/2] seq_file: Optimize seq_puts()

On Mon, 22 Apr 2024 12:24:06 +0200, Christophe JAILLET wrote:
> Most of seq_puts() usages are done with a string literal. In such cases,
> the length of the string car be computed at compile time in order to save
> a strlen() call at run-time. seq_putc() or seq_write() can then be used
> instead.
>
> This saves a few cycles.
>
> [...]

Applied to the vfs.misc branch of the vfs/vfs.git tree.
Patches in the vfs.misc branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree: https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs.misc

[1/2] seq_file: Optimize seq_puts()
https://git.kernel.org/vfs/vfs/c/45751097aeb3
[2/2] seq_file: Simplify __seq_puts()
https://git.kernel.org/vfs/vfs/c/e035af9f6eba