2022-03-11 23:09:13

by Alexey Dobriyan

[permalink] [raw]
Subject: [PATCH 1/5] x86/alternative: simplify DUMP_BYTES macro

Avoid zero length check with clever whitespace placement in the format
string.

Signed-off-by: Alexey Dobriyan (CloudLinux) <[email protected]>
---
arch/x86/kernel/alternative.c | 21 +++++++++------------
1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5007c3ffe96f..6c9758ee6810 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -66,13 +66,10 @@ do { \
if (unlikely(debug_alternative)) { \
int j; \
\
- if (!(len)) \
- break; \
- \
printk(KERN_DEBUG pr_fmt(fmt), ##args); \
- for (j = 0; j < (len) - 1; j++) \
- printk(KERN_CONT "%02hhx ", buf[j]); \
- printk(KERN_CONT "%02hhx\n", buf[j]); \
+ for (j = 0; j < (len); j++) \
+ printk(KERN_CONT " %02hhx", buf[j]); \
+ printk(KERN_CONT "\n"); \
} \
} while (0)

@@ -214,7 +211,7 @@ static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
add_nops(instr + off, nnops);
local_irq_restore(flags);

- DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);
+ DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs:", instr, off, i);

return nnops;
}
@@ -303,8 +300,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
instr, instr, a->instrlen,
replacement, a->replacementlen);

- DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
- DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
+ DUMP_BYTES(instr, a->instrlen, "%px: old_insn:", instr);
+ DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn:", replacement);

memcpy(insn_buff, replacement, a->replacementlen);
insn_buff_sz = a->replacementlen;
@@ -328,7 +325,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
insn_buff[insn_buff_sz] = 0x90;

- DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
+ DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn:", instr);

text_poke_early(instr, insn_buff, insn_buff_sz);

@@ -499,8 +496,8 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
len = patch_retpoline(addr, &insn, bytes);
if (len == insn.length) {
optimize_nops(bytes, len);
- DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
- DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
+ DUMP_BYTES(((u8*)addr), len, "%px: orig:", addr);
+ DUMP_BYTES(((u8*)bytes), len, "%px: repl:", addr);
text_poke_early(addr, bytes, len);
}
}
--
2.34.1


2022-03-13 19:07:13

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH 1/5] x86/alternative: simplify DUMP_BYTES macro

On Sat, 2022-03-12 at 08:36 -0800, Joe Perches wrote:
> On Fri, 2022-03-11 at 17:43 +0300, Alexey Dobriyan wrote:
> > Avoid zero length check with clever whitespace placement in the format
> > string.
> []
> > diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> []
> > @@ -66,13 +66,10 @@ do { \
> > if (unlikely(debug_alternative)) { \
> > int j; \
> > \
> > - if (!(len)) \
> > - break; \
> > - \
> > printk(KERN_DEBUG pr_fmt(fmt), ##args); \
> > - for (j = 0; j < (len) - 1; j++) \
> > - printk(KERN_CONT "%02hhx ", buf[j]); \
> > - printk(KERN_CONT "%02hhx\n", buf[j]); \
> > + for (j = 0; j < (len); j++) \
> > + printk(KERN_CONT " %02hhx", buf[j]); \
> > + printk(KERN_CONT "\n"); \
> > } \
>
> This could also use %02x and not %02hhx
>
> And MAX_PATCH_LEN is 255 but is that really possible?
>
> Maybe if the actual patch length is always <= 64 this could use
> printk(KERN_CONT "%*ph\n", (int)len, buf);
> instead and avoid all possible interleaving?

Another possibility would be to raise the arbitrary 64 byte
limit on %*ph to 256.
---
Documentation/core-api/printk-formats.rst | 6 +++---
lib/vsprintf.c | 2 +-
2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index 5e89497ba314e..39f787e9b26e1 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -289,9 +289,9 @@ Raw buffer as a hex string
%*phD 00-01-02- ... -3f
%*phN 000102 ... 3f

-For printing small buffers (up to 64 bytes long) as a hex string with a
-certain separator. For larger buffers consider using
-:c:func:`print_hex_dump`.
+For printing small buffers (up to 256 bytes long) as a hex string with a
+certain separator. For buffers larger than 64 bytes consider using
+:c:func:`print_hex_dump` as its output can be more easily counted.

MAC/FDDI addresses
------------------
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 2a6c767cc2709..be6fa9fab1be8 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1194,7 +1194,7 @@ char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec,
}

if (spec.field_width > 0)
- len = min_t(int, spec.field_width, 64);
+ len = min_t(int, spec.field_width, 256);

for (i = 0; i < len; ++i) {
if (buf < end)


2022-03-14 08:06:10

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH 1/5] x86/alternative: simplify DUMP_BYTES macro

On Fri, 2022-03-11 at 17:43 +0300, Alexey Dobriyan wrote:
> Avoid zero length check with clever whitespace placement in the format
> string.
[]
> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
[]
> @@ -66,13 +66,10 @@ do { \
> if (unlikely(debug_alternative)) { \
> int j; \
> \
> - if (!(len)) \
> - break; \
> - \
> printk(KERN_DEBUG pr_fmt(fmt), ##args); \
> - for (j = 0; j < (len) - 1; j++) \
> - printk(KERN_CONT "%02hhx ", buf[j]); \
> - printk(KERN_CONT "%02hhx\n", buf[j]); \
> + for (j = 0; j < (len); j++) \
> + printk(KERN_CONT " %02hhx", buf[j]); \
> + printk(KERN_CONT "\n"); \
> } \

This could also use %02x and not %02hhx

And MAX_PATCH_LEN is 255 but is that really possible?

Maybe if the actual patch length is always <= 64 this could use
printk(KERN_CONT "%*ph\n", (int)len, buf);
instead and avoid all possible interleaving?

If so, maybe just remove DUMP_BYTES and use DPRINTK directly.

Perhaps:
---
arch/x86/kernel/alternative.c | 31 ++++++++++---------------------
1 file changed, 10 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 018b61febf0e7..74fa946093467 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -61,21 +61,6 @@ do { \
printk(KERN_DEBUG pr_fmt(fmt) "\n", ##args); \
} while (0)

-#define DUMP_BYTES(buf, len, fmt, args...) \
-do { \
- if (unlikely(debug_alternative)) { \
- int j; \
- \
- if (!(len)) \
- break; \
- \
- printk(KERN_DEBUG pr_fmt(fmt), ##args); \
- for (j = 0; j < (len) - 1; j++) \
- printk(KERN_CONT "%02hhx ", buf[j]); \
- printk(KERN_CONT "%02hhx\n", buf[j]); \
- } \
-} while (0)
-
static const unsigned char x86nops[] =
{
BYTES_NOP1,
@@ -214,7 +199,8 @@ static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
add_nops(instr + off, nnops);
local_irq_restore(flags);

- DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);
+ DPRINTK("%px: [%d:%d) optimized NOPs: %*ph",
+ instr, off, i, (int)instrlen, instr);

return nnops;
}
@@ -303,8 +289,10 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
instr, instr, a->instrlen,
replacement, a->replacementlen);

- DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
- DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
+ DPRINTK("%px: old_insn: %*ph",
+ instr, (int)a->instrlen, instr);
+ DPRINTK("%px: rpl_insn: %*ph",
+ replacement, (int)a->replacementlen, replacement);

memcpy(insn_buff, replacement, a->replacementlen);
insn_buff_sz = a->replacementlen;
@@ -328,7 +316,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
insn_buff[insn_buff_sz] = 0x90;

- DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
+ DPRINTK("%px: final_insn: %*ph",
+ instr, (int)insn_buff_sz, insn_buff);

text_poke_early(instr, insn_buff, insn_buff_sz);

@@ -499,8 +488,8 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
len = patch_retpoline(addr, &insn, bytes);
if (len == insn.length) {
optimize_nops(bytes, len);
- DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
- DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
+ DPRINTK("%px: orig: %*ph", addr, (int)len, addr);
+ DPRINTK("%px: repl: %*ph", addr, (int)len, bytes);
text_poke_early(addr, bytes, len);
}
}


2022-03-14 20:31:10

by Alexey Dobriyan

[permalink] [raw]
Subject: Re: [PATCH 1/5] x86/alternative: simplify DUMP_BYTES macro

On Sat, Mar 12, 2022 at 08:36:11AM -0800, Joe Perches wrote:
> On Fri, 2022-03-11 at 17:43 +0300, Alexey Dobriyan wrote:
> > Avoid zero length check with clever whitespace placement in the format
> > string.
> []
> > diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> []
> > @@ -66,13 +66,10 @@ do { \
> > if (unlikely(debug_alternative)) { \
> > int j; \
> > \
> > - if (!(len)) \
> > - break; \
> > - \
> > printk(KERN_DEBUG pr_fmt(fmt), ##args); \
> > - for (j = 0; j < (len) - 1; j++) \
> > - printk(KERN_CONT "%02hhx ", buf[j]); \
> > - printk(KERN_CONT "%02hhx\n", buf[j]); \
> > + for (j = 0; j < (len); j++) \
> > + printk(KERN_CONT " %02hhx", buf[j]); \
> > + printk(KERN_CONT "\n"); \
> > } \
>
> This could also use %02x and not %02hhx

I doubt as there is funky stuff possible with 255 and such values.
Format specifiers aren't the purpose of the patch anyway.

> And MAX_PATCH_LEN is 255 but is that really possible?

Yes, if you try hard enough.

> Maybe if the actual patch length is always <= 64 this could use
> printk(KERN_CONT "%*ph\n", (int)len, buf);
> instead and avoid all possible interleaving?

It is for a debugging feature nobody uses (because it works).

2022-03-15 05:38:19

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH 1/5] x86/alternative: simplify DUMP_BYTES macro

On Sun, 2022-03-13 at 21:09 +0300, Alexey Dobriyan wrote:
> On Sat, Mar 12, 2022 at 08:36:11AM -0800, Joe Perches wrote:
> > On Fri, 2022-03-11 at 17:43 +0300, Alexey Dobriyan wrote:
> > > Avoid zero length check with clever whitespace placement in the format
> > > string.
> > []
> > > diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> > []
> > > @@ -66,13 +66,10 @@ do { \
> > > if (unlikely(debug_alternative)) { \
> > > int j; \
> > > \
> > > - if (!(len)) \
> > > - break; \
> > > - \
> > > printk(KERN_DEBUG pr_fmt(fmt), ##args); \
> > > - for (j = 0; j < (len) - 1; j++) \
> > > - printk(KERN_CONT "%02hhx ", buf[j]); \
> > > - printk(KERN_CONT "%02hhx\n", buf[j]); \
> > > + for (j = 0; j < (len); j++) \
> > > + printk(KERN_CONT " %02hhx", buf[j]); \
> > > + printk(KERN_CONT "\n"); \
> > > } \
> >
> > This could also use %02x and not %02hhx
>
> I doubt as there is funky stuff possible with 255 and such values.

'"%02hhx", u8' and '"%02x", u8' have the same output as the
u8 is converted anyway given the integer promotions.

https://lore.kernel.org/lkml/CAHk-=wgoxnmsj8GEVFJSvTwdnWm8wVJthefNk2n6+4TC=20e0Q@mail.gmail.com/

> Format specifiers aren't the purpose of the patch anyway.

IMO: If you are already touching the lines,
you might as well fix it at the same time.

cheers, Joe

2022-04-05 02:50:04

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH 1/5] x86/alternative: simplify DUMP_BYTES macro

On Fri, Mar 11, 2022 at 05:43:08PM +0300, Alexey Dobriyan wrote:
> Avoid zero length check with clever whitespace placement in the format
> string.
>
> Signed-off-by: Alexey Dobriyan (CloudLinux) <[email protected]>
> ---
> arch/x86/kernel/alternative.c | 21 +++++++++------------
> 1 file changed, 9 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> index 5007c3ffe96f..6c9758ee6810 100644
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -66,13 +66,10 @@ do { \
> if (unlikely(debug_alternative)) { \
> int j; \
> \
> - if (!(len)) \
> - break; \
> - \
> printk(KERN_DEBUG pr_fmt(fmt), ##args); \
> - for (j = 0; j < (len) - 1; j++) \
> - printk(KERN_CONT "%02hhx ", buf[j]); \
> - printk(KERN_CONT "%02hhx\n", buf[j]); \
> + for (j = 0; j < (len); j++) \
> + printk(KERN_CONT " %02hhx", buf[j]); \
> + printk(KERN_CONT "\n"); \
> } \
> } while (0)

That doesn't always work.

Before:

SMP alternatives: feat: 9*32+0, old: (current_save_fsgs+0x32/0xa0 (ffffffff81017762) len: 5), repl: (ffffffff89997c78, len: 0)
SMP alternatives: ffffffff81017762: old_insn: eb 32 0f 1f 00
SMP alternatives: ffffffff81017762: final_insn: 90 90 90 90 90
SMP alternatives: ffffffff81017762: [0:5) optimized NOPs: 0f 1f 44 00 00


After:

SMP alternatives: feat: 9*32+0, old: (current_save_fsgs+0x32/0xa0 (ffffffff81017762) len: 5), repl: (ffffffff89997c78, len: 0)
SMP alternatives: ffffffff81017762: old_insn: eb 32 0f 1f 00
SMP alternatives: ffffffff89997c78: rpl_insn: <----- *
SMP alternatives: ffffffff81017762: final_insn: 90 90 90 90 90
SMP alternatives: ffffffff81017762: [0:5) optimized NOPs: 0f 1f 44 00 00

There is no replacement insn in this case (the replacement is the empty string ""):

static __always_inline bool _static_cpu_has(u16 bit)
{
asm_volatile_goto(
ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
^^

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2022-04-06 08:53:18

by Thomas Gleixner

[permalink] [raw]
Subject: Re: [PATCH 1/5] x86/alternative: simplify DUMP_BYTES macro

On Fri, Mar 11 2022 at 17:43, Alexey Dobriyan wrote:
> Avoid zero length check with clever whitespace placement in the format
> string.
>
> Signed-off-by: Alexey Dobriyan (CloudLinux) <[email protected]>
> ---
> arch/x86/kernel/alternative.c | 21 +++++++++------------
> 1 file changed, 9 insertions(+), 12 deletions(-)
>
> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> index 5007c3ffe96f..6c9758ee6810 100644
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -66,13 +66,10 @@ do { \
> if (unlikely(debug_alternative)) { \
> int j; \
> \
> - if (!(len)) \
> - break; \
> - \

How does that clever whitespace placement prevent this being printed in
the len == 0 case, which is a legit case?

> printk(KERN_DEBUG pr_fmt(fmt), ##args); \

This is debug muck. So why does it have to be "optimized"?

Thanks,

tglx