From: Borislav Petkov <[email protected]>
Previously, we did call an XSAVE/XRSTOR variant through alternatives
and did potential exception handling resulting from the instruction
execution in a second inline asm. Which was misleading and error prone,
see
06c8173eb92b ("x86/fpu/xsaves: Fix improper uses of __ex_table")
for an example.
Add single macros which combine the alternatives and the exception
handling.
While at it, remove the SYSTEM_BOOTING checks in favor of
static_cpu_has_safe() which works regardless of system state.
Cleanup comments.
Signed-off-by: Borislav Petkov <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Quentin Casasnovas <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Cc: Andy Lutomirski <[email protected]>
---
arch/x86/include/asm/xsave.h | 141 ++++++++++++++++++++++---------------------
1 file changed, 73 insertions(+), 68 deletions(-)
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index c9a6d68b8d62..e6c7986c95df 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -67,6 +67,66 @@ extern int init_fpu(struct task_struct *child);
_ASM_EXTABLE(1b, 3b) \
: [err] "=r" (err)
+#define XSTATE_OP(op, st, lmask, hmask, err) \
+ asm volatile("1:" op "\n\t" \
+ "2:\n\t" \
+ "xor %[err], %[err]\n" \
+ ".pushsection .fixup,\"ax\"\n\t" \
+ "3: movl $-1,%[err]\n\t" \
+ "jmp 2b\n\t" \
+ ".popsection\n\t" \
+ _ASM_EXTABLE(1b, 3b) \
+ : [err] "=r" (err) \
+ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : "memory")
+
+/*
+ * If XSAVES is enabled, it replaces XSAVEOPT because it supports a compact
+ * format and supervisor states in addition to modified optimization in
+ * XSAVEOPT.
+ *
+ * Otherwise, if XSAVEOPT is enabled, XSAVEOPT replaces XSAVE because XSAVEOPT
+ * supports modified optimization which is not supported by XSAVE.
+ *
+ * We use XSAVE as a fallback.
+ *
+ * 661 and alt_end_marker labels below are defined in ALTERNATIVE* and we're
+ * reusing them here so as not to clutter this macro unnecessarily.
+ */
+#define XSTATE_XSAVE(st, lmask, hmask, err) \
+ asm volatile(ALTERNATIVE_2(XSAVE, \
+ XSAVEOPT, X86_FEATURE_XSAVEOPT, \
+ XSAVES, X86_FEATURE_XSAVES) \
+ "\n" \
+ "xor %[err], %[err]\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "3: movl $-1, %[err]\n" \
+ "jmp " alt_end_marker "b\n" \
+ ".popsection\n" \
+ _ASM_EXTABLE(661b, 3b) \
+ : [err] "=r" (err) \
+ : "D" (st), "a" (lmask), "d" (hmask) \
+ : "memory")
+
+/*
+ * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact
+ * XSAVE area format.
+ */
+#define XSTATE_XRESTORE(st, lmask, hmask, err) \
+ asm volatile(ALTERNATIVE(XRSTOR, \
+ XRSTORS, X86_FEATURE_XSAVES) \
+ "\n" \
+ "xor %[err], %[err]\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "3: movl $-1, %[err]\n" \
+ "jmp 663b\n" \
+ ".popsection\n" \
+ _ASM_EXTABLE(661b, 3b) \
+ : [err] "=r" (err) \
+ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
+ : "memory")
+
+
/*
* This function is called only during boot time when x86 caps are not set
* up and alternative can not be used yet.
@@ -75,22 +135,13 @@ static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
- int err = 0;
-
- WARN_ON(system_state != SYSTEM_BOOTING);
+ int err;
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- asm volatile("1:"XSAVES"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
+ if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+ XSTATE_OP(XSAVES, fx, lmask, hmask, err);
else
- asm volatile("1:"XSAVE"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
+ XSTATE_OP(XSAVE, fx, lmask, hmask, err);
+
return err;
}
@@ -102,22 +153,12 @@ static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
- int err = 0;
-
- WARN_ON(system_state != SYSTEM_BOOTING);
+ int err;
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- asm volatile("1:"XRSTORS"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
+ if (static_cpu_has_safe(X86_FEATURE_XSAVES))
+ XSTATE_OP(XRSTORS, fx, lmask, hmask, err);
else
- asm volatile("1:"XRSTOR"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
+ XSTATE_OP(XRSTOR, fx, lmask, hmask, err);
return err;
}
@@ -128,31 +169,9 @@ static inline int xsave_state(struct xsave_struct *fx, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
- int err = 0;
+ int err;
- /*
- * If xsaves is enabled, xsaves replaces xsaveopt because
- * it supports compact format and supervisor states in addition to
- * modified optimization in xsaveopt.
- *
- * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave
- * because xsaveopt supports modified optimization which is not
- * supported by xsave.
- *
- * If none of xsaves and xsaveopt is enabled, use xsave.
- */
- alternative_input_2(
- "1:"XSAVE,
- XSAVEOPT,
- X86_FEATURE_XSAVEOPT,
- XSAVES,
- X86_FEATURE_XSAVES,
- [fx] "D" (fx), "a" (lmask), "d" (hmask) :
- "memory");
- asm volatile("2:\n\t"
- xstate_fault
- : "0" (0)
- : "memory");
+ XSTATE_XSAVE(fx, lmask, hmask, err);
return err;
}
@@ -162,25 +181,11 @@ static inline int xsave_state(struct xsave_struct *fx, u64 mask)
*/
static inline int xrstor_state(struct xsave_struct *fx, u64 mask)
{
- int err = 0;
+ int err;
u32 lmask = mask;
u32 hmask = mask >> 32;
- /*
- * Use xrstors to restore context if it is enabled. xrstors supports
- * compacted format of xsave area which is not supported by xrstor.
- */
- alternative_input(
- "1: " XRSTOR,
- XRSTORS,
- X86_FEATURE_XSAVES,
- "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
-
- asm volatile("2:\n"
- xstate_fault
- : "0" (0)
- : "memory");
+ XSTATE_XRESTORE(fx, lmask, hmask, err);
return err;
}
--
2.3.3
On Thu, Apr 02, 2015 at 03:11:22PM +0200, Borislav Petkov wrote:
> From: Borislav Petkov <[email protected]>
>
> Previously, we did call an XSAVE/XRSTOR variant through alternatives
> and did potential exception handling resulting from the instruction
> execution in a second inline asm. Which was misleading and error prone,
> see
>
> 06c8173eb92b ("x86/fpu/xsaves: Fix improper uses of __ex_table")
>
> for an example.
>
> Add single macros which combine the alternatives and the exception
> handling.
FWIW I think this looks much nicer! I have a couple of comments though,
apologies in advance if they aren't relevant :)
>
> While at it, remove the SYSTEM_BOOTING checks in favor of
> static_cpu_has_safe() which works regardless of system state.
>
I thought the SYSTEM_BOOTING checks were present to make sure we call these
functions only when the alternative instructions had *not* been applied
(i.e. when SYSTEM_BOOTING). We could have added the opposite checks in
xsave_state()/xrstor_state() to make sure the alternative instructions are
applied when these are called (i.e. when !SYSTEM_BOOTING).
In the unlikely event where I'm not wrong about this, having a nicely named
helper altinstr_are_applied() instead of manually checking the system_state
variable would probably help!
But maybe we're pretty confident this will not happen anyway?
> Cleanup comments.
>
> Signed-off-by: Borislav Petkov <[email protected]>
> Cc: "H. Peter Anvin" <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Thomas Gleixner <[email protected]>
> Cc: Quentin Casasnovas <[email protected]>
> Cc: Oleg Nesterov <[email protected]>
> Cc: Andy Lutomirski <[email protected]>
> ---
> arch/x86/include/asm/xsave.h | 141 ++++++++++++++++++++++---------------------
> 1 file changed, 73 insertions(+), 68 deletions(-)
>
> diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
> index c9a6d68b8d62..e6c7986c95df 100644
> --- a/arch/x86/include/asm/xsave.h
> +++ b/arch/x86/include/asm/xsave.h
> @@ -67,6 +67,66 @@ extern int init_fpu(struct task_struct *child);
> _ASM_EXTABLE(1b, 3b) \
> : [err] "=r" (err)
>
> +#define XSTATE_OP(op, st, lmask, hmask, err) \
> + asm volatile("1:" op "\n\t" \
> + "2:\n\t" \
> + "xor %[err], %[err]\n" \
Are you not invariably clearing err here? If the instruction fault, we go
to label '3' which does 'err = -1; goto 2', which clears err. Same remark
for XSTATE_XSAVE()/XSTATE_RESTORE().
Probably missing something..
Also, tiny consistency nit, maybe use "\n\t" everywhere?
> + ".pushsection .fixup,\"ax\"\n\t" \
> + "3: movl $-1,%[err]\n\t" \
> + "jmp 2b\n\t" \
> + ".popsection\n\t" \
> + _ASM_EXTABLE(1b, 3b) \
> + : [err] "=r" (err) \
> + : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \
> + : "memory")
> +
I've tried compiling this on top of v4.0-rc5 and I get a compile error
because alt_end_marker isn't defined. Which other patches should I take to
test this?
Quentin
On Thu, Apr 02, 2015 at 05:52:10PM +0200, Quentin Casasnovas wrote:
> FWIW I think this looks much nicer! I have a couple of comments though,
> apologies in advance if they aren't relevant :)
No worries, I very much appreciate the looking at. :)
> I thought the SYSTEM_BOOTING checks were present to make sure we call these
> functions only when the alternative instructions had *not* been applied
> (i.e. when SYSTEM_BOOTING). We could have added the opposite checks in
> xsave_state()/xrstor_state() to make sure the alternative instructions are
> applied when these are called (i.e. when !SYSTEM_BOOTING).
Well, I think this was a clumsy way to say that we shouldn't be using
the _booting() variants when the system isn't booting anymore:
- WARN_ON(system_state != SYSTEM_BOOTING);
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- asm volatile("1:"XSAVES"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
else
- asm volatile("1:"XSAVE"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
WRT alternatives, the code didn't have any alternatives invocations
there - it is just a cluttered way of saying:
if (CPU has XSAVES support)
XSAVES
else
XSAVE
that's it. With exception handling of course.
> Are you not invariably clearing err here? If the instruction fault, we go
> to label '3' which does 'err = -1; goto 2', which clears err. Same remark
> for XSTATE_XSAVE()/XSTATE_RESTORE().
>
> Probably missing something..
No, you're not. The backwards jump label needs to be after the XOR.
Thanks for catching that.
> Also, tiny consistency nit, maybe use "\n\t" everywhere?
Yeah, didn't want to make the macro more unreadable than it is now. The
"\n\t" things are only for when looking at the .s file and almost no one
does that :-)
> I've tried compiling this on top of v4.0-rc5 and I get a compile error
> because alt_end_marker isn't defined. Which other patches should I take to
> test this?
It needs tip/master.
Let me redo the patch.
Thanks.
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Thu, Apr 02, 2015 at 06:12:59PM +0200, Borislav Petkov wrote:
> On Thu, Apr 02, 2015 at 05:52:10PM +0200, Quentin Casasnovas wrote:
> > FWIW I think this looks much nicer! I have a couple of comments though,
> > apologies in advance if they aren't relevant :)
>
> No worries, I very much appreciate the looking at. :)
>
:)
> > I thought the SYSTEM_BOOTING checks were present to make sure we call these
> > functions only when the alternative instructions had *not* been applied
> > (i.e. when SYSTEM_BOOTING). We could have added the opposite checks in
> > xsave_state()/xrstor_state() to make sure the alternative instructions are
> > applied when these are called (i.e. when !SYSTEM_BOOTING).
>
> Well, I think this was a clumsy way to say that we shouldn't be using
> the _booting() variants when the system isn't booting anymore:
>
> - WARN_ON(system_state != SYSTEM_BOOTING);
>
> - if (boot_cpu_has(X86_FEATURE_XSAVES))
> - asm volatile("1:"XSAVES"\n\t"
> - "2:\n\t"
> - xstate_fault
> - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
> - : "memory");
> else
> - asm volatile("1:"XSAVE"\n\t"
> - "2:\n\t"
> - xstate_fault
> - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
> - : "memory");
>
>
> WRT alternatives, the code didn't have any alternatives invocations
> there - it is just a cluttered way of saying:
>
> if (CPU has XSAVES support)
> XSAVES
> else
> XSAVE
So I'm not saying the function is using alternative, just that if the
alternative _are_ applied, then we do not want to use the *_booting()
variants (likely for performance reasons), hence the WARN_ON().
So IMO it does not hurt to keep it here, with maybe renaming it something
like the following so it is obvious why it's there:
/* Use the non _booting() variants if the alternatives are applied. */
WARN_ON(altinstr_are_applied());
I would personnaly add it to the non _booting() variants as well to make
sure the alternative instructions _are_ applied, since otherwise that would
probably cause random failures to restore the xsaveopt/xsaves context
previously saved. Obviously very paranoid check anyway so if you still
want to drop it then fine :)
Quentin
On Thu, Apr 02, 2015 at 06:33:40PM +0200, Quentin Casasnovas wrote:
> I would personnaly add it to the non _booting() variants as well to make
> sure the alternative instructions _are_ applied, since otherwise that would
> probably cause random failures to restore the xsaveopt/xsaves context
> previously saved.
This is probably the only reason why I should keep them: so that there
are no mismatched save and restore uses. But this needs to be cleaned up
properly, maybe the two even merged. It would need more analysis at the
callsites. Definitely future work.
Thanks.
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Thu, Apr 02, 2015 at 06:12:59PM +0200, Borislav Petkov wrote:
> On Thu, Apr 02, 2015 at 05:52:10PM +0200, Quentin Casasnovas wrote:
> > I've tried compiling this on top of v4.0-rc5 and I get a compile error
> > because alt_end_marker isn't defined. Which other patches should I take to
> > test this?
>
> It needs tip/master.
>
So I've had a look at tip/master and I don't _think_ commit
4332195 ("x86/alternatives: Add instruction padding")
is correct.
> +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
> +140:
> + \oldinstr
> +141:
> + .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
> + .skip -(((145f-144f)-(144f-143f)-(141b-140b)) > 0) * ((145f-144f)-(144f-143f)-(141b-140b)),0x90
I'm missing how the second .skip directive is supposed to work. For
example, assuming:
sizeof(repl2) = (145f-144f) = 5 and
sizeof(repl1) = (144f-143f) = 4 and
sizeof(oldinsn) = (141b-140b) = 3
it'll do:
=> .skip -(((5) - (4) - (3)) > 0) * ((5) - (4) -(3)), 0x90
=> .skip -((-2) > 0) * (-2), 0x90
=> .skip 0*-2, 0x90 ; we're not skipping anything here are we?
=> .skip 0, 0x90
Whereas AIUI, we were supposed to have skipped another extra byte: the
original instruction is 3 bytes long, and we added one NOP byte in the
first .skip directive because repl1 was 4 bytes, and we're not adding the
remaining one byte needed to apply repl2. Provided this is correct and I'm
not missing something, the ALTERNATIVE_2() macro defined in
asm/alternative.h is suffering from the same problem.
I think we want to substract to sizeof(repl2) the _max_ between
sizeof(repl1) and sizeof(oldinsn), so it would probably look something like
this horrible line:
.skip -(((145f - 144f) - max((144f - 143f), (141b - 140b))) > 0) *
((145f - 144f) - max((144f - 143f), (141b - 140b))), 0x90
And if we can't really use max(a,b) here, we can use something like this
even more horrible line:
.skip -(((145f - 144f) - ((-(((144f - 143f) - (141b - 140b)) > 0) * (144f - 143f)) +
(-(((144f - 143f) - (141b - 140b)) <= 0) * (141b - 143b)))) > 0) *
((145f - 144f) - ((-(((144f - 143f) - (141b - 140b)) > 0) * (144f - 143f)) +
(-(((144f - 143f) - (141b - 140b)) <= 0) * (141b - 143b))))
This is obviously completely un-tested and not even compiled! :)
Now that I think about it though, can we not make use of the .if directive
to make the two .skip directives much easier to parse? That question
applies even more if your original code is correct and I missed something
above, since I'd blame the un-readability to explain my misunderstanding :P
e.g.
#define size_orig_insn (141b - 140b)
#define size_repl1 (144f - 143f)
#define size_repl2 (145f - 144f)
.macro PAD_ALTERNATIVE_1
; Pad with NOP if orig_insn is smaller than repl1
.if (size_repl1 > size_orig_insn)
.skip size_repl1 - size_orig_insn, 0x90
.endif
.endmacro
.macro PAD_ALTERNATIVE_2
PAD_ALTERNATIVE_1
; Pad with NOP if orig_insn + above padding is smaller than repl2
.if ((size_repl2 > size_repl1) || (size_repl2 > size_orig_insn))
.if (size_repl1 > size_orig_insn)
.skip size_repl2 - size_repl1, 0x90
.else
.skip size_repl2 - size_orig_insn, 090
.endif
.endif
.endmacro
.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
140:
\oldinstr
141:
PAD_ALTERNATIVE_2
...
Again this is un-tested so maybe it's not possible to do it this way. What
do you think?
Quentin
On Fri, Apr 03, 2015 at 04:06:30PM +0200, Quentin Casasnovas wrote:
> On Thu, Apr 02, 2015 at 06:12:59PM +0200, Borislav Petkov wrote:
> > On Thu, Apr 02, 2015 at 05:52:10PM +0200, Quentin Casasnovas wrote:
> > > I've tried compiling this on top of v4.0-rc5 and I get a compile error
> > > because alt_end_marker isn't defined. Which other patches should I take to
> > > test this?
> >
> > It needs tip/master.
> >
>
> So I've had a look at tip/master and I don't _think_ commit
>
> 4332195 ("x86/alternatives: Add instruction padding")
>
> is correct.
>
> [...]
>
> we can use something like this even more horrible line:
Derp! Already spotted a couple of errors below..
>
> .skip -(((145f - 144f) - ((-(((144f - 143f) - (141b - 140b)) > 0) * (144f - 143f)) +
> (-(((144f - 143f) - (141b - 140b)) <= 0) * (141b - 143b)))) > 0) *
^ 140b
> ((145f - 144f) - ((-(((144f - 143f) - (141b - 140b)) > 0) * (144f - 143f)) +
> (-(((144f - 143f) - (141b - 140b)) <= 0) * (141b - 143b))))
^ 140b
>
> This is obviously completely un-tested and not even compiled! :)
>
Told you!
Quentin
On Fri, Apr 03, 2015 at 04:14:26PM +0200, Quentin Casasnovas wrote:
> > This is obviously completely un-tested and not even compiled! :)
> >
>
> Told you!
:-)
So all clear or we need to do more discussing?
Basically, the .skip is supposed to add 0x90 only when the evaluated
expression is true (yeah, gas works with signed 32-bit values so it
evaluates either to 0 or -1, thus the games with "-" in front).
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Fri, Apr 03, 2015 at 05:23:24PM +0200, Borislav Petkov wrote:
> On Fri, Apr 03, 2015 at 04:14:26PM +0200, Quentin Casasnovas wrote:
> > > This is obviously completely un-tested and not even compiled! :)
> > >
> >
> > Told you!
>
> :-)
>
> So all clear or we need to do more discussing?
>
> Basically, the .skip is supposed to add 0x90 only when the evaluated
> expression is true (yeah, gas works with signed 32-bit values so it
> evaluates either to 0 or -1, thus the games with "-" in front).
>
Oh yeah that was clear from the beggings, there was just a typo in my
initial suggestion to fix what I _think_ the problem is (which I've fixed
inlined so you might have missed it).
So yeah I still think we're not properly padding, if you take my earlier
example where repl2 = 5 bytes, repl1 = 4 bytes and orin_insn = 3.
I'll let you re-read my original mail and come back to me to tell me what'd
I really miss! :)
Quentin
On Fri, Apr 03, 2015 at 05:40:55PM +0200, Quentin Casasnovas wrote:
> So yeah I still think we're not properly padding, if you take my earlier
> example where repl2 = 5 bytes, repl1 = 4 bytes and orin_insn = 3.
>
> I'll let you re-read my original mail and come back to me to tell me what'd
> I really miss! :)
Dammit, dammit, dammit!
And I thought this aspect was taken care of. I went into the old
branches where I had done this and there I have:
+#define OLDINSTR_2(oldinstr, num1, num2) \
+ __OLDINSTR(oldinstr, num1) \
+ ".skip -(((" alt_rlen(num2) ")-(" alt_rlen(num1) ")) > 0) * " \
+ "((" alt_rlen(num2) ")-(" alt_rlen(num1) ")),0x90\n" \
+ alt_end_marker ":\n"
+
without the size of the orig_insn factored in into the padding.
And that would work for your example because it would add 1+1 bytes
padding.
Basically, the idea was:
.skip len(repl1) - len(orig), 0x90
.skip len(repl2) - len(repl1), 0x90
BUT!, for some reason I changed it to what's there now and I can't
remember why anymore.
IOW, this should fix your example but I need to think about it on a
clear head first, to try to remember what was the problem with that
original approach:
---
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 524bddce0b76..708838260f7c 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -50,7 +50,7 @@
\oldinstr
141:
.skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
- .skip -(((145f-144f)-(144f-143f)-(141b-140b)) > 0) * ((145f-144f)-(144f-143f)-(141b-140b)),0x90
+ .skip -(((145f-144f)-(144f-143f)) > 0) * ((145f-144f)-(144f-143f)),0x90
142:
.pushsection .altinstructions,"a"
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 5aef6a97d80e..2b8cc1dd7dbf 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -101,8 +101,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
*/
#define OLDINSTR_2(oldinstr, num1, num2) \
__OLDINSTR(oldinstr, num1) \
- ".skip -(((" alt_rlen(num2) ")-(" alt_rlen(num1) ")-(662b-661b)) > 0) * " \
- "((" alt_rlen(num2) ")-(" alt_rlen(num1) ")-(662b-661b)),0x90\n" \
+ ".skip -(((" alt_rlen(num2) ")-(" alt_rlen(num1) ")) > 0) * " \
+ "((" alt_rlen(num2) ")-(" alt_rlen(num1) ")),0x90\n" \
alt_end_marker ":\n"
#define ALTINSTR_ENTRY(feature, num) \
---
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Fri, Apr 03, 2015 at 07:06:25PM +0200, Borislav Petkov wrote:
> On Fri, Apr 03, 2015 at 05:40:55PM +0200, Quentin Casasnovas wrote:
> > So yeah I still think we're not properly padding, if you take my earlier
> > example where repl2 = 5 bytes, repl1 = 4 bytes and orin_insn = 3.
> >
> > I'll let you re-read my original mail and come back to me to tell me what'd
> > I really miss! :)
>
> Dammit, dammit, dammit!
>
> And I thought this aspect was taken care of. I went into the old
> branches where I had done this and there I have:
>
> +#define OLDINSTR_2(oldinstr, num1, num2) \
> + __OLDINSTR(oldinstr, num1) \
> + ".skip -(((" alt_rlen(num2) ")-(" alt_rlen(num1) ")) > 0) * " \
> + "((" alt_rlen(num2) ")-(" alt_rlen(num1) ")),0x90\n" \
> + alt_end_marker ":\n"
> +
>
> without the size of the orig_insn factored in into the padding.
>
> And that would work for your example because it would add 1+1 bytes
> padding.
>
> Basically, the idea was:
>
> .skip len(repl1) - len(orig), 0x90
> .skip len(repl2) - len(repl1), 0x90
>
> BUT!, for some reason I changed it to what's there now and I can't
> remember why anymore.
I think it would not work in the case where repl1 is smaller or equal than
orig_insn (i.e. no padding in the first .skip) but orig_insn is strictly
smaller than repl2 (since we're never comparing repl2 with insn in this
new-old code).
Anything wrong with the two different approaches I've suggested in my
original mail? One is using a one-liner .skip directive inspired by yours,
and the other is using .if directives. FWIW I think exploding the logic
using conditionnals '.if' is way more readable and less error-prone.
Quentin
On Fri, Apr 03, 2015 at 07:33:06PM +0200, Quentin Casasnovas wrote:
> > Basically, the idea was:
> >
> > .skip len(repl1) - len(orig), 0x90
> > .skip len(repl2) - len(repl1), 0x90
> >
> > BUT!, for some reason I changed it to what's there now and I can't
> > remember why anymore.
>
> I think it would not work in the case where repl1 is smaller or equal than
> orig_insn (i.e. no padding in the first .skip) but orig_insn is strictly
> smaller than repl2 (since we're never comparing repl2 with insn in this
> new-old code).
orig_insn=4
repl1=3
repl2=5
.skip 0, 0x90
.skip 2, 0x90
I think that still works, only the padding is larger than it needs to
be. And it is so many bytes larger as len(abs(repl1 - orig_insn)) is.
In the example above, we'll get two bytes padding while only 1 suffices.
> Anything wrong with the two different approaches I've suggested in my
> original mail?
Right now, I want to have a minimal fix for obvious reasons. We can
always improve stuff later when there's more time.
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Fri, Apr 03, 2015 at 07:48:24PM +0200, Borislav Petkov wrote:
> On Fri, Apr 03, 2015 at 07:33:06PM +0200, Quentin Casasnovas wrote:
> > > Basically, the idea was:
> > >
> > > .skip len(repl1) - len(orig), 0x90
> > > .skip len(repl2) - len(repl1), 0x90
> > >
> > > BUT!, for some reason I changed it to what's there now and I can't
> > > remember why anymore.
> >
> > I think it would not work in the case where repl1 is smaller or equal than
> > orig_insn (i.e. no padding in the first .skip) but orig_insn is strictly
> > smaller than repl2 (since we're never comparing repl2 with insn in this
> > new-old code).
>
> orig_insn=4
> repl1=3
> repl2=5
>
> .skip 0, 0x90
> .skip 2, 0x90
>
> I think that still works, only the padding is larger than it needs to
> be. And it is so many bytes larger as len(abs(repl1 - orig_insn)) is.
>
> In the example above, we'll get two bytes padding while only 1 suffices.
>
Right.
> > Anything wrong with the two different approaches I've suggested in my
> > original mail?
>
> Right now, I want to have a minimal fix for obvious reasons. We can
> always improve stuff later when there's more time.
>
If you're happy with the extra padding in such cases then your second
approach looks okay to me. But IMO, even if taking the '.if' directive
approach is certainly bigger LOC-wise, it should be much easier to review
in a rush than some other .skip trickery.
It all depends on your definition of minimal change really, and whether
that extra padding is acceptable or not for you :)
Quentin
On Fri, Apr 03, 2015 at 10:42:17PM +0200, Quentin Casasnovas wrote:
> If you're happy with the extra padding in such cases then your second
> approach looks okay to me. But IMO, even if taking the '.if' directive
> approach is certainly bigger LOC-wise, it should be much easier to review
> in a rush than some other .skip trickery.
.if needs absolute expressions and I can't get it to even compile with the
experiments I've done so far.
How about this instead?
It basically computes the padding length by doing
max(len(repl1), len(repl2)) - len(orig)
and without conditionals. The macros all do string expansion so that the
strings can get parsed by gas. Initial smoke testing in kvm seems to
work, I need to test it on real metal:
---
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 524bddce0b76..44a1fc5439d3 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -45,12 +45,22 @@
.popsection
.endm
+#define old_len 141b-140b
+#define new_len1 144f-143f
+#define new_len2 145f-144f
+
+/*
+ * Shamelessly stolen and adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define alt_max_short(a,b) (((a) - (((a) - (b)) & (((a) - (b)) >> 15))) & 0xffff)
+
.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
140:
\oldinstr
141:
- .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
- .skip -(((145f-144f)-(144f-143f)-(141b-140b)) > 0) * ((145f-144f)-(144f-143f)-(141b-140b)),0x90
+ .skip -((alt_max_short(new_len1, new_len2) - old_len) > 0) * \
+ (alt_max_short(new_len1, new_len2) - old_len),0x90
142:
.pushsection .altinstructions,"a"
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 5aef6a97d80e..2c515ebcc767 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -96,13 +96,19 @@ static inline int alternatives_text_reserved(void *start, void *end)
alt_end_marker ":\n"
/*
+ * max without conditionals. Shamelessly stolen and adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define alt_max_short(a, b) "(((" a ") - (((" a ") - (" b ")) & (((" a ") - (" b ")) >> 15))) & 0xffff)"
+
+/*
* Pad the second replacement alternative with additional NOPs if it is
* additionally longer than the first replacement alternative.
*/
-#define OLDINSTR_2(oldinstr, num1, num2) \
- __OLDINSTR(oldinstr, num1) \
- ".skip -(((" alt_rlen(num2) ")-(" alt_rlen(num1) ")-(662b-661b)) > 0) * " \
- "((" alt_rlen(num2) ")-(" alt_rlen(num1) ")-(662b-661b)),0x90\n" \
+#define OLDINSTR_2(oldinstr, num1, num2) \
+ "661:\n\t" oldinstr "\n662:\n" \
+ ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
+ "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
alt_end_marker ":\n"
#define ALTINSTR_ENTRY(feature, num) \
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5c993c94255e..7c4ad005d7a0 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -369,11 +369,11 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
continue;
}
- DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d)",
+ DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
a->cpuid >> 5,
a->cpuid & 0x1f,
instr, a->instrlen,
- replacement, a->replacementlen);
+ replacement, a->replacementlen, a->padlen);
DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Sat, Apr 04, 2015 at 09:34:54AM +0200, Borislav Petkov wrote:
> On Fri, Apr 03, 2015 at 10:42:17PM +0200, Quentin Casasnovas wrote:
> > If you're happy with the extra padding in such cases then your second
> > approach looks okay to me. But IMO, even if taking the '.if' directive
> > approach is certainly bigger LOC-wise, it should be much easier to review
> > in a rush than some other .skip trickery.
>
> .if needs absolute expressions and I can't get it to even compile with the
> experiments I've done so far.
>
> How about this instead?
>
> It basically computes the padding length by doing
>
> max(len(repl1), len(repl2)) - len(orig)
>
Nice!
> and without conditionals. The macros all do string expansion so that the
> strings can get parsed by gas. Initial smoke testing in kvm seems to
> work, I need to test it on real metal:
>
> ---
> diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
> index 524bddce0b76..44a1fc5439d3 100644
> --- a/arch/x86/include/asm/alternative-asm.h
> +++ b/arch/x86/include/asm/alternative-asm.h
> @@ -45,12 +45,22 @@
> .popsection
> .endm
>
> +#define old_len 141b-140b
> +#define new_len1 144f-143f
> +#define new_len2 145f-144f
> +
> +/*
> + * Shamelessly stolen and adapted from:
> + * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
> + */
> +#define alt_max_short(a,b) (((a) - (((a) - (b)) & (((a) - (b)) >> 15))) & 0xffff)
> +
Since all of these are compile time constants, could we not use the safe
variant on that same page? Not that I'm too worried about the signed right
shift but heh that would be portable and should not impact performance
anyway, so no added value in using the optimized version is there?
> .macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
> 140:
> \oldinstr
> 141:
> - .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
> - .skip -(((145f-144f)-(144f-143f)-(141b-140b)) > 0) * ((145f-144f)-(144f-143f)-(141b-140b)),0x90
> + .skip -((alt_max_short(new_len1, new_len2) - old_len) > 0) * \
> + (alt_max_short(new_len1, new_len2) - old_len),0x90
> 142:
>
> .pushsection .altinstructions,"a"
> diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
> index 5aef6a97d80e..2c515ebcc767 100644
> --- a/arch/x86/include/asm/alternative.h
> +++ b/arch/x86/include/asm/alternative.h
> @@ -96,13 +96,19 @@ static inline int alternatives_text_reserved(void *start, void *end)
> alt_end_marker ":\n"
>
> /*
> + * max without conditionals. Shamelessly stolen and adapted from:
> + * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
> + */
> +#define alt_max_short(a, b) "(((" a ") - (((" a ") - (" b ")) & (((" a ") - (" b ")) >> 15))) & 0xffff)"
> +
> +/*
> * Pad the second replacement alternative with additional NOPs if it is
> * additionally longer than the first replacement alternative.
> */
> -#define OLDINSTR_2(oldinstr, num1, num2) \
> - __OLDINSTR(oldinstr, num1) \
> - ".skip -(((" alt_rlen(num2) ")-(" alt_rlen(num1) ")-(662b-661b)) > 0) * " \
> - "((" alt_rlen(num2) ")-(" alt_rlen(num1) ")-(662b-661b)),0x90\n" \
> +#define OLDINSTR_2(oldinstr, num1, num2) \
> + "661:\n\t" oldinstr "\n662:\n" \
> + ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
> + "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
> alt_end_marker ":\n"
On the bonus side, you're getting rid of the double 'alt_end_marker' label
in case of an alternative_2()!
Looks good to me and I find it much easier to understand here :)
Thanks,
Quentin
On Sat, Apr 04, 2015 at 10:36:11AM +0200, Quentin Casasnovas wrote:
> Since all of these are compile time constants, could we not use the safe
> variant on that same page? Not that I'm too worried about the signed right
> shift but heh that would be portable and should not impact performance
> anyway, so no added value in using the optimized version is there?
Seems to work with the experimental diff below. I need to do
-(-(x < y))
with the last term though as we're working with s32s.
---
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 44a1fc5439d3..2cb6da2716bf 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -53,14 +53,14 @@
* Shamelessly stolen and adapted from:
* http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
*/
-#define alt_max_short(a,b) (((a) - (((a) - (b)) & (((a) - (b)) >> 15))) & 0xffff)
+#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
140:
\oldinstr
141:
- .skip -((alt_max_short(new_len1, new_len2) - old_len) > 0) * \
- (alt_max_short(new_len1, new_len2) - old_len),0x90
+ .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+ (alt_max_short(new_len1, new_len2) - (old_len)),0x90
142:
.pushsection .altinstructions,"a"
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 93118fb23976..453b6a05a07e 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -29,6 +29,14 @@ ENTRY(__memset)
ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memset_erms", X86_FEATURE_ERMS
+ ALTERNATIVE_2 \
+ ".byte 0xc3, 0xc3, 0xc3", \
+ ".byte 0x66, 0x66, 0x66, 0x90", \
+ X86_FEATURE_ALWAYS, \
+ ".byte 0x66, 0x66, 0x66, 0x66, 0xcc", \
+ X86_FEATURE_ALWAYS
+
+
movq %rdi,%r9
movq %rdx,%rcx
andl $7,%edx
> On the bonus side, you're getting rid of the double 'alt_end_marker' label
> in case of an alternative_2()!
>
> Looks good to me and I find it much easier to understand here :)
Cool. Please give it more critical staring as we're under time pressure
here.
Thanks!
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Sat, Apr 04, 2015 at 11:25:36AM +0200, Borislav Petkov wrote:
> On Sat, Apr 04, 2015 at 10:36:11AM +0200, Quentin Casasnovas wrote:
> > Since all of these are compile time constants, could we not use the safe
> > variant on that same page? Not that I'm too worried about the signed right
> > shift but heh that would be portable and should not impact performance
> > anyway, so no added value in using the optimized version is there?
>
> Seems to work with the experimental diff below. I need to do
>
> -(-(x < y))
>
> with the last term though as we're working with s32s.
>
> ---
> diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
> index 44a1fc5439d3..2cb6da2716bf 100644
> --- a/arch/x86/include/asm/alternative-asm.h
> +++ b/arch/x86/include/asm/alternative-asm.h
> @@ -53,14 +53,14 @@
> * Shamelessly stolen and adapted from:
> * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
> */
> -#define alt_max_short(a,b) (((a) - (((a) - (b)) & (((a) - (b)) >> 15))) & 0xffff)
> +#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
>
So I'm not claiming I've spent the time to fully understand this macro but
it looks like it's doing the right thing on my dummy tests:
http://pastebin.com/DDhtZQgX
Did you also change it in the alternative.h file BTW?
> .macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
> 140:
> \oldinstr
> 141:
> - .skip -((alt_max_short(new_len1, new_len2) - old_len) > 0) * \
> - (alt_max_short(new_len1, new_len2) - old_len),0x90
> + .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
> + (alt_max_short(new_len1, new_len2) - (old_len)),0x90
> 142:
Good catch for the missing parenthesis!
> >
> > Looks good to me and I find it much easier to understand here :)
> >
>
> Cool. Please give it more critical staring as we're under time pressure
> here.
>
So I _think_ it's OK but it would be re-assuring if somebody else could
have a look as well just in case.. :)
Do you have a cleaned up version of the patch you're planning to apply on
top of tip/master instead of just snippets? This way we can hammer it with
different calls to ALTERNATIVE_2 and alternative_2 to check it's good?
I'll have to leave soonish though..
Quentin
On Sat, Apr 04, 2015 at 12:11:55PM +0200, Quentin Casasnovas wrote:
> So I'm not claiming I've spent the time to fully understand this macro but
> it looks like it's doing the right thing on my dummy tests:
>
> http://pastebin.com/DDhtZQgX
Cool, I'll play with it a bit when I get back.
> Did you also change it in the alternative.h file BTW?
Yeah, see below.
> Good catch for the missing parenthesis!
:-)
> So I _think_ it's OK but it would be re-assuring if somebody else could
> have a look as well just in case.. :)
Yeah, once I test it on everything here, I'll send it out for people to
poke holes too. Current version below.
> Do you have a cleaned up version of the patch you're planning to apply on
> top of tip/master instead of just snippets?
See below.
> This way we can hammer it with
> different calls to ALTERNATIVE_2 and alternative_2 to check it's good?
> I'll have to leave soonish though..
Thanks a lot for this, much appreciated!
---
From: Borislav Petkov <[email protected]>
Date: Sat, 4 Apr 2015 10:02:42 +0200
Subject: [PATCH] ALTERNATIVE_2 tentative fix
Signed-off-by: Borislav Petkov <[email protected]>
Reported-by: Quentin Casasnovas <[email protected]>
---
arch/x86/include/asm/alternative-asm.h | 14 ++++++++++++--
arch/x86/include/asm/alternative.h | 14 ++++++++++----
arch/x86/kernel/alternative.c | 4 ++--
3 files changed, 24 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 524bddce0b76..bdf02eeee765 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -45,12 +45,22 @@
.popsection
.endm
+#define old_len 141b-140b
+#define new_len1 144f-143f
+#define new_len2 145f-144f
+
+/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+
.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
140:
\oldinstr
141:
- .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
- .skip -(((145f-144f)-(144f-143f)-(141b-140b)) > 0) * ((145f-144f)-(144f-143f)-(141b-140b)),0x90
+ .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+ (alt_max_short(new_len1, new_len2) - (old_len)),0x90
142:
.pushsection .altinstructions,"a"
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 5aef6a97d80e..a542c8017a89 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -96,13 +96,19 @@ static inline int alternatives_text_reserved(void *start, void *end)
alt_end_marker ":\n"
/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
+
+/*
* Pad the second replacement alternative with additional NOPs if it is
* additionally longer than the first replacement alternative.
*/
-#define OLDINSTR_2(oldinstr, num1, num2) \
- __OLDINSTR(oldinstr, num1) \
- ".skip -(((" alt_rlen(num2) ")-(" alt_rlen(num1) ")-(662b-661b)) > 0) * " \
- "((" alt_rlen(num2) ")-(" alt_rlen(num1) ")-(662b-661b)),0x90\n" \
+#define OLDINSTR_2(oldinstr, num1, num2) \
+ "661:\n\t" oldinstr "\n662:\n" \
+ ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
+ "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
alt_end_marker ":\n"
#define ALTINSTR_ENTRY(feature, num) \
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5c993c94255e..7c4ad005d7a0 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -369,11 +369,11 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
continue;
}
- DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d)",
+ DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
a->cpuid >> 5,
a->cpuid & 0x1f,
instr, a->instrlen,
- replacement, a->replacementlen);
+ replacement, a->replacementlen, a->padlen);
DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
--
2.3.3
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Sat, Apr 04, 2015 at 12:29:22PM +0200, Borislav Petkov wrote:
> On Sat, Apr 04, 2015 at 12:11:55PM +0200, Quentin Casasnovas wrote:
> > So I'm not claiming I've spent the time to fully understand this macro but
> > it looks like it's doing the right thing on my dummy tests:
> >
> > http://pastebin.com/DDhtZQgX
>
> Cool, I'll play with it a bit when I get back.
Ok, final version passes on everything here and dumping patching debug
info with "debug-alternative" looks ok. I'm sending it as a reply to
this note. Please poke holes.
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
From: Borislav Petkov <[email protected]>
Quentin caught a corner case with the generation of instruction padding
in the ALTERNATIVE_2 macro: if len(orig_insn) < len(alt1) < len(alt2),
then not enough padding gets added and that is not good(tm) as we could
overwrite the beginning of the next instruction.
Luckily, at the time of this writing, we don't have ALTERNATIVE_2()
invocations which have that problem and even if we did, a simple fix
would be to prepend the instructions with enough prefixes so that that
corner case doesn't happen.
However, best it would be if we fixed it properly. See below for a
simple, abstracted example of what we're doing.
So what we ended up doing is, we compute the
max(len(alt1), len(alt2)) - len(orig_insn)
and feed that value to the .skip gas directive. The max() cannot have
conditionals due to gas limitations, thus the fancy integer math.
With this patch, all ALTERNATIVE_2 sites get padded correctly; generating
obscure test cases pass too:
#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
#define gen_skip(orig, alt1, alt2, marker) \
.skip -((alt_max_short(alt1, alt2) - (orig)) > 0) * \
(alt_max_short(alt1, alt2) - (orig)),marker
.pushsection .text, "ax"
.globl main
main:
gen_skip(1, 2, 4, 0x09)
gen_skip(4, 1, 2, 0x10)
...
.popsection
Thanks to Quentin for catching it and double-checking the fix!
Signed-off-by: Borislav Petkov <[email protected]>
Reported-by: Quentin Casasnovas <[email protected]>
---
arch/x86/include/asm/alternative-asm.h | 14 ++++++++++++--
arch/x86/include/asm/alternative.h | 16 ++++++++++++----
arch/x86/kernel/alternative.c | 4 ++--
3 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 524bddce0b76..bdf02eeee765 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -45,12 +45,22 @@
.popsection
.endm
+#define old_len 141b-140b
+#define new_len1 144f-143f
+#define new_len2 145f-144f
+
+/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+
.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
140:
\oldinstr
141:
- .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
- .skip -(((145f-144f)-(144f-143f)-(141b-140b)) > 0) * ((145f-144f)-(144f-143f)-(141b-140b)),0x90
+ .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+ (alt_max_short(new_len1, new_len2) - (old_len)),0x90
142:
.pushsection .altinstructions,"a"
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 5aef6a97d80e..ba32af062f61 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -96,13 +96,21 @@ static inline int alternatives_text_reserved(void *start, void *end)
alt_end_marker ":\n"
/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas works with s32s.
+ */
+#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
+
+/*
* Pad the second replacement alternative with additional NOPs if it is
* additionally longer than the first replacement alternative.
*/
-#define OLDINSTR_2(oldinstr, num1, num2) \
- __OLDINSTR(oldinstr, num1) \
- ".skip -(((" alt_rlen(num2) ")-(" alt_rlen(num1) ")-(662b-661b)) > 0) * " \
- "((" alt_rlen(num2) ")-(" alt_rlen(num1) ")-(662b-661b)),0x90\n" \
+#define OLDINSTR_2(oldinstr, num1, num2) \
+ "661:\n\t" oldinstr "\n662:\n" \
+ ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
+ "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
alt_end_marker ":\n"
#define ALTINSTR_ENTRY(feature, num) \
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5c993c94255e..7c4ad005d7a0 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -369,11 +369,11 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
continue;
}
- DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d)",
+ DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
a->cpuid >> 5,
a->cpuid & 0x1f,
instr, a->instrlen,
- replacement, a->replacementlen);
+ replacement, a->replacementlen, a->padlen);
DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
--
2.3.3
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
On Sat, Apr 04, 2015 at 03:34:43PM +0200, Borislav Petkov wrote:
> From: Borislav Petkov <[email protected]>
>
> Quentin caught a corner case with the generation of instruction padding
> in the ALTERNATIVE_2 macro: if len(orig_insn) < len(alt1) < len(alt2),
> then not enough padding gets added and that is not good(tm) as we could
> overwrite the beginning of the next instruction.
>
FWIW, this patch looks good to me (I have not tested it though).
Thanks!
Quentin
Commit-ID: dbe4058a6a44af4ca5d146aebe01b0a1f9b7fd2a
Gitweb: http://git.kernel.org/tip/dbe4058a6a44af4ca5d146aebe01b0a1f9b7fd2a
Author: Borislav Petkov <[email protected]>
AuthorDate: Sat, 4 Apr 2015 15:34:43 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Sat, 4 Apr 2015 15:58:23 +0200
x86/alternatives: Fix ALTERNATIVE_2 padding generation properly
Quentin caught a corner case with the generation of instruction
padding in the ALTERNATIVE_2 macro: if len(orig_insn) <
len(alt1) < len(alt2), then not enough padding gets added and
that is not good(tm) as we could overwrite the beginning of the
next instruction.
Luckily, at the time of this writing, we don't have
ALTERNATIVE_2() invocations which have that problem and even if
we did, a simple fix would be to prepend the instructions with
enough prefixes so that that corner case doesn't happen.
However, best it would be if we fixed it properly. See below for
a simple, abstracted example of what we're doing.
So what we ended up doing is, we compute the
max(len(alt1), len(alt2)) - len(orig_insn)
and feed that value to the .skip gas directive. The max() cannot
have conditionals due to gas limitations, thus the fancy integer
math.
With this patch, all ALTERNATIVE_2 sites get padded correctly;
generating obscure test cases pass too:
#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
#define gen_skip(orig, alt1, alt2, marker) \
.skip -((alt_max_short(alt1, alt2) - (orig)) > 0) * \
(alt_max_short(alt1, alt2) - (orig)),marker
.pushsection .text, "ax"
.globl main
main:
gen_skip(1, 2, 4, 0x09)
gen_skip(4, 1, 2, 0x10)
...
.popsection
Thanks to Quentin for catching it and double-checking the fix!
Reported-by: Quentin Casasnovas <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Brian Gerst <[email protected]>
Cc: Denys Vlasenko <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/alternative-asm.h | 14 ++++++++++++--
arch/x86/include/asm/alternative.h | 16 ++++++++++++----
arch/x86/kernel/alternative.c | 4 ++--
3 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 524bddc..bdf02ee 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -45,12 +45,22 @@
.popsection
.endm
+#define old_len 141b-140b
+#define new_len1 144f-143f
+#define new_len2 145f-144f
+
+/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+
.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
140:
\oldinstr
141:
- .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
- .skip -(((145f-144f)-(144f-143f)-(141b-140b)) > 0) * ((145f-144f)-(144f-143f)-(141b-140b)),0x90
+ .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+ (alt_max_short(new_len1, new_len2) - (old_len)),0x90
142:
.pushsection .altinstructions,"a"
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 5aef6a9..ba32af0 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -96,13 +96,21 @@ static inline int alternatives_text_reserved(void *start, void *end)
alt_end_marker ":\n"
/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas works with s32s.
+ */
+#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
+
+/*
* Pad the second replacement alternative with additional NOPs if it is
* additionally longer than the first replacement alternative.
*/
-#define OLDINSTR_2(oldinstr, num1, num2) \
- __OLDINSTR(oldinstr, num1) \
- ".skip -(((" alt_rlen(num2) ")-(" alt_rlen(num1) ")-(662b-661b)) > 0) * " \
- "((" alt_rlen(num2) ")-(" alt_rlen(num1) ")-(662b-661b)),0x90\n" \
+#define OLDINSTR_2(oldinstr, num1, num2) \
+ "661:\n\t" oldinstr "\n662:\n" \
+ ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
+ "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
alt_end_marker ":\n"
#define ALTINSTR_ENTRY(feature, num) \
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5c993c9..7c4ad00 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -369,11 +369,11 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
continue;
}
- DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d)",
+ DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
a->cpuid >> 5,
a->cpuid & 0x1f,
instr, a->instrlen,
- replacement, a->replacementlen);
+ replacement, a->replacementlen, a->padlen);
DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);