2020-04-02 08:19:26

by Alexandre Chartre

[permalink] [raw]
Subject: [PATCH 4/7] objtool: Add support for return trampoline call

With retpoline, the return instruction is used to branch to an address
stored on the stack. So, unlike a regular return instruction, when a
retpoline return instruction is reached the stack has been modified
compared to what we have when the function was entered.

Provide the mechanism to explicitly call-out such return instruction
so that objtool can correctly handle them.

Signed-off-by: Alexandre Chartre <[email protected]>
---
tools/objtool/check.c | 78 +++++++++++++++++++++++++++++++++++++++++--
tools/objtool/check.h | 1 +
2 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 0cec91291d46..ed8e3ea1d8da 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1344,6 +1344,48 @@ static int read_intra_function_call(struct objtool_file *file)
return 0;
}

+static int read_retpoline_ret(struct objtool_file *file)
+{
+ struct section *sec;
+ struct instruction *insn;
+ struct rela *rela;
+
+ sec = find_section_by_name(file->elf, ".rela.discard.retpoline_ret");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ if (rela->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s",
+ sec->name);
+ return -1;
+ }
+
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!insn) {
+ WARN("bad .discard.retpoline_ret entry");
+ return -1;
+ }
+
+ if (insn->type != INSN_RETURN) {
+ WARN_FUNC("retpoline_ret not a return",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ insn->retpoline_ret = true;
+ /*
+ * For the impact on the stack, make a return trampoline
+ * behaves like a pop of the return address.
+ */
+ insn->stack_op.src.type = OP_SRC_POP;
+ insn->stack_op.dest.type = OP_DEST_REG;
+ insn->stack_op.dest.reg = CFI_RA;
+ }
+
+ return 0;
+}
+
static void mark_rodata(struct objtool_file *file)
{
struct section *sec;
@@ -1403,6 +1445,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;

+ ret = read_retpoline_ret(file);
+ if (ret)
+ return ret;
+
ret = add_call_destinations(file);
if (ret)
return ret;
@@ -1432,7 +1478,8 @@ static bool is_fentry_call(struct instruction *insn)
return false;
}

-static bool has_modified_stack_frame(struct insn_state *state)
+static bool has_modified_stack_frame(struct insn_state *state,
+ bool check_registers)
{
int i;

@@ -1442,6 +1489,9 @@ static bool has_modified_stack_frame(struct insn_state *state)
state->drap)
return true;

+ if (!check_registers)
+ return false;
+
for (i = 0; i < CFI_NUM_REGS; i++)
if (state->regs[i].base != initial_func_cfi.regs[i].base ||
state->regs[i].offset != initial_func_cfi.regs[i].offset)
@@ -1987,7 +2037,7 @@ static int validate_call(struct instruction *insn, struct insn_state *state)

static int validate_sibling_call(struct instruction *insn, struct insn_state *state)
{
- if (has_modified_stack_frame(state)) {
+ if (has_modified_stack_frame(state, true)) {
WARN_FUNC("sibling call from callable instruction with modified stack frame",
insn->sec, insn->offset);
return 1;
@@ -2009,6 +2059,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
struct alternative *alt;
struct instruction *insn, *next_insn;
struct section *sec;
+ bool check_registers;
u8 visited;
int ret;

@@ -2130,7 +2181,28 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
return 1;
}

- if (func && has_modified_stack_frame(&state)) {
+ /*
+ * With retpoline, the return instruction is used
+ * to branch to an address stored on the stack.
+ * So when we reach the ret instruction, the stack
+ * frame has been modified with the address to
+ * branch to and we need update the stack state.
+ *
+ * The retpoline address to branch to is typically
+ * pushed on the stack from a register, but this
+ * confuses the logic which checks callee saved
+ * registers. So we don't check if registers have
+ * been modified if we have a return trampoline.
+ */
+ if (insn->retpoline_ret) {
+ update_insn_state(insn, &state);
+ check_registers = false;
+ } else {
+ check_registers = true;
+ }
+
+ if (func && has_modified_stack_frame(&state,
+ check_registers)) {
WARN_FUNC("return with modified stack frame",
sec, insn->offset);
return 1;
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 2bd6d2f46baa..5ecd16ad71a8 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -37,6 +37,7 @@ struct instruction {
bool dead_end, ignore, hint, save, restore, ignore_alts;
bool intra_function_call;
bool retpoline_safe;
+ bool retpoline_ret;
u8 visited;
struct symbol *call_dest;
struct instruction *jump_dest;
--
2.18.2


2020-04-02 13:51:10

by Julien Thierry

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call

Hi Alexandre,

On 4/2/20 9:22 AM, Alexandre Chartre wrote:
> With retpoline, the return instruction is used to branch to an address
> stored on the stack. So, unlike a regular return instruction, when a
> retpoline return instruction is reached the stack has been modified
> compared to what we have when the function was entered.
>
> Provide the mechanism to explicitly call-out such return instruction
> so that objtool can correctly handle them.
>
> Signed-off-by: Alexandre Chartre <[email protected]>
> ---
> tools/objtool/check.c | 78 +++++++++++++++++++++++++++++++++++++++++--
> tools/objtool/check.h | 1 +
> 2 files changed, 76 insertions(+), 3 deletions(-)
>
> diff --git a/tools/objtool/check.c b/tools/objtool/check.c
> index 0cec91291d46..ed8e3ea1d8da 100644
> --- a/tools/objtool/check.c
> +++ b/tools/objtool/check.c
> @@ -1344,6 +1344,48 @@ static int read_intra_function_call(struct objtool_file *file)
> return 0;
> }
>
> +static int read_retpoline_ret(struct objtool_file *file)
> +{
> + struct section *sec;
> + struct instruction *insn;
> + struct rela *rela;
> +
> + sec = find_section_by_name(file->elf, ".rela.discard.retpoline_ret");
> + if (!sec)
> + return 0;
> +
> + list_for_each_entry(rela, &sec->rela_list, list) {
> + if (rela->sym->type != STT_SECTION) {
> + WARN("unexpected relocation symbol type in %s",
> + sec->name);
> + return -1;
> + }
> +
> + insn = find_insn(file, rela->sym->sec, rela->addend);
> + if (!insn) {
> + WARN("bad .discard.retpoline_ret entry");
> + return -1;
> + }
> +
> + if (insn->type != INSN_RETURN) {
> + WARN_FUNC("retpoline_ret not a return",
> + insn->sec, insn->offset);
> + return -1;
> + }
> +
> + insn->retpoline_ret = true;
> + /*
> + * For the impact on the stack, make a return trampoline
> + * behaves like a pop of the return address.
> + */
> + insn->stack_op.src.type = OP_SRC_POP;
> + insn->stack_op.dest.type = OP_DEST_REG;
> + insn->stack_op.dest.reg = CFI_RA;
> + }
> +
> + return 0;
> +}
> +
> static void mark_rodata(struct objtool_file *file)
> {
> struct section *sec;
> @@ -1403,6 +1445,10 @@ static int decode_sections(struct objtool_file *file)
> if (ret)
> return ret;
>
> + ret = read_retpoline_ret(file);
> + if (ret)
> + return ret;
> +
> ret = add_call_destinations(file);
> if (ret)
> return ret;
> @@ -1432,7 +1478,8 @@ static bool is_fentry_call(struct instruction *insn)
> return false;
> }
>
> -static bool has_modified_stack_frame(struct insn_state *state)
> +static bool has_modified_stack_frame(struct insn_state *state,
> + bool check_registers)
> {
> int i;
>
> @@ -1442,6 +1489,9 @@ static bool has_modified_stack_frame(struct insn_state *state)
> state->drap)
> return true;
>
> + if (!check_registers)
> + return false;
> +
> for (i = 0; i < CFI_NUM_REGS; i++)
> if (state->regs[i].base != initial_func_cfi.regs[i].base ||
> state->regs[i].offset != initial_func_cfi.regs[i].offset)
> @@ -1987,7 +2037,7 @@ static int validate_call(struct instruction *insn, struct insn_state *state)
>
> static int validate_sibling_call(struct instruction *insn, struct insn_state *state)
> {
> - if (has_modified_stack_frame(state)) {
> + if (has_modified_stack_frame(state, true)) {
> WARN_FUNC("sibling call from callable instruction with modified stack frame",
> insn->sec, insn->offset);
> return 1;
> @@ -2009,6 +2059,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
> struct alternative *alt;
> struct instruction *insn, *next_insn;
> struct section *sec;
> + bool check_registers;
> u8 visited;
> int ret;
>
> @@ -2130,7 +2181,28 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
> return 1;
> }
>
> - if (func && has_modified_stack_frame(&state)) {
> + /*
> + * With retpoline, the return instruction is used
> + * to branch to an address stored on the stack.
> + * So when we reach the ret instruction, the stack
> + * frame has been modified with the address to
> + * branch to and we need update the stack state.
> + *
> + * The retpoline address to branch to is typically
> + * pushed on the stack from a register, but this
> + * confuses the logic which checks callee saved
> + * registers. So we don't check if registers have
> + * been modified if we have a return trampoline.

I think there are two different things to consider here.

First, the update of the stack frame which I believe should be done when
returning from intra_function_calls, as it undoes what the call
instruction did (push more stuff on the stack in the case of x86).

This might mean that intra_function_call should be part of the state (as
intra_function_calls pass a modified state to validate_branch() ).

Second is supporting retpoline_ret which is just accepting that the
return address in the stack frame has changed.

> + */
> + if (insn->retpoline_ret) {
> + update_insn_state(insn, &state);
> + check_registers = false;
> + } else {
> + check_registers = true;
> + }
> +
> + if (func && has_modified_stack_frame(&state,
> + check_registers)) {
> WARN_FUNC("return with modified stack frame",
> sec, insn->offset);
> return 1;
> diff --git a/tools/objtool/check.h b/tools/objtool/check.h
> index 2bd6d2f46baa..5ecd16ad71a8 100644
> --- a/tools/objtool/check.h
> +++ b/tools/objtool/check.h
> @@ -37,6 +37,7 @@ struct instruction {
> bool dead_end, ignore, hint, save, restore, ignore_alts;
> bool intra_function_call;
> bool retpoline_safe;
> + bool retpoline_ret;
> u8 visited;
> struct symbol *call_dest;
> struct instruction *jump_dest;
>

Cheers,

--
Julien Thierry

2020-04-02 14:44:13

by Alexandre Chartre

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call


On 4/2/20 3:26 PM, Julien Thierry wrote:
> Hi Alexandre,
>
> On 4/2/20 9:22 AM, Alexandre Chartre wrote:
>> With retpoline, the return instruction is used to branch to an address
>> stored on the stack. So, unlike a regular return instruction, when a
>> retpoline return instruction is reached the stack has been modified
>> compared to what we have when the function was entered.
>>
>> Provide the mechanism to explicitly call-out such return instruction
>> so that objtool can correctly handle them.
>>
>> Signed-off-by: Alexandre Chartre <[email protected]>
>> ---
>> ? tools/objtool/check.c | 78 +++++++++++++++++++++++++++++++++++++++++--
>> ? tools/objtool/check.h |? 1 +
>> ? 2 files changed, 76 insertions(+), 3 deletions(-)
>>
>> diff --git a/tools/objtool/check.c b/tools/objtool/check.c
>> index 0cec91291d46..ed8e3ea1d8da 100644
>> --- a/tools/objtool/check.c
>> +++ b/tools/objtool/check.c
>> @@ -1344,6 +1344,48 @@ static int read_intra_function_call(struct objtool_file *file)
>> ????? return 0;
>> ? }
>> +static int read_retpoline_ret(struct objtool_file *file)
>> +{
>> +??? struct section *sec;
>> +??? struct instruction *insn;
>> +??? struct rela *rela;
>> +
>> +??? sec = find_section_by_name(file->elf, ".rela.discard.retpoline_ret");
>> +??? if (!sec)
>> +??????? return 0;
>> +
>> +??? list_for_each_entry(rela, &sec->rela_list, list) {
>> +??????? if (rela->sym->type != STT_SECTION) {
>> +??????????? WARN("unexpected relocation symbol type in %s",
>> +???????????????? sec->name);
>> +??????????? return -1;
>> +??????? }
>> +
>> +??????? insn = find_insn(file, rela->sym->sec, rela->addend);
>> +??????? if (!insn) {
>> +??????????? WARN("bad .discard.retpoline_ret entry");
>> +??????????? return -1;
>> +??????? }
>> +
>> +??????? if (insn->type != INSN_RETURN) {
>> +??????????? WARN_FUNC("retpoline_ret not a return",
>> +????????????????? insn->sec, insn->offset);
>> +??????????? return -1;
>> +??????? }
>> +
>> +??????? insn->retpoline_ret = true;
>> +??????? /*
>> +???????? * For the impact on the stack, make a return trampoline
>> +???????? * behaves like a pop of the return address.
>> +???????? */
>> +??????? insn->stack_op.src.type = OP_SRC_POP;
>> +??????? insn->stack_op.dest.type = OP_DEST_REG;
>> +??????? insn->stack_op.dest.reg = CFI_RA;
>> +??? }
>> +
>> +??? return 0;
>> +}
>> +
>> ? static void mark_rodata(struct objtool_file *file)
>> ? {
>> ????? struct section *sec;
>> @@ -1403,6 +1445,10 @@ static int decode_sections(struct objtool_file *file)
>> ????? if (ret)
>> ????????? return ret;
>> +??? ret = read_retpoline_ret(file);
>> +??? if (ret)
>> +??????? return ret;
>> +
>> ????? ret = add_call_destinations(file);
>> ????? if (ret)
>> ????????? return ret;
>> @@ -1432,7 +1478,8 @@ static bool is_fentry_call(struct instruction *insn)
>> ????? return false;
>> ? }
>> -static bool has_modified_stack_frame(struct insn_state *state)
>> +static bool has_modified_stack_frame(struct insn_state *state,
>> +???????????????????? bool check_registers)
>> ? {
>> ????? int i;
>> @@ -1442,6 +1489,9 @@ static bool has_modified_stack_frame(struct insn_state *state)
>> ????????? state->drap)
>> ????????? return true;
>> +??? if (!check_registers)
>> +??????? return false;
>> +
>> ????? for (i = 0; i < CFI_NUM_REGS; i++)
>> ????????? if (state->regs[i].base != initial_func_cfi.regs[i].base ||
>> ????????????? state->regs[i].offset != initial_func_cfi.regs[i].offset)
>> @@ -1987,7 +2037,7 @@ static int validate_call(struct instruction *insn, struct insn_state *state)
>> ? static int validate_sibling_call(struct instruction *insn, struct insn_state *state)
>> ? {
>> -??? if (has_modified_stack_frame(state)) {
>> +??? if (has_modified_stack_frame(state, true)) {
>> ????????? WARN_FUNC("sibling call from callable instruction with modified stack frame",
>> ????????????????? insn->sec, insn->offset);
>> ????????? return 1;
>> @@ -2009,6 +2059,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
>> ????? struct alternative *alt;
>> ????? struct instruction *insn, *next_insn;
>> ????? struct section *sec;
>> +??? bool check_registers;
>> ????? u8 visited;
>> ????? int ret;
>> @@ -2130,7 +2181,28 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
>> ????????????????? return 1;
>> ????????????? }
>> -??????????? if (func && has_modified_stack_frame(&state)) {
>> +??????????? /*
>> +???????????? * With retpoline, the return instruction is used
>> +???????????? * to branch to an address stored on the stack.
>> +???????????? * So when we reach the ret instruction, the stack
>> +???????????? * frame has been modified with the address to
>> +???????????? * branch to and we need update the stack state.
>> +???????????? *
>> +???????????? * The retpoline address to branch to is typically
>> +???????????? * pushed on the stack from a register, but this
>> +???????????? * confuses the logic which checks callee saved
>> +???????????? * registers. So we don't check if registers have
>> +???????????? * been modified if we have a return trampoline.
>
> I think there are two different things to consider here.
>
> First, the update of the stack frame which I believe should be done
> when returning from intra_function_calls, as it undoes what the call
> instruction did (push more stuff on the stack in the case of x86).

The problem is that an intra-function call is not necessarily going
to return. With retpoline (or RSB stuffing) intra-function calls are
basically fake calls only present to fill the RSB buffer. Such calls
won't return, the stack pointer is just adjusted to cancel the impact
of these calls on the stack.

> This might mean that intra_function_call should be part of the state
> (as intra_function_calls pass a modified state to validate_branch()).
>
> Second is supporting retpoline_ret which is just accepting that the
> return address in the stack frame has changed.
With retpoline_ret, the stack just has an extra address we are going to
jump to (this will be like an indirect jump). If we remove that extra
address from the stack, we should have the regular stack we have at
the end of a function. This is precisely what the code is doing here.

alex.

>> +???????????? */
>> +??????????? if (insn->retpoline_ret) {
>> +??????????????? update_insn_state(insn, &state);
>> +??????????????? check_registers = false;
>> +??????????? } else {
>> +??????????????? check_registers = true;
>> +??????????? }
>> +
>> +??????????? if (func && has_modified_stack_frame(&state,
>> +???????????????????????????????? check_registers)) {
>> ????????????????? WARN_FUNC("return with modified stack frame",
>> ??????????????????????? sec, insn->offset);
>> ????????????????? return 1;
>> diff --git a/tools/objtool/check.h b/tools/objtool/check.h
>> index 2bd6d2f46baa..5ecd16ad71a8 100644
>> --- a/tools/objtool/check.h
>> +++ b/tools/objtool/check.h
>> @@ -37,6 +37,7 @@ struct instruction {
>> ????? bool dead_end, ignore, hint, save, restore, ignore_alts;
>> ????? bool intra_function_call;
>> ????? bool retpoline_safe;
>> +??? bool retpoline_ret;
>> ????? u8 visited;
>> ????? struct symbol *call_dest;
>> ????? struct instruction *jump_dest;
>>
>
> Cheers,
>

2020-04-02 15:32:29

by Julien Thierry

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call



On 4/2/20 3:46 PM, Alexandre Chartre wrote:
>
> On 4/2/20 3:26 PM, Julien Thierry wrote:
>> Hi Alexandre,
>>
>> On 4/2/20 9:22 AM, Alexandre Chartre wrote:
>>> With retpoline, the return instruction is used to branch to an address
>>> stored on the stack. So, unlike a regular return instruction, when a
>>> retpoline return instruction is reached the stack has been modified
>>> compared to what we have when the function was entered.
>>>
>>> Provide the mechanism to explicitly call-out such return instruction
>>> so that objtool can correctly handle them.
>>>
>>> Signed-off-by: Alexandre Chartre <[email protected]>
>>> ---
>>> ? tools/objtool/check.c | 78 +++++++++++++++++++++++++++++++++++++++++--
>>> ? tools/objtool/check.h |? 1 +
>>> ? 2 files changed, 76 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/tools/objtool/check.c b/tools/objtool/check.c
>>> index 0cec91291d46..ed8e3ea1d8da 100644
>>> --- a/tools/objtool/check.c
>>> +++ b/tools/objtool/check.c
>>> @@ -1344,6 +1344,48 @@ static int read_intra_function_call(struct
>>> objtool_file *file)
>>> ????? return 0;
>>> ? }
>>> +static int read_retpoline_ret(struct objtool_file *file)
>>> +{
>>> +??? struct section *sec;
>>> +??? struct instruction *insn;
>>> +??? struct rela *rela;
>>> +
>>> +??? sec = find_section_by_name(file->elf,
>>> ".rela.discard.retpoline_ret");
>>> +??? if (!sec)
>>> +??????? return 0;
>>> +
>>> +??? list_for_each_entry(rela, &sec->rela_list, list) {
>>> +??????? if (rela->sym->type != STT_SECTION) {
>>> +??????????? WARN("unexpected relocation symbol type in %s",
>>> +???????????????? sec->name);
>>> +??????????? return -1;
>>> +??????? }
>>> +
>>> +??????? insn = find_insn(file, rela->sym->sec, rela->addend);
>>> +??????? if (!insn) {
>>> +??????????? WARN("bad .discard.retpoline_ret entry");
>>> +??????????? return -1;
>>> +??????? }
>>> +
>>> +??????? if (insn->type != INSN_RETURN) {
>>> +??????????? WARN_FUNC("retpoline_ret not a return",
>>> +????????????????? insn->sec, insn->offset);
>>> +??????????? return -1;
>>> +??????? }
>>> +
>>> +??????? insn->retpoline_ret = true;
>>> +??????? /*
>>> +???????? * For the impact on the stack, make a return trampoline
>>> +???????? * behaves like a pop of the return address.
>>> +???????? */
>>> +??????? insn->stack_op.src.type = OP_SRC_POP;
>>> +??????? insn->stack_op.dest.type = OP_DEST_REG;
>>> +??????? insn->stack_op.dest.reg = CFI_RA;
>>> +??? }
>>> +
>>> +??? return 0;
>>> +}
>>> +
>>> ? static void mark_rodata(struct objtool_file *file)
>>> ? {
>>> ????? struct section *sec;
>>> @@ -1403,6 +1445,10 @@ static int decode_sections(struct objtool_file
>>> *file)
>>> ????? if (ret)
>>> ????????? return ret;
>>> +??? ret = read_retpoline_ret(file);
>>> +??? if (ret)
>>> +??????? return ret;
>>> +
>>> ????? ret = add_call_destinations(file);
>>> ????? if (ret)
>>> ????????? return ret;
>>> @@ -1432,7 +1478,8 @@ static bool is_fentry_call(struct instruction
>>> *insn)
>>> ????? return false;
>>> ? }
>>> -static bool has_modified_stack_frame(struct insn_state *state)
>>> +static bool has_modified_stack_frame(struct insn_state *state,
>>> +???????????????????? bool check_registers)
>>> ? {
>>> ????? int i;
>>> @@ -1442,6 +1489,9 @@ static bool has_modified_stack_frame(struct
>>> insn_state *state)
>>> ????????? state->drap)
>>> ????????? return true;
>>> +??? if (!check_registers)
>>> +??????? return false;
>>> +
>>> ????? for (i = 0; i < CFI_NUM_REGS; i++)
>>> ????????? if (state->regs[i].base != initial_func_cfi.regs[i].base ||
>>> ????????????? state->regs[i].offset != initial_func_cfi.regs[i].offset)
>>> @@ -1987,7 +2037,7 @@ static int validate_call(struct instruction
>>> *insn, struct insn_state *state)
>>> ? static int validate_sibling_call(struct instruction *insn, struct
>>> insn_state *state)
>>> ? {
>>> -??? if (has_modified_stack_frame(state)) {
>>> +??? if (has_modified_stack_frame(state, true)) {
>>> ????????? WARN_FUNC("sibling call from callable instruction with
>>> modified stack frame",
>>> ????????????????? insn->sec, insn->offset);
>>> ????????? return 1;
>>> @@ -2009,6 +2059,7 @@ static int validate_branch(struct objtool_file
>>> *file, struct symbol *func,
>>> ????? struct alternative *alt;
>>> ????? struct instruction *insn, *next_insn;
>>> ????? struct section *sec;
>>> +??? bool check_registers;
>>> ????? u8 visited;
>>> ????? int ret;
>>> @@ -2130,7 +2181,28 @@ static int validate_branch(struct objtool_file
>>> *file, struct symbol *func,
>>> ????????????????? return 1;
>>> ????????????? }
>>> -??????????? if (func && has_modified_stack_frame(&state)) {
>>> +??????????? /*
>>> +???????????? * With retpoline, the return instruction is used
>>> +???????????? * to branch to an address stored on the stack.
>>> +???????????? * So when we reach the ret instruction, the stack
>>> +???????????? * frame has been modified with the address to
>>> +???????????? * branch to and we need update the stack state.
>>> +???????????? *
>>> +???????????? * The retpoline address to branch to is typically
>>> +???????????? * pushed on the stack from a register, but this
>>> +???????????? * confuses the logic which checks callee saved
>>> +???????????? * registers. So we don't check if registers have
>>> +???????????? * been modified if we have a return trampoline.
>>
>> I think there are two different things to consider here.
>>
>> First, the update of the stack frame which I believe should be done
>> when returning from intra_function_calls, as it undoes what the call
>> instruction did (push more stuff on the stack in the case of x86).
>
> The problem is that an intra-function call is not necessarily going
> to return. With retpoline (or RSB stuffing) intra-function calls are
> basically fake calls only present to fill the RSB buffer. Such calls
> won't return, the stack pointer is just adjusted to cancel the impact
> of these calls on the stack.
>

Right, but running into an intra-function call will start a validate
branch with a modified stack frame.

So, starting from an intra-function call, if we run into a return, I
guess objtool will complain about a return with modified stack frame.

My understanding is that once you find an intra-function call, either
you hit a return, ending the branch, so the return should undo the
modification the intra-function call did (whether is it a retpoline
return or not). Otherwise, the intra-function call branch will need to
reach an end in some way (e.g. hitting a CONTEXT_SWITCH instruction,
calling a dead_end_function).

Am I missing something?

--
Julien Thierry

2020-04-02 15:41:52

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call

On Thu, Apr 02, 2020 at 04:31:05PM +0100, Julien Thierry wrote:
> My understanding is that once you find an intra-function call, either you
> hit a return, ending the branch, so the return should undo the modification
> the intra-function call did (whether is it a retpoline return or not).
> Otherwise, the intra-function call branch will need to reach an end in some
> way (e.g. hitting a CONTEXT_SWITCH instruction, calling a
> dead_end_function).
>
> Am I missing something?

The thing is basically doing:

mov $n, cx
1: call 2f
2: dec cx
jnz 1b
add 8*n, sp

So it does N calls to self, then subtracts N words from the stack.

The reason being that the CPU has a return-stack-buffer for predicting
returns, and call/ret being naturally paired, that works. The above
is a software flush of the RSB.

2020-04-02 16:41:30

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call

On Thu, Apr 02, 2020 at 10:22:17AM +0200, Alexandre Chartre wrote:
> With retpoline, the return instruction is used to branch to an address
> stored on the stack. So, unlike a regular return instruction, when a
> retpoline return instruction is reached the stack has been modified
> compared to what we have when the function was entered.
>
> Provide the mechanism to explicitly call-out such return instruction
> so that objtool can correctly handle them.

https://lkml.kernel.org/r/[email protected]

And also, the split out version:

https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/commit/?h=core/objtool&id=ec9d9549901dfd2ff411676dfc624e50219e4d5a

2020-04-03 07:16:45

by Alexandre Chartre

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call


On 4/2/20 5:27 PM, Peter Zijlstra wrote:
> On Thu, Apr 02, 2020 at 10:22:17AM +0200, Alexandre Chartre wrote:
>> With retpoline, the return instruction is used to branch to an address
>> stored on the stack. So, unlike a regular return instruction, when a
>> retpoline return instruction is reached the stack has been modified
>> compared to what we have when the function was entered.
>>
>> Provide the mechanism to explicitly call-out such return instruction
>> so that objtool can correctly handle them.
>
> https://lkml.kernel.org/r/[email protected]
>
> And also, the split out version:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/commit/?h=core/objtool&id=ec9d9549901dfd2ff411676dfc624e50219e4d5a
>

Okay, so I can get rid of my retpoline_ret annotation and use UNWIND_HINT_RET_OFFSET
instead (which is more generic). Basically I can change my RETPOLINE_RET macro to:

.macro RETPOLINE_RET
UNWIND_HINT_RET_OFFSET
ret
.endm

Thanks,

alex.

2020-04-03 08:30:03

by Julien Thierry

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call



On 4/2/20 4:40 PM, Peter Zijlstra wrote:
> On Thu, Apr 02, 2020 at 04:31:05PM +0100, Julien Thierry wrote:
>> My understanding is that once you find an intra-function call, either you
>> hit a return, ending the branch, so the return should undo the modification
>> the intra-function call did (whether is it a retpoline return or not).
>> Otherwise, the intra-function call branch will need to reach an end in some
>> way (e.g. hitting a CONTEXT_SWITCH instruction, calling a
>> dead_end_function).
>>
>> Am I missing something?
>
> The thing is basically doing:
>
> mov $n, cx
> 1: call 2f
> 2: dec cx
> jnz 1b
> add 8*n, sp
>
> So it does N calls to self, then subtracts N words from the stack.
>
> The reason being that the CPU has a return-stack-buffer for predicting
> returns, and call/ret being naturally paired, that works. The above
> is a software flush of the RSB.
>

Ah, lovely... Maybe that's where SAVE/RESTORE unwind hints could be nice
;) .

Otherwise, I don't really have a good suggestion for this...

--
Julien Thierry

2020-04-03 15:18:55

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call

On Fri, Apr 03, 2020 at 09:11:55AM +0100, Julien Thierry wrote:
>
>
> On 4/2/20 4:40 PM, Peter Zijlstra wrote:
> > On Thu, Apr 02, 2020 at 04:31:05PM +0100, Julien Thierry wrote:
> > > My understanding is that once you find an intra-function call, either you
> > > hit a return, ending the branch, so the return should undo the modification
> > > the intra-function call did (whether is it a retpoline return or not).
> > > Otherwise, the intra-function call branch will need to reach an end in some
> > > way (e.g. hitting a CONTEXT_SWITCH instruction, calling a
> > > dead_end_function).
> > >
> > > Am I missing something?
> >
> > The thing is basically doing:
> >
> > mov $n, cx
> > 1: call 2f
> > 2: dec cx
> > jnz 1b
> > add 8*n, sp
> >
> > So it does N calls to self, then subtracts N words from the stack.
> >
> > The reason being that the CPU has a return-stack-buffer for predicting
> > returns, and call/ret being naturally paired, that works. The above
> > is a software flush of the RSB.
> >
>
> Ah, lovely... Maybe that's where SAVE/RESTORE unwind hints could be nice ;)
> .
>
> Otherwise, I don't really have a good suggestion for this...

Peter, I think my previous idea for UNWIND_HINT_ADJUST stack_add=8 would
work here?

--
Josh

2020-04-03 15:24:12

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call

On Fri, Apr 03, 2020 at 10:17:57AM -0500, Josh Poimboeuf wrote:
> On Fri, Apr 03, 2020 at 09:11:55AM +0100, Julien Thierry wrote:
> >
> >
> > On 4/2/20 4:40 PM, Peter Zijlstra wrote:
> > > On Thu, Apr 02, 2020 at 04:31:05PM +0100, Julien Thierry wrote:
> > > > My understanding is that once you find an intra-function call, either you
> > > > hit a return, ending the branch, so the return should undo the modification
> > > > the intra-function call did (whether is it a retpoline return or not).
> > > > Otherwise, the intra-function call branch will need to reach an end in some
> > > > way (e.g. hitting a CONTEXT_SWITCH instruction, calling a
> > > > dead_end_function).
> > > >
> > > > Am I missing something?
> > >
> > > The thing is basically doing:
> > >
> > > mov $n, cx
> > > 1: call 2f
> > > 2: dec cx
> > > jnz 1b
> > > add 8*n, sp
> > >
> > > So it does N calls to self, then subtracts N words from the stack.
> > >
> > > The reason being that the CPU has a return-stack-buffer for predicting
> > > returns, and call/ret being naturally paired, that works. The above
> > > is a software flush of the RSB.
> > >
> >
> > Ah, lovely... Maybe that's where SAVE/RESTORE unwind hints could be nice ;)
> > .
> >
> > Otherwise, I don't really have a good suggestion for this...
>
> Peter, I think my previous idea for UNWIND_HINT_ADJUST stack_add=8 would
> work here?

And if we're going to need that hint anyway, maybe we could get rid of
the nasty arch_exception_frame_size for the IRET thing and just use a
hint there after all ;-)

--
Josh

2020-04-03 15:33:05

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call

On Fri, Apr 03, 2020 at 10:22:14AM -0500, Josh Poimboeuf wrote:
> On Fri, Apr 03, 2020 at 10:17:57AM -0500, Josh Poimboeuf wrote:
> > On Fri, Apr 03, 2020 at 09:11:55AM +0100, Julien Thierry wrote:
> > >
> > >
> > > On 4/2/20 4:40 PM, Peter Zijlstra wrote:
> > > > On Thu, Apr 02, 2020 at 04:31:05PM +0100, Julien Thierry wrote:
> > > > > My understanding is that once you find an intra-function call, either you
> > > > > hit a return, ending the branch, so the return should undo the modification
> > > > > the intra-function call did (whether is it a retpoline return or not).
> > > > > Otherwise, the intra-function call branch will need to reach an end in some
> > > > > way (e.g. hitting a CONTEXT_SWITCH instruction, calling a
> > > > > dead_end_function).
> > > > >
> > > > > Am I missing something?
> > > >
> > > > The thing is basically doing:
> > > >
> > > > mov $n, cx
> > > > 1: call 2f
> > > > 2: dec cx
> > > > jnz 1b
> > > > add 8*n, sp
> > > >
> > > > So it does N calls to self, then subtracts N words from the stack.
> > > >
> > > > The reason being that the CPU has a return-stack-buffer for predicting
> > > > returns, and call/ret being naturally paired, that works. The above
> > > > is a software flush of the RSB.
> > > >
> > >
> > > Ah, lovely... Maybe that's where SAVE/RESTORE unwind hints could be nice ;)
> > > .
> > >
> > > Otherwise, I don't really have a good suggestion for this...
> >
> > Peter, I think my previous idea for UNWIND_HINT_ADJUST stack_add=8 would
> > work here?
>
> And if we're going to need that hint anyway, maybe we could get rid of
> the nasty arch_exception_frame_size for the IRET thing and just use a
> hint there after all ;-)

Actually, never mind -- I guess it wouldn't work because of inconsistent
stack states and all that...

--
Josh

2020-04-03 15:49:15

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call

On Fri, Apr 03, 2020 at 10:17:57AM -0500, Josh Poimboeuf wrote:
> Peter, I think my previous idea for UNWIND_HINT_ADJUST stack_add=8 would
> work here?

Yes, it would.

2020-04-03 15:57:23

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call

On Fri, Apr 03, 2020 at 05:46:20PM +0200, Peter Zijlstra wrote:
> On Fri, Apr 03, 2020 at 10:17:57AM -0500, Josh Poimboeuf wrote:
> > Peter, I think my previous idea for UNWIND_HINT_ADJUST stack_add=8 would
> > work here?
>
> Yes, it would.

Only thing is we'd need to unroll the RSB loop so each instruction has
a deterministic stack size.

--
Josh

2020-04-04 13:34:50

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call

On Fri, Apr 03, 2020 at 05:46:20PM +0200, Peter Zijlstra wrote:
> On Fri, Apr 03, 2020 at 10:17:57AM -0500, Josh Poimboeuf wrote:
> > Peter, I think my previous idea for UNWIND_HINT_ADJUST stack_add=8 would
> > work here?
>
> Yes, it would.

Sorry, I have reconsidered. While it will shut up objtool, it will not
'work'. That is, the ORC data generated will not correctly unwind.

I'll try and write a longer email tonight.

2020-04-04 14:23:39

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call

On Sat, Apr 04, 2020 at 03:32:18PM +0200, Peter Zijlstra wrote:
> On Fri, Apr 03, 2020 at 05:46:20PM +0200, Peter Zijlstra wrote:
> > On Fri, Apr 03, 2020 at 10:17:57AM -0500, Josh Poimboeuf wrote:
> > > Peter, I think my previous idea for UNWIND_HINT_ADJUST stack_add=8 would
> > > work here?
> >
> > Yes, it would.
>
> Sorry, I have reconsidered. While it will shut up objtool, it will not
> 'work'. That is, the ORC data generated will not correctly unwind.
>
> I'll try and write a longer email tonight.

Right, that's what I've been trying to say. The ORC data will be
non-deterministic unless we unroll the loop. Or did you mean something
else?

--
Josh

2020-04-04 15:52:36

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call

On Sat, Apr 04, 2020 at 09:22:32AM -0500, Josh Poimboeuf wrote:
> On Sat, Apr 04, 2020 at 03:32:18PM +0200, Peter Zijlstra wrote:
> > On Fri, Apr 03, 2020 at 05:46:20PM +0200, Peter Zijlstra wrote:
> > > On Fri, Apr 03, 2020 at 10:17:57AM -0500, Josh Poimboeuf wrote:
> > > > Peter, I think my previous idea for UNWIND_HINT_ADJUST stack_add=8 would
> > > > work here?
> > >
> > > Yes, it would.
> >
> > Sorry, I have reconsidered. While it will shut up objtool, it will not
> > 'work'. That is, the ORC data generated will not correctly unwind.
> >
> > I'll try and write a longer email tonight.
>
> Right, that's what I've been trying to say. The ORC data will be
> non-deterministic unless we unroll the loop. Or did you mean something
> else?

The below should result in deterministic code.

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 07e95dcb40ad..109ee65f4a11 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -59,8 +59,8 @@
jmp 775b; \
774: \
dec reg; \
- jnz 771b; \
- add $(BITS_PER_LONG/8) * nr, sp;
+ add $(BITS_PER_LONG/8) * $2, sp; \
+ jnz 771b;

#ifdef __ASSEMBLY__

2020-04-06 08:22:02

by Alexandre Chartre

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call


On 4/4/20 5:51 PM, Peter Zijlstra wrote:
> On Sat, Apr 04, 2020 at 09:22:32AM -0500, Josh Poimboeuf wrote:
>> On Sat, Apr 04, 2020 at 03:32:18PM +0200, Peter Zijlstra wrote:
>>> On Fri, Apr 03, 2020 at 05:46:20PM +0200, Peter Zijlstra wrote:
>>>> On Fri, Apr 03, 2020 at 10:17:57AM -0500, Josh Poimboeuf wrote:
>>>>> Peter, I think my previous idea for UNWIND_HINT_ADJUST stack_add=8 would
>>>>> work here?
>>>>
>>>> Yes, it would.
>>>
>>> Sorry, I have reconsidered. While it will shut up objtool, it will not
>>> 'work'. That is, the ORC data generated will not correctly unwind.
>>>
>>> I'll try and write a longer email tonight.
>>
>> Right, that's what I've been trying to say. The ORC data will be
>> non-deterministic unless we unroll the loop. Or did you mean something
>> else?
>
> The below should result in deterministic code.
>
> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> index 07e95dcb40ad..109ee65f4a11 100644
> --- a/arch/x86/include/asm/nospec-branch.h
> +++ b/arch/x86/include/asm/nospec-branch.h
> @@ -59,8 +59,8 @@
> jmp 775b; \
> 774: \
> dec reg; \
> - jnz 771b; \
> - add $(BITS_PER_LONG/8) * nr, sp;
> + add $(BITS_PER_LONG/8) * $2, sp; \
> + jnz 771b;
>
> #ifdef __ASSEMBLY__

Nice. This works fine and allows to remove ANNOTATE_NOSPEC_ALTERNATIVE when
using __FILL_RETURN_BUFFER. However this is probably less performant because
we now have nr/2 add instructions instead of just 1.

Here is a variant where I unroll half of the loop. This way we have 2
add+dec+jnz instruction instructions instead of nr/2 dec+jnz and 1 add
instruction.

#define __FILL_RETURN_BUFFER(reg, nr, sp) \
mov $1, reg; \
771: \
.rept (nr/2); \
call 772f; \
773: /* speculation trap */ \
pause; \
lfence; \
jmp 773b; \
772: ; \
.endr; \
add $(BITS_PER_LONG/8) * (nr/2), sp; \
dec reg; \
jnz 771b;


Note that we can't unroll the entire loop: it won't work with nr=32 because
the code is then too large to fit into an alternative (the alternative size
is encoded on only one byte so this allows a maximum size of 255, while with
nr=32 the size is around 390).

alex.

2020-04-06 09:42:14

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call

On Mon, Apr 06, 2020 at 10:19:56AM +0200, Alexandre Chartre wrote:
>
> On 4/4/20 5:51 PM, Peter Zijlstra wrote:
> > On Sat, Apr 04, 2020 at 09:22:32AM -0500, Josh Poimboeuf wrote:
> > > On Sat, Apr 04, 2020 at 03:32:18PM +0200, Peter Zijlstra wrote:
> > > > On Fri, Apr 03, 2020 at 05:46:20PM +0200, Peter Zijlstra wrote:
> > > > > On Fri, Apr 03, 2020 at 10:17:57AM -0500, Josh Poimboeuf wrote:
> > > > > > Peter, I think my previous idea for UNWIND_HINT_ADJUST stack_add=8 would
> > > > > > work here?
> > > > >
> > > > > Yes, it would.
> > > >
> > > > Sorry, I have reconsidered. While it will shut up objtool, it will not
> > > > 'work'. That is, the ORC data generated will not correctly unwind.
> > > >
> > > > I'll try and write a longer email tonight.
> > >
> > > Right, that's what I've been trying to say. The ORC data will be
> > > non-deterministic unless we unroll the loop. Or did you mean something
> > > else?
> >
> > The below should result in deterministic code.
> >
> > diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> > index 07e95dcb40ad..109ee65f4a11 100644
> > --- a/arch/x86/include/asm/nospec-branch.h
> > +++ b/arch/x86/include/asm/nospec-branch.h
> > @@ -59,8 +59,8 @@
> > jmp 775b; \
> > 774: \
> > dec reg; \
> > - jnz 771b; \
> > - add $(BITS_PER_LONG/8) * nr, sp;
> > + add $(BITS_PER_LONG/8) * $2, sp; \
> > + jnz 771b;
> > #ifdef __ASSEMBLY__
>
> Nice. This works fine and allows to remove ANNOTATE_NOSPEC_ALTERNATIVE when
> using __FILL_RETURN_BUFFER. However this is probably less performant because
> we now have nr/2 add instructions instead of just 1.

Does it actually matter though? That is, can you measure the difference?

2020-04-06 11:00:02

by Alexandre Chartre

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call



On 4/6/20 11:31 AM, Peter Zijlstra wrote:
> On Mon, Apr 06, 2020 at 10:19:56AM +0200, Alexandre Chartre wrote:
>>
>> On 4/4/20 5:51 PM, Peter Zijlstra wrote:
>>> On Sat, Apr 04, 2020 at 09:22:32AM -0500, Josh Poimboeuf wrote:
>>>> On Sat, Apr 04, 2020 at 03:32:18PM +0200, Peter Zijlstra wrote:
>>>>> On Fri, Apr 03, 2020 at 05:46:20PM +0200, Peter Zijlstra wrote:
>>>>>> On Fri, Apr 03, 2020 at 10:17:57AM -0500, Josh Poimboeuf wrote:
>>>>>>> Peter, I think my previous idea for UNWIND_HINT_ADJUST stack_add=8 would
>>>>>>> work here?
>>>>>>
>>>>>> Yes, it would.
>>>>>
>>>>> Sorry, I have reconsidered. While it will shut up objtool, it will not
>>>>> 'work'. That is, the ORC data generated will not correctly unwind.
>>>>>
>>>>> I'll try and write a longer email tonight.
>>>>
>>>> Right, that's what I've been trying to say. The ORC data will be
>>>> non-deterministic unless we unroll the loop. Or did you mean something
>>>> else?
>>>
>>> The below should result in deterministic code.
>>>
>>> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
>>> index 07e95dcb40ad..109ee65f4a11 100644
>>> --- a/arch/x86/include/asm/nospec-branch.h
>>> +++ b/arch/x86/include/asm/nospec-branch.h
>>> @@ -59,8 +59,8 @@
>>> jmp 775b; \
>>> 774: \
>>> dec reg; \
>>> - jnz 771b; \
>>> - add $(BITS_PER_LONG/8) * nr, sp;
>>> + add $(BITS_PER_LONG/8) * $2, sp; \
>>> + jnz 771b;
>>> #ifdef __ASSEMBLY__
>>
>> Nice. This works fine and allows to remove ANNOTATE_NOSPEC_ALTERNATIVE when
>> using __FILL_RETURN_BUFFER. However this is probably less performant because
>> we now have nr/2 add instructions instead of just 1.
>
> Does it actually matter though? That is, can you measure the difference?
>

I didn't do any measurement, I am just anticipating concerns others might have
as this code is used during context and vmexit. But I agree this might not be
significant.

alex.

2020-04-06 14:17:21

by Josh Poimboeuf

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call

On Sat, Apr 04, 2020 at 05:51:26PM +0200, Peter Zijlstra wrote:
> On Sat, Apr 04, 2020 at 09:22:32AM -0500, Josh Poimboeuf wrote:
> > On Sat, Apr 04, 2020 at 03:32:18PM +0200, Peter Zijlstra wrote:
> > > On Fri, Apr 03, 2020 at 05:46:20PM +0200, Peter Zijlstra wrote:
> > > > On Fri, Apr 03, 2020 at 10:17:57AM -0500, Josh Poimboeuf wrote:
> > > > > Peter, I think my previous idea for UNWIND_HINT_ADJUST stack_add=8 would
> > > > > work here?
> > > >
> > > > Yes, it would.
> > >
> > > Sorry, I have reconsidered. While it will shut up objtool, it will not
> > > 'work'. That is, the ORC data generated will not correctly unwind.
> > >
> > > I'll try and write a longer email tonight.
> >
> > Right, that's what I've been trying to say. The ORC data will be
> > non-deterministic unless we unroll the loop. Or did you mean something
> > else?
>
> The below should result in deterministic code.
>
> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> index 07e95dcb40ad..109ee65f4a11 100644
> --- a/arch/x86/include/asm/nospec-branch.h
> +++ b/arch/x86/include/asm/nospec-branch.h
> @@ -59,8 +59,8 @@
> jmp 775b; \
> 774: \
> dec reg; \
> - jnz 771b; \
> - add $(BITS_PER_LONG/8) * nr, sp;
> + add $(BITS_PER_LONG/8) * $2, sp; \
> + jnz 771b;
>
> #ifdef __ASSEMBLY__

That should work, though we should make sure it doesn't break the RSB
clearing -- I'm pretty sure it wouldn't, but you never know...

--
Josh

2020-04-06 14:34:31

by Alexandre Chartre

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call


On 4/2/20 5:27 PM, Peter Zijlstra wrote:
> On Thu, Apr 02, 2020 at 10:22:17AM +0200, Alexandre Chartre wrote:
>> With retpoline, the return instruction is used to branch to an address
>> stored on the stack. So, unlike a regular return instruction, when a
>> retpoline return instruction is reached the stack has been modified
>> compared to what we have when the function was entered.
>>
>> Provide the mechanism to explicitly call-out such return instruction
>> so that objtool can correctly handle them.
>
> https://lkml.kernel.org/r/[email protected]
>
> And also, the split out version:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/commit/?h=core/objtool&id=ec9d9549901dfd2ff411676dfc624e50219e4d5a
>

HINT_RET_OFFSET works fine when an immediate value is pushed on the
stack. However if the value is pushed from a callee-saved register
(%rbp, %rbx, %r12-%r15) then we still have a "return with modified
stack frame" warning. That's because objtool checks callee-saved
registers pushed/popped on the stack, and we have retpoline functions
built for each register (see arch/x86/lib/retpoline.S)

So that's why I also added a bool to has_modified_stack_frame() to
no check registers:

@@ -1432,7 +1478,8 @@ static bool is_fentry_call(struct instruction *insn)
return false;
}

-static bool has_modified_stack_frame(struct insn_state *state)
+static bool has_modified_stack_frame(struct insn_state *state,
+ bool check_registers)
{
int i;

@@ -1442,6 +1489,9 @@ static bool has_modified_stack_frame(struct insn_state *state)
state->drap)
return true;

+ if (!check_registers)
+ return false;
+
for (i = 0; i < CFI_NUM_REGS; i++)
if (state->regs[i].base != initial_func_cfi.regs[i].base ||
state->regs[i].offset != initial_func_cfi.regs[i].offset)


alex.

2020-04-06 14:52:59

by Alexandre Chartre

[permalink] [raw]
Subject: Re: [PATCH 4/7] objtool: Add support for return trampoline call



On 4/6/20 4:34 PM, Alexandre Chartre wrote:
>
> On 4/2/20 5:27 PM, Peter Zijlstra wrote:
>> On Thu, Apr 02, 2020 at 10:22:17AM +0200, Alexandre Chartre wrote:
>>> With retpoline, the return instruction is used to branch to an address
>>> stored on the stack. So, unlike a regular return instruction, when a
>>> retpoline return instruction is reached the stack has been modified
>>> compared to what we have when the function was entered.
>>>
>>> Provide the mechanism to explicitly call-out such return instruction
>>> so that objtool can correctly handle them.
>>
>> https://lkml.kernel.org/r/[email protected]
>>
>> And also, the split out version:
>>
>>    https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/commit/?h=core/objtool&id=ec9d9549901dfd2ff411676dfc624e50219e4d5a
>>
>
> HINT_RET_OFFSET works fine when an immediate value is pushed on the
> stack. However if the value is pushed from a callee-saved register
> (%rbp, %rbx, %r12-%r15) then we still have a "return with modified
> stack frame" warning. That's because objtool checks callee-saved
> registers pushed/popped on the stack, and we have retpoline functions
> built for each register (see arch/x86/lib/retpoline.S)
>
> So that's why I also added a bool to has_modified_stack_frame() to
> no check registers:
>
> @@ -1432,7 +1478,8 @@ static bool is_fentry_call(struct instruction *insn)
>      return false;
>  }
>
> -static bool has_modified_stack_frame(struct insn_state *state)
> +static bool has_modified_stack_frame(struct insn_state *state,
> +                     bool check_registers)
>  {
>      int i;
>
> @@ -1442,6 +1489,9 @@ static bool has_modified_stack_frame(struct insn_state *state)
>          state->drap)
>          return true;
>
> +    if (!check_registers)
> +        return false;
> +
>      for (i = 0; i < CFI_NUM_REGS; i++)
>          if (state->regs[i].base != initial_func_cfi.regs[i].base ||
>              state->regs[i].offset != initial_func_cfi.regs[i].offset)
>
>

Here is a simple change on top of the UNWIND_HINT_RET_OFFSET patch to prevent
this problem:

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index dbb2b2187037..97db8f49e06f 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1390,6 +1390,14 @@ static bool has_modified_stack_frame(struct instruction *insn,
if (state->stack_size != initial_func_cfi.cfa.offset + ret_offset)
return true;

+ /*
+ * If there is a ret offset hint then don't check registers
+ * because a callee-saved register might have been pushed on
+ * the stack.
+ */
+ if (ret_offset)
+ return false;
+
for (i = 0; i < CFI_NUM_REGS; i++) {
if (state->regs[i].base != initial_func_cfi.regs[i].base ||
state->regs[i].offset != initial_func_cfi.regs[i].offset)


alex.