The brk area needs to be zeroed initially, like the .bss section.
Juergen Gross (2):
x86/xen: use clear_bss() for Xen PV guests
x86: fix setup of brk area
arch/x86/include/asm/setup.h | 3 +++
arch/x86/kernel/head64.c | 4 +++-
arch/x86/xen/enlighten_pv.c | 8 ++++++--
arch/x86/xen/xen-head.S | 10 +---------
4 files changed, 13 insertions(+), 12 deletions(-)
--
2.35.3
Commit e32683c6f7d2 ("x86/mm: Fix RESERVE_BRK() for older binutils")
put the brk area into the .bss segment, causing it not to be cleared
initially. As the brk area is used to allocate early page tables, these
might contain garbage in not explicitly written entries.
This is especially a problem for Xen PV guests, as the hypervisor will
validate page tables (check for writable page tables and hypervisor
private bits) before accepting them to be used. There have been reports
of early crashes of PV guests due to illegal page table contents.
Fix that by letting clear_bss() clear the brk area, too.
Fixes: e32683c6f7d2 ("x86/mm: Fix RESERVE_BRK() for older binutils")
Signed-off-by: Juergen Gross <[email protected]>
---
arch/x86/kernel/head64.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index e7e233209a8c..6a3cfaf6b72a 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -430,6 +430,8 @@ void __init clear_bss(void)
{
memset(__bss_start, 0,
(unsigned long) __bss_stop - (unsigned long) __bss_start);
+ memset(__brk_base, 0,
+ (unsigned long) __brk_limit - (unsigned long) __brk_base);
}
static unsigned long get_cmd_line_ptr(void)
--
2.35.3
Instead of clearing the bss area in assembly code, use the clear_bss()
function.
This requires to pass the start_info address as parameter to
xen_start_kernel() in order to avoid the xen_start_info being zeroed
again.
Signed-off-by: Juergen Gross <[email protected]>
---
arch/x86/include/asm/setup.h | 3 +++
arch/x86/kernel/head64.c | 2 +-
arch/x86/xen/enlighten_pv.c | 8 ++++++--
arch/x86/xen/xen-head.S | 10 +---------
4 files changed, 11 insertions(+), 12 deletions(-)
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index f8b9ee97a891..f37cbff7354c 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -120,6 +120,9 @@ void *extend_brk(size_t size, size_t align);
static char __brk_##name[size]
extern void probe_roms(void);
+
+void clear_bss(void);
+
#ifdef __i386__
asmlinkage void __init i386_start_kernel(void);
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index bd4a34100ed0..e7e233209a8c 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -426,7 +426,7 @@ void __init do_early_exception(struct pt_regs *regs, int trapnr)
/* Don't add a printk in there. printk relies on the PDA which is not initialized
yet. */
-static void __init clear_bss(void)
+void __init clear_bss(void)
{
memset(__bss_start, 0,
(unsigned long) __bss_stop - (unsigned long) __bss_start);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index e3297b15701c..70fb2ea85e90 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1183,15 +1183,19 @@ static void __init xen_domu_set_legacy_features(void)
extern void early_xen_iret_patch(void);
/* First C function to be called on Xen boot */
-asmlinkage __visible void __init xen_start_kernel(void)
+asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
{
struct physdev_set_iopl set_iopl;
unsigned long initrd_start = 0;
int rc;
- if (!xen_start_info)
+ if (!si)
return;
+ clear_bss();
+
+ xen_start_info = si;
+
__text_gen_insn(&early_xen_iret_patch,
JMP32_INSN_OPCODE, &early_xen_iret_patch, &xen_iret,
JMP32_INSN_SIZE);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 3a2cd93bf059..13af6fe453e3 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -48,15 +48,6 @@ SYM_CODE_START(startup_xen)
ANNOTATE_NOENDBR
cld
- /* Clear .bss */
- xor %eax,%eax
- mov $__bss_start, %rdi
- mov $__bss_stop, %rcx
- sub %rdi, %rcx
- shr $3, %rcx
- rep stosq
-
- mov %rsi, xen_start_info
mov initial_stack(%rip), %rsp
/* Set up %gs.
@@ -71,6 +62,7 @@ SYM_CODE_START(startup_xen)
cdq
wrmsr
+ mov %rsi, %rdi
call xen_start_kernel
SYM_CODE_END(startup_xen)
__FINIT
--
2.35.3
On 22.06.2022 18:10, Juergen Gross wrote:
> Commit e32683c6f7d2 ("x86/mm: Fix RESERVE_BRK() for older binutils")
> put the brk area into the .bss segment, causing it not to be cleared
> initially.
This reads contradictively: If the area was put in .bss, it would be
cleared. Thing is it is put in .bss..brk in the object files, while
the linker script puts it in .brk (i.e. outside of .bss).
> As the brk area is used to allocate early page tables, these
> might contain garbage in not explicitly written entries.
I'm surprised this lack of zero-initialization didn't cause any issue
outside of PV Xen. Unless of course there never was the intention for
users of the facility to assume blank pages coming from there, in
which case Xen's use for early page tables would have been wrong (in
not explicitly zeroing the space first).
Jan
On 22.06.2022 18:10, Juergen Gross wrote:
> Instead of clearing the bss area in assembly code, use the clear_bss()
> function.
>
> This requires to pass the start_info address as parameter to
> xen_start_kernel() in order to avoid the xen_start_info being zeroed
> again.
>
> Signed-off-by: Juergen Gross <[email protected]>
Reviewed-by: Jan Beulich <[email protected]>
On 23.06.22 10:09, Jan Beulich wrote:
> On 22.06.2022 18:10, Juergen Gross wrote:
>> Commit e32683c6f7d2 ("x86/mm: Fix RESERVE_BRK() for older binutils")
>> put the brk area into the .bss segment, causing it not to be cleared
>> initially.
>
> This reads contradictively: If the area was put in .bss, it would be
> cleared. Thing is it is put in .bss..brk in the object files, while
> the linker script puts it in .brk (i.e. outside of .bss).
Hmm, yes, this should be reworded.
>
>> As the brk area is used to allocate early page tables, these
>> might contain garbage in not explicitly written entries.
>
> I'm surprised this lack of zero-initialization didn't cause any issue
> outside of PV Xen. Unless of course there never was the intention for
> users of the facility to assume blank pages coming from there, in
> which case Xen's use for early page tables would have been wrong (in
> not explicitly zeroing the space first).
Fun fact: Its not Xen's use for early page tables, but the kernel's
init code. It is used for bare metal, too.
The use case for initial page tables is the problematic one. Only the
needed page table entries are written by the kernel, so the other ones
keep their initial garbage values. As normally no uninitialized entries
are ever referenced, this will have no real impact.
With Xen, however, ALL entries are being validated, which led to the
early crash of dom0.
Juergen
On 23.06.2022 10:14, Juergen Gross wrote:
> On 23.06.22 10:09, Jan Beulich wrote:
>> On 22.06.2022 18:10, Juergen Gross wrote:
>>> Commit e32683c6f7d2 ("x86/mm: Fix RESERVE_BRK() for older binutils")
>>> put the brk area into the .bss segment, causing it not to be cleared
>>> initially.
>>
>> This reads contradictively: If the area was put in .bss, it would be
>> cleared. Thing is it is put in .bss..brk in the object files, while
>> the linker script puts it in .brk (i.e. outside of .bss).
>
> Hmm, yes, this should be reworded.
>
>>
>>> As the brk area is used to allocate early page tables, these
>>> might contain garbage in not explicitly written entries.
>>
>> I'm surprised this lack of zero-initialization didn't cause any issue
>> outside of PV Xen. Unless of course there never was the intention for
>> users of the facility to assume blank pages coming from there, in
>> which case Xen's use for early page tables would have been wrong (in
>> not explicitly zeroing the space first).
>
> Fun fact: Its not Xen's use for early page tables, but the kernel's
> init code. It is used for bare metal, too.
>
> The use case for initial page tables is the problematic one. Only the
> needed page table entries are written by the kernel, so the other ones
> keep their initial garbage values. As normally no uninitialized entries
> are ever referenced, this will have no real impact.
Are you sure there couldn't surface user-mode accessible page table
entries pointing at random pages?
Jan
On 23.06.22 10:50, Jan Beulich wrote:
> On 23.06.2022 10:14, Juergen Gross wrote:
>> On 23.06.22 10:09, Jan Beulich wrote:
>>> On 22.06.2022 18:10, Juergen Gross wrote:
>>>> Commit e32683c6f7d2 ("x86/mm: Fix RESERVE_BRK() for older binutils")
>>>> put the brk area into the .bss segment, causing it not to be cleared
>>>> initially.
>>>
>>> This reads contradictively: If the area was put in .bss, it would be
>>> cleared. Thing is it is put in .bss..brk in the object files, while
>>> the linker script puts it in .brk (i.e. outside of .bss).
>>
>> Hmm, yes, this should be reworded.
>>
>>>
>>>> As the brk area is used to allocate early page tables, these
>>>> might contain garbage in not explicitly written entries.
>>>
>>> I'm surprised this lack of zero-initialization didn't cause any issue
>>> outside of PV Xen. Unless of course there never was the intention for
>>> users of the facility to assume blank pages coming from there, in
>>> which case Xen's use for early page tables would have been wrong (in
>>> not explicitly zeroing the space first).
>>
>> Fun fact: Its not Xen's use for early page tables, but the kernel's
>> init code. It is used for bare metal, too.
>>
>> The use case for initial page tables is the problematic one. Only the
>> needed page table entries are written by the kernel, so the other ones
>> keep their initial garbage values. As normally no uninitialized entries
>> are ever referenced, this will have no real impact.
>
> Are you sure there couldn't surface user-mode accessible page table
> entries pointing at random pages?
No, I'm not sure this can't happen.
Juergen