2022-10-26 13:52:08

by Anirudh Rayabharam

[permalink] [raw]
Subject: [PATCH 0/2] Fix MSR access errors during kexec in root partition

Fixes a couple of MSR access errors seen during kexec in root partition
and opportunistically introduces a data structure for the reference TSC
MSR in order to simplify the code that updates that MSR.

Anirudh Rayabharam (2):
x86/hyperv: fix invalid writes to MSRs during root partition kexec
clocksource/drivers/hyperv: add data structure for reference TSC MSR

arch/x86/hyperv/hv_init.c | 11 +++++++----
drivers/clocksource/hyperv_timer.c | 28 ++++++++++++++--------------
include/asm-generic/hyperv-tlfs.h | 9 +++++++++
3 files changed, 30 insertions(+), 18 deletions(-)

--
2.34.1



2022-10-26 14:12:23

by Anirudh Rayabharam

[permalink] [raw]
Subject: [PATCH 1/2] x86/hyperv: fix invalid writes to MSRs during root partition kexec

hv_cleanup resets the hypercall page by setting the MSR to 0. However,
the root partition is not allowed to write to the GPA bits of the MSR.
Instead, it uses the hypercall page provided by the MSR. Similar is the
case with the reference TSC MSR.

Clear only the enable bit instead of zeroing the entire MSR to make
the code valid for root partition too.

Signed-off-by: Anirudh Rayabharam <[email protected]>
---
arch/x86/hyperv/hv_init.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 29774126e931..76ff63d69461 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -537,6 +537,7 @@ void __init hyperv_init(void)
void hyperv_cleanup(void)
{
union hv_x64_msr_hypercall_contents hypercall_msr;
+ u64 tsc_msr;

unregister_syscore_ops(&hv_syscore_ops);

@@ -552,12 +553,14 @@ void hyperv_cleanup(void)
hv_hypercall_pg = NULL;

/* Reset the hypercall page */
- hypercall_msr.as_uint64 = 0;
+ rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ hypercall_msr.enable = 0;
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);

/* Reset the TSC page */
- hypercall_msr.as_uint64 = 0;
- wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64);
+ rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr);
+ tsc_msr &= ~BIT_ULL(0);
+ wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr);
}

void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die)
--
2.34.1


2022-10-26 14:34:19

by Anirudh Rayabharam

[permalink] [raw]
Subject: [PATCH 2/2] clocksource/drivers/hyperv: add data structure for reference TSC MSR

Add a data structure to represent the reference TSC MSR similar to
other MSRs. This simplifies the code for updating the MSR.

Signed-off-by: Anirudh Rayabharam <[email protected]>
---
arch/x86/hyperv/hv_init.c | 10 +++++-----
drivers/clocksource/hyperv_timer.c | 28 ++++++++++++++--------------
include/asm-generic/hyperv-tlfs.h | 9 +++++++++
3 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 76ff63d69461..78993148d14c 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -537,7 +537,7 @@ void __init hyperv_init(void)
void hyperv_cleanup(void)
{
union hv_x64_msr_hypercall_contents hypercall_msr;
- u64 tsc_msr;
+ union hv_reference_tsc_msr tsc_msr;

unregister_syscore_ops(&hv_syscore_ops);

@@ -557,10 +557,10 @@ void hyperv_cleanup(void)
hypercall_msr.enable = 0;
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);

- /* Reset the TSC page */
- rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr);
- tsc_msr &= ~BIT_ULL(0);
- wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr);
+ /* Reset the TSC page. */
+ rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
+ tsc_msr.enable = 0;
+ wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
}

void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die)
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index bb47610bbd1c..11332c82d1af 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -395,25 +395,25 @@ static u64 notrace read_hv_sched_clock_tsc(void)

static void suspend_hv_clock_tsc(struct clocksource *arg)
{
- u64 tsc_msr;
+ union hv_reference_tsc_msr tsc_msr;

/* Disable the TSC page */
- tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC);
- tsc_msr &= ~BIT_ULL(0);
- hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr);
+ tsc_msr.as_uint64 = hv_get_register(HV_REGISTER_REFERENCE_TSC);
+ tsc_msr.enable = 0;
+ hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr.as_uint64);
}


static void resume_hv_clock_tsc(struct clocksource *arg)
{
phys_addr_t phys_addr = virt_to_phys(&tsc_pg);
- u64 tsc_msr;
+ union hv_reference_tsc_msr tsc_msr;

/* Re-enable the TSC page */
- tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC);
- tsc_msr &= GENMASK_ULL(11, 0);
- tsc_msr |= BIT_ULL(0) | (u64)phys_addr;
- hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr);
+ tsc_msr.as_uint64 = hv_get_register(HV_REGISTER_REFERENCE_TSC);
+ tsc_msr.enable = 1;
+ tsc_msr.pfn = __phys_to_pfn(phys_addr);
+ hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr.as_uint64);
}

#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK
@@ -495,7 +495,7 @@ static __always_inline void hv_setup_sched_clock(void *sched_clock) {}

static bool __init hv_init_tsc_clocksource(void)
{
- u64 tsc_msr;
+ union hv_reference_tsc_msr tsc_msr;
phys_addr_t phys_addr;

if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
@@ -530,10 +530,10 @@ static bool __init hv_init_tsc_clocksource(void)
* (which already has at least the low 12 bits set to zero since
* it is page aligned). Also set the "enable" bit, which is bit 0.
*/
- tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC);
- tsc_msr &= GENMASK_ULL(11, 0);
- tsc_msr = tsc_msr | 0x1 | (u64)phys_addr;
- hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr);
+ tsc_msr.as_uint64 = hv_get_register(HV_REGISTER_REFERENCE_TSC);
+ tsc_msr.enable = 1;
+ tsc_msr.pfn = __phys_to_pfn(phys_addr);
+ hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr.as_uint64);

clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);

diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index fdce7a4cfc6f..b17c6eeb9afa 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -102,6 +102,15 @@ struct ms_hyperv_tsc_page {
volatile s64 tsc_offset;
} __packed;

+union hv_reference_tsc_msr {
+ u64 as_uint64;
+ struct {
+ u64 enable:1;
+ u64 reserved:11;
+ u64 pfn:52;
+ } __packed;
+};
+
/*
* The guest OS needs to register the guest ID with the hypervisor.
* The guest ID is a 64 bit entity and the structure of this ID is
--
2.34.1


2022-10-26 15:10:32

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: RE: [PATCH 0/2] Fix MSR access errors during kexec in root partition

From: Anirudh Rayabharam <[email protected]> Sent: Wednesday, October 26, 2022 6:47 AM
>
> Fixes a couple of MSR access errors seen during kexec in root partition
> and opportunistically introduces a data structure for the reference TSC
> MSR in order to simplify the code that updates that MSR.
>
> Anirudh Rayabharam (2):
> x86/hyperv: fix invalid writes to MSRs during root partition kexec
> clocksource/drivers/hyperv: add data structure for reference TSC MSR

Could these two patches be sequenced in the opposite order, to avoid the
need to change the TSC code in hyperv_cleanup() twice? The new
data structure for the Reference TSC MSR and clocksource driver changes
would be in the first patch. Then the second patch would update
hyperv_cleanup() and could use the new data structure.

Michael

>
> arch/x86/hyperv/hv_init.c | 11 +++++++----
> drivers/clocksource/hyperv_timer.c | 28 ++++++++++++++--------------
> include/asm-generic/hyperv-tlfs.h | 9 +++++++++
> 3 files changed, 30 insertions(+), 18 deletions(-)
>
> --
> 2.34.1


2022-10-26 15:48:29

by Michael Kelley (LINUX)

[permalink] [raw]
Subject: RE: [PATCH 1/2] x86/hyperv: fix invalid writes to MSRs during root partition kexec

From: Anirudh Rayabharam <[email protected]> Sent: Wednesday, October 26, 2022 6:47 AM
>
> hv_cleanup resets the hypercall page by setting the MSR to 0. However,
> the root partition is not allowed to write to the GPA bits of the MSR.
> Instead, it uses the hypercall page provided by the MSR. Similar is the
> case with the reference TSC MSR.
>
> Clear only the enable bit instead of zeroing the entire MSR to make
> the code valid for root partition too.

When the enable bit is cleared (but not the PFN) in the MSR, do we know
for sure that Hyper-V removes the overlay page for the PFN? Making sure
that the overlay page is removed is the main reason for clearing the entire
MSR. If we're going to leave the PFN in place and just clear the enable bit,
we need to confirm with the Hyper-V guys that the overlay page will be
removed.

Michael

>
> Signed-off-by: Anirudh Rayabharam <[email protected]>
> ---
> arch/x86/hyperv/hv_init.c | 9 ++++++---
> 1 file changed, 6 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 29774126e931..76ff63d69461 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -537,6 +537,7 @@ void __init hyperv_init(void)
> void hyperv_cleanup(void)
> {
> union hv_x64_msr_hypercall_contents hypercall_msr;
> + u64 tsc_msr;
>
> unregister_syscore_ops(&hv_syscore_ops);
>
> @@ -552,12 +553,14 @@ void hyperv_cleanup(void)
> hv_hypercall_pg = NULL;
>
> /* Reset the hypercall page */
> - hypercall_msr.as_uint64 = 0;
> + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> + hypercall_msr.enable = 0;
> wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
>
> /* Reset the TSC page */
> - hypercall_msr.as_uint64 = 0;
> - wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64);
> + rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr);
> + tsc_msr &= ~BIT_ULL(0);
> + wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr);
> }
>
> void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die)
> --
> 2.34.1


2022-10-27 10:06:18

by Anirudh Rayabharam

[permalink] [raw]
Subject: Re: [PATCH 0/2] Fix MSR access errors during kexec in root partition

On Wed, Oct 26, 2022 at 02:57:58PM +0000, Michael Kelley (LINUX) wrote:
> From: Anirudh Rayabharam <[email protected]> Sent: Wednesday, October 26, 2022 6:47 AM
> >
> > Fixes a couple of MSR access errors seen during kexec in root partition
> > and opportunistically introduces a data structure for the reference TSC
> > MSR in order to simplify the code that updates that MSR.
> >
> > Anirudh Rayabharam (2):
> > x86/hyperv: fix invalid writes to MSRs during root partition kexec
> > clocksource/drivers/hyperv: add data structure for reference TSC MSR
>
> Could these two patches be sequenced in the opposite order, to avoid the
> need to change the TSC code in hyperv_cleanup() twice? The new
> data structure for the Reference TSC MSR and clocksource driver changes
> would be in the first patch. Then the second patch would update
> hyperv_cleanup() and could use the new data structure.

I just sent a new version with the ordering you suggested.

Thanks,
Anirudh.

>
> Michael
>
> >
> > arch/x86/hyperv/hv_init.c | 11 +++++++----
> > drivers/clocksource/hyperv_timer.c | 28 ++++++++++++++--------------
> > include/asm-generic/hyperv-tlfs.h | 9 +++++++++
> > 3 files changed, 30 insertions(+), 18 deletions(-)
> >
> > --
> > 2.34.1

2022-10-27 10:28:15

by Anirudh Rayabharam

[permalink] [raw]
Subject: Re: [PATCH 1/2] x86/hyperv: fix invalid writes to MSRs during root partition kexec

On Wed, Oct 26, 2022 at 02:58:16PM +0000, Michael Kelley (LINUX) wrote:
> From: Anirudh Rayabharam <[email protected]> Sent: Wednesday, October 26, 2022 6:47 AM
> >
> > hv_cleanup resets the hypercall page by setting the MSR to 0. However,
> > the root partition is not allowed to write to the GPA bits of the MSR.
> > Instead, it uses the hypercall page provided by the MSR. Similar is the
> > case with the reference TSC MSR.
> >
> > Clear only the enable bit instead of zeroing the entire MSR to make
> > the code valid for root partition too.
>
> When the enable bit is cleared (but not the PFN) in the MSR, do we know
> for sure that Hyper-V removes the overlay page for the PFN? Making sure
> that the overlay page is removed is the main reason for clearing the entire
> MSR. If we're going to leave the PFN in place and just clear the enable bit,
> we need to confirm with the Hyper-V guys that the overlay page will be
> removed.

I checked the hypervisor code. Just clearing the enable bit does cause
the overlay page to be unmapped by the hypervisor.

Thanks,
Anirudh.

>
> Michael
>
> >
> > Signed-off-by: Anirudh Rayabharam <[email protected]>
> > ---
> > arch/x86/hyperv/hv_init.c | 9 ++++++---
> > 1 file changed, 6 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> > index 29774126e931..76ff63d69461 100644
> > --- a/arch/x86/hyperv/hv_init.c
> > +++ b/arch/x86/hyperv/hv_init.c
> > @@ -537,6 +537,7 @@ void __init hyperv_init(void)
> > void hyperv_cleanup(void)
> > {
> > union hv_x64_msr_hypercall_contents hypercall_msr;
> > + u64 tsc_msr;
> >
> > unregister_syscore_ops(&hv_syscore_ops);
> >
> > @@ -552,12 +553,14 @@ void hyperv_cleanup(void)
> > hv_hypercall_pg = NULL;
> >
> > /* Reset the hypercall page */
> > - hypercall_msr.as_uint64 = 0;
> > + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> > + hypercall_msr.enable = 0;
> > wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> >
> > /* Reset the TSC page */
> > - hypercall_msr.as_uint64 = 0;
> > - wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64);
> > + rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr);
> > + tsc_msr &= ~BIT_ULL(0);
> > + wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr);
> > }
> >
> > void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die)
> > --
> > 2.34.1