2022-12-01 03:02:44

by Alexey Kardashevskiy

[permalink] [raw]
Subject: [PATCH kernel 1/3] x86/amd/dr_addr_mask: Cache values in percpu variables

Reading DR[0-3]_ADDR_MASK MSRs takes about 250 cycles which is going to
be noticeable when the AMD KVM SEV-ES's DebugSwap feature is enabled and
KVM needs to store these before switching to a guest; the DebugSwitch
hardware support restores them as type B swap.

This stores MSR values from set_dr_addr_mask() in percpu values and
returns them via new get_dr_addr_mask(). The gain here is about 10x.

Signed-off-by: Alexey Kardashevskiy <[email protected]>
---
arch/x86/include/asm/debugreg.h | 1 +
arch/x86/kernel/cpu/amd.c | 32 ++++++++++++++++++++
2 files changed, 33 insertions(+)

diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index cfdf307ddc01..c4324d0205b5 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -127,6 +127,7 @@ static __always_inline void local_db_restore(unsigned long dr7)

#ifdef CONFIG_CPU_SUP_AMD
extern void set_dr_addr_mask(unsigned long mask, int dr);
+extern unsigned long get_dr_addr_mask(int dr);
#else
static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c75d75b9f11a..ec7efcef4e14 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1158,6 +1158,11 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
return false;
}

+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr0_addr_mask);
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr1_addr_mask);
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr2_addr_mask);
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr3_addr_mask);
+
void set_dr_addr_mask(unsigned long mask, int dr)
{
if (!boot_cpu_has(X86_FEATURE_BPEXT))
@@ -1166,17 +1171,44 @@ void set_dr_addr_mask(unsigned long mask, int dr)
switch (dr) {
case 0:
wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0);
+ per_cpu(dr0_addr_mask, smp_processor_id()) = mask;
break;
case 1:
+ wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
+ per_cpu(dr1_addr_mask, smp_processor_id()) = mask;
+ break;
case 2:
+ wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
+ per_cpu(dr2_addr_mask, smp_processor_id()) = mask;
+ break;
case 3:
wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
+ per_cpu(dr3_addr_mask, smp_processor_id()) = mask;
break;
default:
break;
}
}

+unsigned long get_dr_addr_mask(int dr)
+{
+ if (!boot_cpu_has(X86_FEATURE_BPEXT))
+ return 0;
+
+ switch (dr) {
+ case 0:
+ return per_cpu(dr0_addr_mask, smp_processor_id());
+ case 1:
+ return per_cpu(dr1_addr_mask, smp_processor_id());
+ case 2:
+ return per_cpu(dr2_addr_mask, smp_processor_id());
+ case 3:
+ return per_cpu(dr3_addr_mask, smp_processor_id());
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(get_dr_addr_mask);
+
u32 amd_get_highest_perf(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
--
2.38.1


2022-12-01 17:09:40

by Sean Christopherson

[permalink] [raw]
Subject: Re: [PATCH kernel 1/3] x86/amd/dr_addr_mask: Cache values in percpu variables

On Thu, Dec 01, 2022, Alexey Kardashevskiy wrote:
> Reading DR[0-3]_ADDR_MASK MSRs takes about 250 cycles which is going to
> be noticeable when the AMD KVM SEV-ES's DebugSwap feature is enabled and
> KVM needs to store these before switching to a guest; the DebugSwitch
> hardware support restores them as type B swap.
>
> This stores MSR values from set_dr_addr_mask() in percpu values and
> returns them via new get_dr_addr_mask(). The gain here is about 10x.
>
> Signed-off-by: Alexey Kardashevskiy <[email protected]>
> ---
> arch/x86/include/asm/debugreg.h | 1 +
> arch/x86/kernel/cpu/amd.c | 32 ++++++++++++++++++++
> 2 files changed, 33 insertions(+)
>
> diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
> index cfdf307ddc01..c4324d0205b5 100644
> --- a/arch/x86/include/asm/debugreg.h
> +++ b/arch/x86/include/asm/debugreg.h
> @@ -127,6 +127,7 @@ static __always_inline void local_db_restore(unsigned long dr7)
>
> #ifdef CONFIG_CPU_SUP_AMD
> extern void set_dr_addr_mask(unsigned long mask, int dr);
> +extern unsigned long get_dr_addr_mask(int dr);
> #else
> static inline void set_dr_addr_mask(unsigned long mask, int dr) { }

KVM_AMD doesn't depend on CPU_SUP_AMD, i.e. this needs a stub. Or we need to add
a dependency.

> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
> index c75d75b9f11a..ec7efcef4e14 100644
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -1158,6 +1158,11 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
> return false;
> }
>
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr0_addr_mask);
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr1_addr_mask);
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr2_addr_mask);
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr3_addr_mask);
> +
> void set_dr_addr_mask(unsigned long mask, int dr)
> {
> if (!boot_cpu_has(X86_FEATURE_BPEXT))
> @@ -1166,17 +1171,44 @@ void set_dr_addr_mask(unsigned long mask, int dr)
> switch (dr) {
> case 0:
> wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0);

LOL, I'd love to hear how MSR_F16H_DR0_ADDR_MASK ended up with a completely
different MSR index.

> + per_cpu(dr0_addr_mask, smp_processor_id()) = mask;

Use an array to avoid the copy+paste? And if you're going to add a cache, might
as well use it to avoid unnecessary writes.

> break;
> case 1:

2022-12-06 07:56:47

by Alexey Kardashevskiy

[permalink] [raw]
Subject: Re: [PATCH kernel 1/3] x86/amd/dr_addr_mask: Cache values in percpu variables



On 2/12/22 03:58, Sean Christopherson wrote:
> On Thu, Dec 01, 2022, Alexey Kardashevskiy wrote:
>> Reading DR[0-3]_ADDR_MASK MSRs takes about 250 cycles which is going to
>> be noticeable when the AMD KVM SEV-ES's DebugSwap feature is enabled and
>> KVM needs to store these before switching to a guest; the DebugSwitch
>> hardware support restores them as type B swap.
>>
>> This stores MSR values from set_dr_addr_mask() in percpu values and
>> returns them via new get_dr_addr_mask(). The gain here is about 10x.
>>
>> Signed-off-by: Alexey Kardashevskiy <[email protected]>
>> ---
>> arch/x86/include/asm/debugreg.h | 1 +
>> arch/x86/kernel/cpu/amd.c | 32 ++++++++++++++++++++
>> 2 files changed, 33 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
>> index cfdf307ddc01..c4324d0205b5 100644
>> --- a/arch/x86/include/asm/debugreg.h
>> +++ b/arch/x86/include/asm/debugreg.h
>> @@ -127,6 +127,7 @@ static __always_inline void local_db_restore(unsigned long dr7)
>>
>> #ifdef CONFIG_CPU_SUP_AMD
>> extern void set_dr_addr_mask(unsigned long mask, int dr);
>> +extern unsigned long get_dr_addr_mask(int dr);
>> #else
>> static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
>
> KVM_AMD doesn't depend on CPU_SUP_AMD, i.e. this needs a stub. Or we need to add
> a dependency.

The new symbol is under CONFIG_CPU_SUP_AMD and so is its only user
arch/x86/kernel/cpu/amd.c:

arch/x86/kernel/cpu/Makefile:
obj-$(CONFIG_CPU_SUP_AMD) += amd.o


Is this enough dependency or we need something else? (if enough, I'll
put it into the next commit log).


>> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
>> index c75d75b9f11a..ec7efcef4e14 100644
>> --- a/arch/x86/kernel/cpu/amd.c
>> +++ b/arch/x86/kernel/cpu/amd.c
>> @@ -1158,6 +1158,11 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
>> return false;
>> }
>>
>> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr0_addr_mask);
>> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr1_addr_mask);
>> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr2_addr_mask);
>> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr3_addr_mask);
>> +
>> void set_dr_addr_mask(unsigned long mask, int dr)
>> {
>> if (!boot_cpu_has(X86_FEATURE_BPEXT))
>> @@ -1166,17 +1171,44 @@ void set_dr_addr_mask(unsigned long mask, int dr)
>> switch (dr) {
>> case 0:
>> wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0);
>
> LOL, I'd love to hear how MSR_F16H_DR0_ADDR_MASK ended up with a completely
> different MSR index.
>> + per_cpu(dr0_addr_mask, smp_processor_id()) = mask;
>
> Use an array to avoid the copy+paste? And if you're going to add a cache, might
> as well use it to avoid unnecessary writes.

I'll do this, I did not realize DEFINE_PER_CPU_READ_MOSTLY can do
arrays. Thanks,

>
>> break;
>> case 1:

--
Alexey

2022-12-06 17:11:59

by Sean Christopherson

[permalink] [raw]
Subject: Re: [PATCH kernel 1/3] x86/amd/dr_addr_mask: Cache values in percpu variables

On Tue, Dec 06, 2022, Alexey Kardashevskiy wrote:
>
> On 2/12/22 03:58, Sean Christopherson wrote:
> > On Thu, Dec 01, 2022, Alexey Kardashevskiy wrote:
> > > diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
> > > index cfdf307ddc01..c4324d0205b5 100644
> > > --- a/arch/x86/include/asm/debugreg.h
> > > +++ b/arch/x86/include/asm/debugreg.h
> > > @@ -127,6 +127,7 @@ static __always_inline void local_db_restore(unsigned long dr7)
> > > #ifdef CONFIG_CPU_SUP_AMD
> > > extern void set_dr_addr_mask(unsigned long mask, int dr);
> > > +extern unsigned long get_dr_addr_mask(int dr);
> > > #else
> > > static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
> >
> > KVM_AMD doesn't depend on CPU_SUP_AMD, i.e. this needs a stub. Or we need to add
> > a dependency.
>
> The new symbol is under CONFIG_CPU_SUP_AMD and so is its only user
> arch/x86/kernel/cpu/amd.c:
>
> arch/x86/kernel/cpu/Makefile:
> obj-$(CONFIG_CPU_SUP_AMD) += amd.o
>
>
> Is this enough dependency or we need something else? (if enough, I'll put it
> into the next commit log).

No, it's actually the opposite, the issue is precisely that the symbol is buried
under CPU_SUP_AMD. KVM_AMD doesn't currently depend on CPU_SUP_AMD, and so the
usage in sev_es_prepare_switch_to_guest() fails to compile if CPU_SUP_AMD=n and
KVM_AMD={Y,M}.

config KVM_AMD
tristate "KVM for AMD processors support"
depends on KVM

I actually just submitted a patch[*] to "fix" that since KVM requires the CPU vendor
to be AMD or Hygon at runtime. Although that patch is buried in the middle of a
large series, it doesn't have any dependencies. So, if this series is routed through
the KVM tree, it should be straightforward to just pull that patch into this series,
and whichever series lands first gets the honor of applying that patch.

If this series is routed through the tip tree, the best option may be to just add
a stub to avoid potential conflicts, and then we can rip the stub out later.

[*] https://lore.kernel.org/all/[email protected]

2022-12-07 01:44:29

by Alexey Kardashevskiy

[permalink] [raw]
Subject: Re: [PATCH kernel 1/3] x86/amd/dr_addr_mask: Cache values in percpu variables



On 7/12/22 04:07, Sean Christopherson wrote:
> On Tue, Dec 06, 2022, Alexey Kardashevskiy wrote:
>>
>> On 2/12/22 03:58, Sean Christopherson wrote:
>>> On Thu, Dec 01, 2022, Alexey Kardashevskiy wrote:
>>>> diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
>>>> index cfdf307ddc01..c4324d0205b5 100644
>>>> --- a/arch/x86/include/asm/debugreg.h
>>>> +++ b/arch/x86/include/asm/debugreg.h
>>>> @@ -127,6 +127,7 @@ static __always_inline void local_db_restore(unsigned long dr7)
>>>> #ifdef CONFIG_CPU_SUP_AMD
>>>> extern void set_dr_addr_mask(unsigned long mask, int dr);
>>>> +extern unsigned long get_dr_addr_mask(int dr);
>>>> #else
>>>> static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
>>>
>>> KVM_AMD doesn't depend on CPU_SUP_AMD, i.e. this needs a stub. Or we need to add
>>> a dependency.
>>
>> The new symbol is under CONFIG_CPU_SUP_AMD and so is its only user
>> arch/x86/kernel/cpu/amd.c:
>>
>> arch/x86/kernel/cpu/Makefile:
>> obj-$(CONFIG_CPU_SUP_AMD) += amd.o
>>
>>
>> Is this enough dependency or we need something else? (if enough, I'll put it
>> into the next commit log).
>
> No, it's actually the opposite, the issue is precisely that the symbol is buried
> under CPU_SUP_AMD. KVM_AMD doesn't currently depend on CPU_SUP_AMD, and so the
> usage in sev_es_prepare_switch_to_guest() fails to compile if CPU_SUP_AMD=n and
> KVM_AMD={Y,M}.

Ouch, you are one step ahead, 2/3 fails, not this one. My bad. I'll add
a stub anyway.

btw I want to do "s/int dr/unsigned int dr/" since I am using an array
now. Does it have to be patch#1 "fix set_dr_addr_mask" and then patch#2
"add get_dr_addr_mask" or one patch will do? Thanks,


>
> config KVM_AMD
> tristate "KVM for AMD processors support"
> depends on KVM
>
> I actually just submitted a patch[*] to "fix" that since KVM requires the CPU vendor
> to be AMD or Hygon at runtime.

> Although that patch is buried in the middle of a
> large series, it doesn't have any dependencies. So, if this series is routed through
> the KVM tree, it should be straightforward to just pull that patch into this series,
> and whichever series lands first gets the honor of applying that patch.
>
> If this series is routed through the tip tree, the best option may be to just add
> a stub to avoid potential conflicts, and then we can rip the stub out later.
>
> [*] https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.kernel.org%2Fall%2F20221201232655.290720-12-seanjc%40google.com&amp;data=05%7C01%7CAlexey.Kardashevskiy%40amd.com%7C6ee92cb78f8f446b887908dad7ac6fca%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638059432896741545%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&amp;sdata=25f18IS6z9HEf0Qtt%2BFDxZdtbTVPg%2FVulsFGhWLH0Rg%3D&amp;reserved=0

--
Alexey

2022-12-07 19:23:20

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH kernel 1/3] x86/amd/dr_addr_mask: Cache values in percpu variables

On Thu, Dec 01, 2022 at 01:19:46PM +1100, Alexey Kardashevskiy wrote:
> Subject: Re: [PATCH kernel 1/3] x86/amd/dr_addr_mask: Cache values in percpu variables

"x86/amd: " is perfectly fine as a prefix.

> Reading DR[0-3]_ADDR_MASK MSRs takes about 250 cycles which is going to
> be noticeable when the AMD KVM SEV-ES's DebugSwap feature is enabled and

which does what? I.e., a sort of lazy DR regs swapping...

> KVM needs to store these before switching to a guest; the DebugSwitch
> hardware support restores them as type B swap.

I know this is all clear to you but you should explain what type B
register swap is.

> This stores MSR values from set_dr_addr_mask() in percpu values and

s/This stores/Store/

From Documentation/process/submitting-patches.rst:

"Describe your changes in imperative mood, e.g. "make xyzzy do frotz"
instead of "[This patch] makes xyzzy do frotz" or "[I] changed xyzzy
to do frotz", as if you are giving orders to the codebase to change
its behaviour."

Also, do not talk about what your patch does - that should hopefully be
visible in the diff itself. Rather, talk about *why* you're doing what
you're doing.

> returns them via new get_dr_addr_mask(). The gain here is about 10x.
>
> Signed-off-by: Alexey Kardashevskiy <[email protected]>
> ---
> arch/x86/include/asm/debugreg.h | 1 +
> arch/x86/kernel/cpu/amd.c | 32 ++++++++++++++++++++
> 2 files changed, 33 insertions(+)
>
> diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
> index cfdf307ddc01..c4324d0205b5 100644
> --- a/arch/x86/include/asm/debugreg.h
> +++ b/arch/x86/include/asm/debugreg.h
> @@ -127,6 +127,7 @@ static __always_inline void local_db_restore(unsigned long dr7)
>
> #ifdef CONFIG_CPU_SUP_AMD
> extern void set_dr_addr_mask(unsigned long mask, int dr);
> +extern unsigned long get_dr_addr_mask(int dr);
> #else
> static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
> #endif
> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
> index c75d75b9f11a..ec7efcef4e14 100644
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -1158,6 +1158,11 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
> return false;
> }
>
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr0_addr_mask);
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr1_addr_mask);
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr2_addr_mask);
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr3_addr_mask);

This BPEXT thing is AMD-only, right?

I guess those should be called amd_drX_addr_mask where X in [0-3].

Yeah yeah, they are used in AMD-only code - svm* - but still.

> void set_dr_addr_mask(unsigned long mask, int dr)
> {
> if (!boot_cpu_has(X86_FEATURE_BPEXT))
> @@ -1166,17 +1171,44 @@ void set_dr_addr_mask(unsigned long mask, int dr)
> switch (dr) {
> case 0:
> wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0);
> + per_cpu(dr0_addr_mask, smp_processor_id()) = mask;
> break;
> case 1:
> + wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
> + per_cpu(dr1_addr_mask, smp_processor_id()) = mask;
> + break;
> case 2:
> + wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
> + per_cpu(dr2_addr_mask, smp_processor_id()) = mask;
> + break;
> case 3:
> wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
> + per_cpu(dr3_addr_mask, smp_processor_id()) = mask;
> break;
> default:
> break;
> }
> }
>
> +unsigned long get_dr_addr_mask(int dr)

This function name is too generic for an exported function.

amd_get_dr_addr_mask() I'd say.

> + if (!boot_cpu_has(X86_FEATURE_BPEXT))

check_for_deprecated_apis: WARNING: arch/x86/kernel/cpu/amd.c:1195: Do not use boot_cpu_has() - use cpu_feature_enabled() instead.

You could fix the above one too, while at it.

> + return 0;
> +
> + switch (dr) {
> + case 0:
> + return per_cpu(dr0_addr_mask, smp_processor_id());
> + case 1:
> + return per_cpu(dr1_addr_mask, smp_processor_id());
> + case 2:
> + return per_cpu(dr2_addr_mask, smp_processor_id());
> + case 3:
> + return per_cpu(dr3_addr_mask, smp_processor_id());

default:
WARN_ON_ONCE(1);
break;

Just in case.

And as a matter of fact, make that short and succinct:

switch (dr) {
case 0: return per_cpu(dr0_addr_mask, smp_processor_id());
case 1: return per_cpu(dr1_addr_mask, smp_processor_id());
case 2: return per_cpu(dr2_addr_mask, smp_processor_id());
case 3: return per_cpu(dr3_addr_mask, smp_processor_id());
default: WARN_ON_ONCE(1); break;
}

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

2022-12-08 06:46:15

by Alexey Kardashevskiy

[permalink] [raw]
Subject: Re: [PATCH kernel 1/3] x86/amd/dr_addr_mask: Cache values in percpu variables



On 8/12/22 05:55, Borislav Petkov wrote:
> On Thu, Dec 01, 2022 at 01:19:46PM +1100, Alexey Kardashevskiy wrote:
>> Subject: Re: [PATCH kernel 1/3] x86/amd/dr_addr_mask: Cache values in percpu variables
>
> "x86/amd: " is perfectly fine as a prefix.
>
>> Reading DR[0-3]_ADDR_MASK MSRs takes about 250 cycles which is going to
>> be noticeable when the AMD KVM SEV-ES's DebugSwap feature is enabled and
>
> which does what? I.e., a sort of lazy DR regs swapping...
>
>> KVM needs to store these before switching to a guest; the DebugSwitch
>> hardware support restores them as type B swap.
>
> I know this is all clear to you but you should explain what type B
> register swap is.
>
>> This stores MSR values from set_dr_addr_mask() in percpu values and
>
> s/This stores/Store/
>
> From Documentation/process/submitting-patches.rst:
>
> "Describe your changes in imperative mood, e.g. "make xyzzy do frotz"
> instead of "[This patch] makes xyzzy do frotz" or "[I] changed xyzzy
> to do frotz", as if you are giving orders to the codebase to change
> its behaviour."
>
> Also, do not talk about what your patch does - that should hopefully be
> visible in the diff itself. Rather, talk about *why* you're doing what
> you're doing.
>
>> returns them via new get_dr_addr_mask(). The gain here is about 10x.
>>
>> Signed-off-by: Alexey Kardashevskiy <[email protected]>
>> ---
>> arch/x86/include/asm/debugreg.h | 1 +
>> arch/x86/kernel/cpu/amd.c | 32 ++++++++++++++++++++
>> 2 files changed, 33 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
>> index cfdf307ddc01..c4324d0205b5 100644
>> --- a/arch/x86/include/asm/debugreg.h
>> +++ b/arch/x86/include/asm/debugreg.h
>> @@ -127,6 +127,7 @@ static __always_inline void local_db_restore(unsigned long dr7)
>>
>> #ifdef CONFIG_CPU_SUP_AMD
>> extern void set_dr_addr_mask(unsigned long mask, int dr);
>> +extern unsigned long get_dr_addr_mask(int dr);
>> #else
>> static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
>> #endif
>> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
>> index c75d75b9f11a..ec7efcef4e14 100644
>> --- a/arch/x86/kernel/cpu/amd.c
>> +++ b/arch/x86/kernel/cpu/amd.c
>> @@ -1158,6 +1158,11 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
>> return false;
>> }
>>
>> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr0_addr_mask);
>> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr1_addr_mask);
>> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr2_addr_mask);
>> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr3_addr_mask);
>
> This BPEXT thing is AMD-only, right?
>
> I guess those should be called amd_drX_addr_mask where X in [0-3].
>
> Yeah yeah, they are used in AMD-only code - svm* - but still.
>
>> void set_dr_addr_mask(unsigned long mask, int dr)
>> {
>> if (!boot_cpu_has(X86_FEATURE_BPEXT))
>> @@ -1166,17 +1171,44 @@ void set_dr_addr_mask(unsigned long mask, int dr)
>> switch (dr) {
>> case 0:
>> wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0);
>> + per_cpu(dr0_addr_mask, smp_processor_id()) = mask;
>> break;
>> case 1:
>> + wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
>> + per_cpu(dr1_addr_mask, smp_processor_id()) = mask;
>> + break;
>> case 2:
>> + wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
>> + per_cpu(dr2_addr_mask, smp_processor_id()) = mask;
>> + break;
>> case 3:
>> wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
>> + per_cpu(dr3_addr_mask, smp_processor_id()) = mask;
>> break;
>> default:
>> break;
>> }
>> }
>>
>> +unsigned long get_dr_addr_mask(int dr)
>
> This function name is too generic for an exported function.
>
> amd_get_dr_addr_mask() I'd say.
>
>> + if (!boot_cpu_has(X86_FEATURE_BPEXT))
>
> check_for_deprecated_apis: WARNING: arch/x86/kernel/cpu/amd.c:1195: Do not use boot_cpu_has() - use cpu_feature_enabled() instead.
>
> You could fix the above one too, while at it.
>
>> + return 0;
>> +
>> + switch (dr) {
>> + case 0:
>> + return per_cpu(dr0_addr_mask, smp_processor_id());
>> + case 1:
>> + return per_cpu(dr1_addr_mask, smp_processor_id());
>> + case 2:
>> + return per_cpu(dr2_addr_mask, smp_processor_id());
>> + case 3:
>> + return per_cpu(dr3_addr_mask, smp_processor_id());
>
> default:
> WARN_ON_ONCE(1);
> break;
>
> Just in case.
>
> And as a matter of fact, make that short and succinct:
>
> switch (dr) {
> case 0: return per_cpu(dr0_addr_mask, smp_processor_id());
> case 1: return per_cpu(dr1_addr_mask, smp_processor_id());
> case 2: return per_cpu(dr2_addr_mask, smp_processor_id());
> case 3: return per_cpu(dr3_addr_mask, smp_processor_id());
> default: WARN_ON_ONCE(1); break;
> }


Not an array, as Sean suggested? Uff...


>
> Thx.
>

--
Alexey

2022-12-08 10:46:05

by Borislav Petkov

[permalink] [raw]
Subject: Re: [PATCH kernel 1/3] x86/amd/dr_addr_mask: Cache values in percpu variables

On Thu, Dec 08, 2022 at 05:11:26PM +1100, Alexey Kardashevskiy wrote:
> Not an array, as Sean suggested? Uff...

Sure an array if it makes the code even more readable and clean. With an
array it should become even more compact, I'd say.

Thx.

--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette