Originally, AMD SVM AVIC supports 8-bit host physical APIC ID.
However, newer AMD systems can have physical APIC ID larger than 255,
and AVIC hardware has been extended to support upto the maximum physical
APIC ID available in the system.
This series introduces a helper function in the APIC subsystem to get
the maximum physical APIC ID allowing the SVM AVIC driver to calculate
the number of bits to program the host physical APIC ID in the AVIC
physical APIC ID table entry.
Regards,
Suravee Suthikulpanit
Changes from V2 (https://www.spinics.net/lists/kvm/msg261351.html)
* Use global variable npt_enabled instead (patch 1/3)
* Use BIT_ULL() instead of BIT() since avic_host_physical_id_mask
is 64-bit value (patch 3/3)
Suravee Suthikulpanit (3):
KVM: SVM: Refactor AVIC hardware setup logic into helper function
x86/apic: Add helper function to get maximum physical APIC ID
KVM: SVM: Extend host physical APIC ID field to support more than
8-bit
arch/x86/include/asm/apic.h | 1 +
arch/x86/kernel/apic/apic.c | 6 ++++++
arch/x86/kvm/svm/avic.c | 38 +++++++++++++++++++++++++++++--------
arch/x86/kvm/svm/svm.c | 8 +-------
arch/x86/kvm/svm/svm.h | 2 +-
5 files changed, 39 insertions(+), 16 deletions(-)
--
2.25.1
To prepare for upcoming AVIC changes. There is no functional change.
Signed-off-by: Suravee Suthikulpanit <[email protected]>
---
arch/x86/kvm/svm/avic.c | 10 ++++++++++
arch/x86/kvm/svm/svm.c | 8 +-------
arch/x86/kvm/svm/svm.h | 1 +
3 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 8052d92069e0..63c3801d1829 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -1011,3 +1011,13 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
kvm_vcpu_update_apicv(vcpu);
avic_set_running(vcpu, true);
}
+
+bool avic_hardware_setup(bool avic)
+{
+ if (!avic || !npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC))
+ return false;
+
+ pr_info("AVIC enabled\n");
+ amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
+ return true;
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 989685098b3e..e59f663ab8cb 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1031,13 +1031,7 @@ static __init int svm_hardware_setup(void)
nrips = false;
}
- enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
-
- if (enable_apicv) {
- pr_info("AVIC enabled\n");
-
- amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
- }
+ enable_apicv = avic = avic_hardware_setup(avic);
if (vls) {
if (!npt_enabled ||
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 5d30db599e10..3fa975031dc9 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -515,6 +515,7 @@ static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
}
+bool avic_hardware_setup(bool avic);
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
--
2.25.1
Export the max_physical_apicid via a helper function since this information
is required by AMD SVM AVIC support.
Signed-off-by: Suravee Suthikulpanit <[email protected]>
---
arch/x86/include/asm/apic.h | 1 +
arch/x86/kernel/apic/apic.c | 6 ++++++
2 files changed, 7 insertions(+)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 48067af94678..77d9cb2a7e28 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -435,6 +435,7 @@ static inline void apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) {}
#endif /* CONFIG_X86_LOCAL_APIC */
extern void apic_ack_irq(struct irq_data *data);
+extern u32 apic_get_max_phys_apicid(void);
static inline void ack_APIC_irq(void)
{
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b70344bf6600..47653d8c05f2 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2361,6 +2361,12 @@ bool apic_id_is_primary_thread(unsigned int apicid)
}
#endif
+u32 apic_get_max_phys_apicid(void)
+{
+ return max_physical_apicid;
+}
+EXPORT_SYMBOL_GPL(apic_get_max_phys_apicid);
+
/*
* Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids
* and cpuid_to_apicid[] synchronized.
--
2.25.1
The AVIC physical APIC ID table entry contains the host physical
APIC ID field, which the hardware uses to keep track of where each
vCPU is running. Originally, the field is an 8-bit value, which can
only support physical APIC ID up to 255.
To support system with larger APIC ID, the AVIC hardware extends
this field to support up to the largest possible physical APIC ID
available on the system.
Therefore, replace the hard-coded mask value with the value
calculated from the maximum possible physical APIC ID in the system.
Reviewed-by: Maxim Levitsky <[email protected]>
Signed-off-by: Suravee Suthikulpanit <[email protected]>
---
arch/x86/kvm/svm/avic.c | 28 ++++++++++++++++++++--------
arch/x86/kvm/svm/svm.h | 1 -
2 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 63c3801d1829..cc6daf5b91d5 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -19,6 +19,7 @@
#include <linux/amd-iommu.h>
#include <linux/kvm_host.h>
+#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include "trace.h"
@@ -63,6 +64,7 @@
static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
static u32 next_vm_id = 0;
static bool next_vm_id_wrapped = 0;
+static u64 avic_host_physical_id_mask;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
/*
@@ -133,6 +135,20 @@ void avic_vm_destroy(struct kvm *kvm)
spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
}
+static void avic_init_host_physical_apicid_mask(void)
+{
+ if (!x2apic_mode) {
+ /* If host is in xAPIC mode, default to only 8-bit mask. */
+ avic_host_physical_id_mask = 0xffULL;
+ } else {
+ u32 count = get_count_order(apic_get_max_phys_apicid());
+
+ avic_host_physical_id_mask = BIT_ULL(count) - 1;
+ }
+ pr_debug("Using AVIC host physical APIC ID mask %#0llx\n",
+ avic_host_physical_id_mask);
+}
+
int avic_vm_init(struct kvm *kvm)
{
unsigned long flags;
@@ -943,22 +959,17 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
u64 entry;
- /* ID = 0xff (broadcast), ID > 0xff (reserved) */
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
- /*
- * Since the host physical APIC id is 8 bits,
- * we can support host APIC ID upto 255.
- */
- if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
+ if (WARN_ON(h_physical_id > avic_host_physical_id_mask))
return;
entry = READ_ONCE(*(svm->avic_physical_id_cache));
WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
- entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
- entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
+ entry &= ~avic_host_physical_id_mask;
+ entry |= h_physical_id;
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
if (svm->avic_is_running)
@@ -1018,6 +1029,7 @@ bool avic_hardware_setup(bool avic)
return false;
pr_info("AVIC enabled\n");
+ avic_init_host_physical_apicid_mask();
amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
return true;
}
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 3fa975031dc9..bbe2fb226b52 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -497,7 +497,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
-#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
--
2.25.1
Are there any other concerns with this series?
Regards,
Suravee
On 12/13/21 6:31 PM, Suravee Suthikulpanit wrote:
> Originally, AMD SVM AVIC supports 8-bit host physical APIC ID.
> However, newer AMD systems can have physical APIC ID larger than 255,
> and AVIC hardware has been extended to support upto the maximum physical
> APIC ID available in the system.
>
> This series introduces a helper function in the APIC subsystem to get
> the maximum physical APIC ID allowing the SVM AVIC driver to calculate
> the number of bits to program the host physical APIC ID in the AVIC
> physical APIC ID table entry.
>
> Regards,
> Suravee Suthikulpanit
>
> Changes from V2 (https://www.spinics.net/lists/kvm/msg261351.html)
>
> * Use global variable npt_enabled instead (patch 1/3)
>
> * Use BIT_ULL() instead of BIT() since avic_host_physical_id_mask
> is 64-bit value (patch 3/3)
>
> Suravee Suthikulpanit (3):
> KVM: SVM: Refactor AVIC hardware setup logic into helper function
> x86/apic: Add helper function to get maximum physical APIC ID
> KVM: SVM: Extend host physical APIC ID field to support more than
> 8-bit
>
> arch/x86/include/asm/apic.h | 1 +
> arch/x86/kernel/apic/apic.c | 6 ++++++
> arch/x86/kvm/svm/avic.c | 38 +++++++++++++++++++++++++++++--------
> arch/x86/kvm/svm/svm.c | 8 +-------
> arch/x86/kvm/svm/svm.h | 2 +-
> 5 files changed, 39 insertions(+), 16 deletions(-)
>
On Mon, Dec 13, 2021, Suravee Suthikulpanit wrote:
> The AVIC physical APIC ID table entry contains the host physical
> APIC ID field, which the hardware uses to keep track of where each
> vCPU is running. Originally, the field is an 8-bit value, which can
> only support physical APIC ID up to 255.
>
> To support system with larger APIC ID, the AVIC hardware extends
> this field to support up to the largest possible physical APIC ID
> available on the system.
>
> Therefore, replace the hard-coded mask value with the value
> calculated from the maximum possible physical APIC ID in the system.
...
> +static void avic_init_host_physical_apicid_mask(void)
> +{
> + if (!x2apic_mode) {
> + /* If host is in xAPIC mode, default to only 8-bit mask. */
> + avic_host_physical_id_mask = 0xffULL;
Use GENMASK(7:0) instead of open coding the equivalent. Oh, and
avic_host_physical_id_mask doesn't need to be a u64, it's hard capped at 12 bits
and so can be a simple int.
> + } else {
> + u32 count = get_count_order(apic_get_max_phys_apicid());
> +
> + avic_host_physical_id_mask = BIT_ULL(count) - 1;
> + }
Why is the "legal" mask dynamically calculated? That's way more complicated and
convoluted then this needs to be.
The cover letter says
However, newer AMD systems can have physical APIC ID larger than 255,
and AVIC hardware has been extended to support upto the maximum physical
APIC ID available in the system.
and newer versions of the APM have bits
11:8 - Reserved/SBZ for legacy APIC; extension of Host Physical APIC ID when
x2APIC is enabled.
7:0 - Host Physical APIC ID Physical APIC ID of the physical core allocated by
the VMM to host the guest virtual processor. This field is not valid
unless the IsRunning bit is set.
whereas older versions have
11:8 - Reserved, SBZ. Should always be set to zero.
That implies that an APIC ID > 255 on older hardware what ignores bits 11:8 even
in x2APIC will silently fail, and the whole point of this mask is to avoid exactly
that.
To further confuse things, the APM was only partially updated and needs to be fixed,
e.g. "Figure 15-19. Physical APIC Table in Memory" and the following blurb wasn't
updated to account for the new x2APIC behavior.
But at least one APM blurb appears to have been wrong (or the architecture is broken)
prior to the larger AVIC width:
Since a destination of FFh is used to specify a broadcast, physical APIC ID FFh
is reserved.
We have Rome systems with 256 CPUs and thus an x2APIC ID for a CPU of FFh. So
either the APM is wrong or AVIC is broken on older large systems.
Anyways, for the new larger mask, IMO dynamically computing the mask based on what
APIC IDs were enumerated to the kernel is pointless. If the AVIC doesn't support
using bits 11:0 to address APIC IDs then KVM is silently hosed no matter what if
any APIC ID is >255.
Ideally, there would be a feature flag enumerating the larger AVIC support so we
could do:
if (!x2apic_mode || !boot_cpu_has(X86_FEATURE_FANCY_NEW_AVIC))
avic_host_physical_id_mask = GENMASK(7:0);
else
avic_host_physical_id_mask = GENMASK(11:0);
but since it sounds like that's not the case, and presumably hardware is smart
enough not to assign APIC IDs it can't address, this can simply be
if (!x2apic_mode)
avic_host_physical_id_mask = GENMASK(7:0);
else
avic_host_physical_id_mask = GENMASK(11:0);
and patch 01 to add+export apic_get_max_phys_apicid() goes away.
> + pr_debug("Using AVIC host physical APIC ID mask %#0llx\n",
> + avic_host_physical_id_mask);
> +}
> +
> int avic_vm_init(struct kvm *kvm)
> {
> unsigned long flags;
> @@ -943,22 +959,17 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
> void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> {
> u64 entry;
> - /* ID = 0xff (broadcast), ID > 0xff (reserved) */
> int h_physical_id = kvm_cpu_get_apicid(cpu);
> struct vcpu_svm *svm = to_svm(vcpu);
>
> - /*
> - * Since the host physical APIC id is 8 bits,
> - * we can support host APIC ID upto 255.
> - */
> - if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
> + if (WARN_ON(h_physical_id > avic_host_physical_id_mask))
Not really your code, but this should really be
if (WARN_ON((h_physical_id & avic_host_physical_id_mask) != h_physical_id))
return;
otherwise a negative value will get a false negative.
LOL, and with respect to FFh being reserved, the current KVM code doesn't treat
it as reserved. Which is probably a serendipituous bug as it allows addressing
APID ID FFh when x2APIC is enabled. I suspect we also want:
if (WARN_ON(!x2apic_mode && h_physical_id == 0xff))
return;
> return;
>
> entry = READ_ONCE(*(svm->avic_physical_id_cache));
> WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
>
> - entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
> - entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
> + entry &= ~avic_host_physical_id_mask;
> + entry |= h_physical_id;
>
> entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
> if (svm->avic_is_running)
> @@ -1018,6 +1029,7 @@ bool avic_hardware_setup(bool avic)
> return false;
>
> pr_info("AVIC enabled\n");
> + avic_init_host_physical_apicid_mask();
> amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
> return true;
> }
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 3fa975031dc9..bbe2fb226b52 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -497,7 +497,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
> #define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
> #define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
>
> -#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
> #define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
> #define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
> #define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
> --
> 2.25.1
>
On Mon, Dec 13, 2021, Suravee Suthikulpanit wrote:
> To prepare for upcoming AVIC changes. There is no functional change.
>
> Signed-off-by: Suravee Suthikulpanit <[email protected]>
> ---
> arch/x86/kvm/svm/avic.c | 10 ++++++++++
> arch/x86/kvm/svm/svm.c | 8 +-------
> arch/x86/kvm/svm/svm.h | 1 +
> 3 files changed, 12 insertions(+), 7 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index 8052d92069e0..63c3801d1829 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -1011,3 +1011,13 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
> kvm_vcpu_update_apicv(vcpu);
> avic_set_running(vcpu, true);
> }
> +
> +bool avic_hardware_setup(bool avic)
> +{
> + if (!avic || !npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC))
> + return false;
> +
> + pr_info("AVIC enabled\n");
> + amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
> + return true;
> +}
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 989685098b3e..e59f663ab8cb 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1031,13 +1031,7 @@ static __init int svm_hardware_setup(void)
> nrips = false;
> }
>
> - enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
> -
> - if (enable_apicv) {
> - pr_info("AVIC enabled\n");
> -
> - amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
> - }
> + enable_apicv = avic = avic_hardware_setup(avic);
Rather than pass in "avic", just do
enable_apicv = avic == avic && avic_hardware_setup();
This also conflicts with changes sitting in kvm/queue to nullify vcpu_(un)blocking
when AVIC is disabled. But moving AVIC setup to avic.c provides an opportunity for
further cleanup, as it means vcpu_(un)blocking can be NULL by default and set to
the AVIC helpers if and only if AVIC is enable. That will allow making the helpers
static in avic.c. E.g.
---
arch/x86/kvm/svm/avic.c | 17 +++++++++++++++--
arch/x86/kvm/svm/svm.c | 13 +------------
arch/x86/kvm/svm/svm.h | 3 +--
3 files changed, 17 insertions(+), 16 deletions(-)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 90364d02f22a..f5c6cab42d74 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -1027,7 +1027,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
-void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
+static void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
{
if (!kvm_vcpu_apicv_active(vcpu))
return;
@@ -1052,7 +1052,7 @@ void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
preempt_enable();
}
-void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
+static void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
int cpu;
@@ -1066,3 +1066,16 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
put_cpu();
}
+
+bool avic_hardware_setup(struct kvm_x86_ops *x86_ops)
+{
+ if (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC))
+ return false;
+
+ x86_ops->vcpu_blocking = avic_vcpu_blocking,
+ x86_ops->vcpu_unblocking = avic_vcpu_unblocking,
+
+ pr_info("AVIC enabled\n");
+ amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
+ return true;
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6cb38044a860..6cb0f58238cd 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4390,8 +4390,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.prepare_guest_switch = svm_prepare_guest_switch,
.vcpu_load = svm_vcpu_load,
.vcpu_put = svm_vcpu_put,
- .vcpu_blocking = avic_vcpu_blocking,
- .vcpu_unblocking = avic_vcpu_unblocking,
.update_exception_bitmap = svm_update_exception_bitmap,
.get_msr_feature = svm_get_msr_feature,
@@ -4674,16 +4672,7 @@ static __init int svm_hardware_setup(void)
nrips = false;
}
- enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
-
- if (enable_apicv) {
- pr_info("AVIC enabled\n");
-
- amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
- } else {
- svm_x86_ops.vcpu_blocking = NULL;
- svm_x86_ops.vcpu_unblocking = NULL;
- }
+ enable_apicv = avic = avic && avic_hardware_setup(&svm_x86_ops);
if (vls) {
if (!npt_enabled ||
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index daa8ca84afcc..59d91b969bd7 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -573,6 +573,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
+bool avic_hardware_setup(struct kvm_x86_ops *ops);
int avic_ga_log_notifier(u32 ga_tag);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
@@ -593,8 +594,6 @@ int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec);
bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu);
int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
-void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
-void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
/* sev.c */
--
Sean,
On 2/2/2022 4:57 AM, Sean Christopherson wrote:
> On Tue, Feb 01, 2022, Suthikulpanit, Suravee wrote:
>>> That implies that an APIC ID > 255 on older hardware what ignores bits 11:8 even
>>> in x2APIC will silently fail, and the whole point of this mask is to avoid exactly
>>> that.
>>
>> On current AMD system w/ x2APIC and 256 cpus (e.g. max APIC ID is 255), it would only
>> need 8 bits in the physical APIC ID table entry, and the bit 11:9 are reserved.
>> For newer system, it could take upto 12 bits to represent APIC ID.
>
> But x2APIC IDs are 32-bit values that, from the APM, are model specific:
>
> The x2APIC_ID is a concatenation of several fields such as socket ID, core ID
> and thread ID.
>
> Because the number of sockets, cores and threads may differ for each SOC, the
> format of x2APIC ID is model-dependent.
>
> In other words, there's nothing that _architecturally_ guarantees 8 bits are
> sufficient to hold the x2APIC ID.
Agree that there is nothing architecturally guarantee. Let's discuss this below....
>>> But at least one APM blurb appears to have been wrong (or the architecture is broken)
>>> prior to the larger AVIC width:
>>>
>>> Since a destination of FFh is used to specify a broadcast, physical APIC ID FFh
>>> is reserved.
>>>
>>> We have Rome systems with 256 CPUs and thus an x2APIC ID for a CPU of FFh. So
>>> either the APM is wrong or AVIC is broken on older large systems.
>>
>> Actually, the statement is referred to the guest physical APIC ID, which is used to
>> index the per-vm physical APIC table in the host. So, it should be correct in the case
>> of AVIC, which only support APIC mode in the guest.
>
> Ah. If you have the ear of the APM writers, can you ask that they insert a "guest",
> e.g. so that it reads:
>
> Since a destination of FFh is used to specify a broadcast, guest physical APIC ID FFh is reserved.
I'll let them know :)
>>> Anyways, for the new larger mask, IMO dynamically computing the mask based on what
>>> APIC IDs were enumerated to the kernel is pointless. If the AVIC doesn't support
>>> using bits 11:0 to address APIC IDs then KVM is silently hosed no matter what if
>>> any APIC ID is >255.
>>
>> The reason for dynamic mask is to protect the reserved bits, which varies between
>> the current platform (i.e 11:8) vs. newer platform (i.e. 11:10), in which
>> there is no good way to tell except to check the max_physical_apicid (see below).
>
> ...
>
>>> Ideally, there would be a feature flag enumerating the larger AVIC support so we
>>> could do:
>>>
>>> if (!x2apic_mode || !boot_cpu_has(X86_FEATURE_FANCY_NEW_AVIC))
>>> avic_host_physical_id_mask = GENMASK(7:0);
>>> else
>>> avic_host_physical_id_mask = GENMASK(11:0);
>>>
>>> but since it sounds like that's not the case, and presumably hardware is smart
>>> enough not to assign APIC IDs it can't address, this can simply be
>>>
>>> if (!x2apic_mode)
>>> avic_host_physical_id_mask = GENMASK(7:0);
>>> else
>>> avic_host_physical_id_mask = GENMASK(11:0);
>>>
>>> and patch 01 to add+export apic_get_max_phys_apicid() goes away.
>>
>> Unfortunately, we do not have the "X86_FEATURE_FANCY_NEW_AVIC" CPUID bit :(
>>
>> Also, based on the previous comment, we can't use the x2APIC mode in the host
>> to determine such condition. Hence, the need for dynamic mask based on
>> the max_physical_apicid.
>
> I don't get this. The APM literally says bits 11:8 are:
>
> Reserved/SBZ for legacy APIC; extension of Host Physical APIC ID when
> x2APIC is enabled.
>
> so we absolutely should be able to key off x2APIC mode. IMO, defining the mask
> based on apic_get_max_phys_apicid() is pointless and misleading. The only thing
> it really protects is passing in a completely bogus value, e.g. -1. If for some
> reason bits 11:8 are ignored/reserved by older CPUs even in x2APIC, and the CPU
> assigns an x2APIC ID with bits 11:8!=0, then KVM is hosed no matter what as the
> dynamic calculation will also allow the "bad" ID.
.... here
As I mentioned, the APM will be corrected to remove the word "x2APIC".
Essentially, it will be changed to:
* 7:0 - For systems w/ max APIC ID upto 255 (a.k.a old system)
* 11:8 - For systems w/ max APIC ID 256 and above (a.k.a new system). Otherwise, reserved and should be zero.
As for the required number of bits, there is no good way to tell what's the max
APIC ID would be on a particular system. Hence, we utilize the apic_get_max_phys_apicid()
to figure out how to properly program the table (which is leaving the reserved field
alone when making change to the table).
The avic_host_physical_id_mask is not just for protecting APIC ID larger than
the allowed fields. It is also currently used for clearing the old physical APIC ID table entry
before programing it with the new APIC ID.
So, What if we use the following logic:
+ u32 count = get_count_order(apic_get_max_phys_apicid());
+
+ /*
+ * Depending on the maximum host physical APIC ID available
+ * on the system, AVIC can support upto 8-bit or 12-bit host
+ * physical APIC ID.
+ */
+ if (count <= 8)
+ avic_host_physical_id_mask = GENMASK(7, 0);
+ else if (count <= 12)
+ avic_host_physical_id_mask = GENMASK(11, 0);
+ else
+ /* Warn and Disable AVIC here due to unable to satisfy APIC ID requirement */
Regards,
Suravee
Sean,
On 12/31/2021 12:21 AM, Sean Christopherson wrote:
> On Mon, Dec 13, 2021, Suravee Suthikulpanit wrote:
>> The AVIC physical APIC ID table entry contains the host physical
>> APIC ID field, which the hardware uses to keep track of where each
>> vCPU is running. Originally, the field is an 8-bit value, which can
>> only support physical APIC ID up to 255.
>>
>> To support system with larger APIC ID, the AVIC hardware extends
>> this field to support up to the largest possible physical APIC ID
>> available on the system.
>>
>> Therefore, replace the hard-coded mask value with the value
>> calculated from the maximum possible physical APIC ID in the system.
>
> ...
>
>> +static void avic_init_host_physical_apicid_mask(void)
>> +{
>> + if (!x2apic_mode) {
>> + /* If host is in xAPIC mode, default to only 8-bit mask. */
>> + avic_host_physical_id_mask = 0xffULL;
>
> Use GENMASK(7:0) instead of open coding the equivalent. Oh, and
> avic_host_physical_id_mask doesn't need to be a u64, it's hard capped at 12 bits
> and so can be a simple int.
>
Actually, shouldn't it be u16 since the value returned from kvm_cpu_get_apicid()
would typically be 16-bit value (despite it has int as a return type).
Regards,
Suravee
On Tue, Feb 01, 2022, Suthikulpanit, Suravee wrote:
> > That implies that an APIC ID > 255 on older hardware what ignores bits 11:8 even
> > in x2APIC will silently fail, and the whole point of this mask is to avoid exactly
> > that.
>
> On current AMD system w/ x2APIC and 256 cpus (e.g. max APIC ID is 255), it would only
> need 8 bits in the physical APIC ID table entry, and the bit 11:9 are reserved.
> For newer system, it could take upto 12 bits to represent APIC ID.
But x2APIC IDs are 32-bit values that, from the APM, are model specific:
The x2APIC_ID is a concatenation of several fields such as socket ID, core ID
and thread ID.
Because the number of sockets, cores and threads may differ for each SOC, the
format of x2APIC ID is model-dependent.
In other words, there's nothing that _architecturally_ guarantees 8 bits are
sufficient to hold the x2APIC ID.
> > But at least one APM blurb appears to have been wrong (or the architecture is broken)
> > prior to the larger AVIC width:
> >
> > Since a destination of FFh is used to specify a broadcast, physical APIC ID FFh
> > is reserved.
> >
> > We have Rome systems with 256 CPUs and thus an x2APIC ID for a CPU of FFh. So
> > either the APM is wrong or AVIC is broken on older large systems.
>
> Actually, the statement is referred to the guest physical APIC ID, which is used to
> index the per-vm physical APIC table in the host. So, it should be correct in the case
> of AVIC, which only support APIC mode in the guest.
Ah. If you have the ear of the APM writers, can you ask that they insert a "guest",
e.g. so that it reads:
Since a destination of FFh is used to specify a broadcast, guest physical APIC ID FFh is reserved.
> > Anyways, for the new larger mask, IMO dynamically computing the mask based on what
> > APIC IDs were enumerated to the kernel is pointless. If the AVIC doesn't support
> > using bits 11:0 to address APIC IDs then KVM is silently hosed no matter what if
> > any APIC ID is >255.
>
> The reason for dynamic mask is to protect the reserved bits, which varies between
> the current platform (i.e 11:8) vs. newer platform (i.e. 11:10), in which
> there is no good way to tell except to check the max_physical_apicid (see below).
...
> > Ideally, there would be a feature flag enumerating the larger AVIC support so we
> > could do:
> >
> > if (!x2apic_mode || !boot_cpu_has(X86_FEATURE_FANCY_NEW_AVIC))
> > avic_host_physical_id_mask = GENMASK(7:0);
> > else
> > avic_host_physical_id_mask = GENMASK(11:0);
> >
> > but since it sounds like that's not the case, and presumably hardware is smart
> > enough not to assign APIC IDs it can't address, this can simply be
> >
> > if (!x2apic_mode)
> > avic_host_physical_id_mask = GENMASK(7:0);
> > else
> > avic_host_physical_id_mask = GENMASK(11:0);
> >
> > and patch 01 to add+export apic_get_max_phys_apicid() goes away.
>
> Unfortunately, we do not have the "X86_FEATURE_FANCY_NEW_AVIC" CPUID bit :(
>
> Also, based on the previous comment, we can't use the x2APIC mode in the host
> to determine such condition. Hence, the need for dynamic mask based on
> the max_physical_apicid.
I don't get this. The APM literally says bits 11:8 are:
Reserved/SBZ for legacy APIC; extension of Host Physical APIC ID when
x2APIC is enabled.
so we absolutely should be able to key off x2APIC mode. IMO, defining the mask
based on apic_get_max_phys_apicid() is pointless and misleading. The only thing
it really protects is passing in a completely bogus value, e.g. -1. If for some
reason bits 11:8 are ignored/reserved by older CPUs even in x2APIC, and the CPU
assigns an x2APIC ID with bits 11:8!=0, then KVM is hosed no matter what as the
dynamic calculation will also allow the "bad" ID.
Hi Sean,
On 12/31/2021 12:21 AM, Sean Christopherson wrote:
> On Mon, Dec 13, 2021, Suravee Suthikulpanit wrote:
>> .....
>> + } else {
>> + u32 count = get_count_order(apic_get_max_phys_apicid());
>> +
>> + avic_host_physical_id_mask = BIT_ULL(count) - 1;
>> + }
>
> Why is the "legal" mask dynamically calculated? That's way more complicated and
> convoluted then this needs to be.
>
> The cover letter says
>
> However, newer AMD systems can have physical APIC ID larger than 255,
> and AVIC hardware has been extended to support upto the maximum physical
> APIC ID available in the system.
>
> and newer versions of the APM have bits
>
> 11:8 - Reserved/SBZ for legacy APIC; extension of Host Physical APIC ID when
> x2APIC is enabled.
> 7:0 - Host Physical APIC ID Physical APIC ID of the physical core allocated by
> the VMM to host the guest virtual processor. This field is not valid
> unless the IsRunning bit is set.
>
> whereas older versions have
>
> 11:8 - Reserved, SBZ. Should always be set to zero.
>
I have checked with the hardware and documentation team. The statement regarding "x2APIC"
is not accurate and will be corrected. Sorry for confusion.
> That implies that an APIC ID > 255 on older hardware what ignores bits 11:8 even
> in x2APIC will silently fail, and the whole point of this mask is to avoid exactly
> that.
On current AMD system w/ x2APIC and 256 cpus (e.g. max APIC ID is 255), it would only
need 8 bits in the physical APIC ID table entry, and the bit 11:9 are reserved.
For newer system, it could take upto 12 bits to represent APIC ID.
> To further confuse things, the APM was only partially updated and needs to be fixed,
> e.g. "Figure 15-19. Physical APIC Table in Memory" and the following blurb wasn't
> updated to account for the new x2APIC behavior.
Noted. I'll inform the team.
> But at least one APM blurb appears to have been wrong (or the architecture is broken)
> prior to the larger AVIC width:
>
> Since a destination of FFh is used to specify a broadcast, physical APIC ID FFh
> is reserved.
>
> We have Rome systems with 256 CPUs and thus an x2APIC ID for a CPU of FFh. So
> either the APM is wrong or AVIC is broken on older large systems.
Actually, the statement is referred to the guest physical APIC ID, which is used to
index the per-vm physical APIC table in the host. So, it should be correct in the case
of AVIC, which only support APIC mode in the guest.
> Anyways, for the new larger mask, IMO dynamically computing the mask based on what
> APIC IDs were enumerated to the kernel is pointless. If the AVIC doesn't support
> using bits 11:0 to address APIC IDs then KVM is silently hosed no matter what if
> any APIC ID is >255.
The reason for dynamic mask is to protect the reserved bits, which varies between
the current platform (i.e 11:8) vs. newer platform (i.e. 11:10), in which
there is no good way to tell except to check the max_physical_apicid (see below).
> Ideally, there would be a feature flag enumerating the larger AVIC support so we
> could do:
>
> if (!x2apic_mode || !boot_cpu_has(X86_FEATURE_FANCY_NEW_AVIC))
> avic_host_physical_id_mask = GENMASK(7:0);
> else
> avic_host_physical_id_mask = GENMASK(11:0);
>
> but since it sounds like that's not the case, and presumably hardware is smart
> enough not to assign APIC IDs it can't address, this can simply be
>
> if (!x2apic_mode)
> avic_host_physical_id_mask = GENMASK(7:0);
> else
> avic_host_physical_id_mask = GENMASK(11:0);
>
> and patch 01 to add+export apic_get_max_phys_apicid() goes away.
Unfortunately, we do not have the "X86_FEATURE_FANCY_NEW_AVIC" CPUID bit :(
Also, based on the previous comment, we can't use the x2APIC mode in the host
to determine such condition. Hence, the need for dynamic mask based on
the max_physical_apicid.
>> + pr_debug("Using AVIC host physical APIC ID mask %#0llx\n",
>> + avic_host_physical_id_mask);
>> +}
>> +
>> int avic_vm_init(struct kvm *kvm)
>> {
>> unsigned long flags;
>> @@ -943,22 +959,17 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
>> void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>> {
>> u64 entry;
>> - /* ID = 0xff (broadcast), ID > 0xff (reserved) */
>> int h_physical_id = kvm_cpu_get_apicid(cpu);
>> struct vcpu_svm *svm = to_svm(vcpu);
>>
>> - /*
>> - * Since the host physical APIC id is 8 bits,
>> - * we can support host APIC ID upto 255.
>> - */
>> - if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
>> + if (WARN_ON(h_physical_id > avic_host_physical_id_mask))
>
> Not really your code, but this should really be
>
> if (WARN_ON((h_physical_id & avic_host_physical_id_mask) != h_physical_id))
> return;
>
> otherwise a negative value will get a false negative.
I can do this in v4.
Regards,
Suravee
On Wed, Feb 02, 2022, Sean Christopherson wrote:
> On Wed, Feb 02, 2022, Suthikulpanit, Suravee wrote:
> > As I mentioned, the APM will be corrected to remove the word "x2APIC".
>
> Ah, I misunderstood what part was being amended.
>
> > Essentially, it will be changed to:
> >
> > * 7:0 - For systems w/ max APIC ID upto 255 (a.k.a old system)
> > * 11:8 - For systems w/ max APIC ID 256 and above (a.k.a new system). Otherwise, reserved and should be zero.
> >
> > As for the required number of bits, there is no good way to tell what's the max
> > APIC ID would be on a particular system. Hence, we utilize the apic_get_max_phys_apicid()
> > to figure out how to properly program the table (which is leaving the reserved field
> > alone when making change to the table).
> >
> > The avic_host_physical_id_mask is not just for protecting APIC ID larger than
> > the allowed fields. It is also currently used for clearing the old physical APIC ID table entry
> > before programing it with the new APIC ID.
>
> Just clear 11:0 unconditionally, the reserved bits are Should Be Zero.
Actually, that needs to be crafted a bug fix that's sent to stable@, otherwise
running old kernels on new hardware will break. I'm pretty sure this is the
entirety of what's needed.
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 90364d02f22a..e4cfd8bf4f24 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -974,17 +974,12 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
u64 entry;
- /* ID = 0xff (broadcast), ID > 0xff (reserved) */
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
lockdep_assert_preemption_disabled();
- /*
- * Since the host physical APIC id is 8 bits,
- * we can support host APIC ID upto 255.
- */
- if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
+ if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
return;
/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 73525353e424..a157af1cce6a 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -560,7 +560,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31
#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31)
-#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK (0xFFULL)
+#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11:0)
#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12)
#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62)
#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63)
Hi Sean,
Thanks for the suggestion. I'll update this in v4.
Regards,
Suravee
On 12/31/2021 12:26 AM, Sean Christopherson wrote:
> On Mon, Dec 13, 2021, Suravee Suthikulpanit wrote:
>> To prepare for upcoming AVIC changes. There is no functional change.
>>
>> Signed-off-by: Suravee Suthikulpanit <[email protected]>
>> ---
>> arch/x86/kvm/svm/avic.c | 10 ++++++++++
>> arch/x86/kvm/svm/svm.c | 8 +-------
>> arch/x86/kvm/svm/svm.h | 1 +
>> 3 files changed, 12 insertions(+), 7 deletions(-)
>>
>> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
>> index 8052d92069e0..63c3801d1829 100644
>> --- a/arch/x86/kvm/svm/avic.c
>> +++ b/arch/x86/kvm/svm/avic.c
>> @@ -1011,3 +1011,13 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
>> kvm_vcpu_update_apicv(vcpu);
>> avic_set_running(vcpu, true);
>> }
>> +
>> +bool avic_hardware_setup(bool avic)
>> +{
>> + if (!avic || !npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC))
>> + return false;
>> +
>> + pr_info("AVIC enabled\n");
>> + amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
>> + return true;
>> +}
>> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>> index 989685098b3e..e59f663ab8cb 100644
>> --- a/arch/x86/kvm/svm/svm.c
>> +++ b/arch/x86/kvm/svm/svm.c
>> @@ -1031,13 +1031,7 @@ static __init int svm_hardware_setup(void)
>> nrips = false;
>> }
>>
>> - enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
>> -
>> - if (enable_apicv) {
>> - pr_info("AVIC enabled\n");
>> -
>> - amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
>> - }
>> + enable_apicv = avic = avic_hardware_setup(avic);
>
> Rather than pass in "avic", just do
>
> enable_apicv = avic == avic && avic_hardware_setup();
>
> This also conflicts with changes sitting in kvm/queue to nullify vcpu_(un)blocking
> when AVIC is disabled. But moving AVIC setup to avic.c provides an opportunity for
> further cleanup, as it means vcpu_(un)blocking can be NULL by default and set to
> the AVIC helpers if and only if AVIC is enable. That will allow making the helpers
> static in avic.c. E.g.
>
> ---
> arch/x86/kvm/svm/avic.c | 17 +++++++++++++++--
> arch/x86/kvm/svm/svm.c | 13 +------------
> arch/x86/kvm/svm/svm.h | 3 +--
> 3 files changed, 17 insertions(+), 16 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index 90364d02f22a..f5c6cab42d74 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -1027,7 +1027,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
> WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
> }
>
> -void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
> +static void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
> {
> if (!kvm_vcpu_apicv_active(vcpu))
> return;
> @@ -1052,7 +1052,7 @@ void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
> preempt_enable();
> }
>
> -void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
> +static void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
> {
> int cpu;
>
> @@ -1066,3 +1066,16 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
>
> put_cpu();
> }
> +
> +bool avic_hardware_setup(struct kvm_x86_ops *x86_ops)
> +{
> + if (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC))
> + return false;
> +
> + x86_ops->vcpu_blocking = avic_vcpu_blocking,
> + x86_ops->vcpu_unblocking = avic_vcpu_unblocking,
> +
> + pr_info("AVIC enabled\n");
> + amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
> + return true;
> +}
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 6cb38044a860..6cb0f58238cd 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -4390,8 +4390,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
> .prepare_guest_switch = svm_prepare_guest_switch,
> .vcpu_load = svm_vcpu_load,
> .vcpu_put = svm_vcpu_put,
> - .vcpu_blocking = avic_vcpu_blocking,
> - .vcpu_unblocking = avic_vcpu_unblocking,
>
> .update_exception_bitmap = svm_update_exception_bitmap,
> .get_msr_feature = svm_get_msr_feature,
> @@ -4674,16 +4672,7 @@ static __init int svm_hardware_setup(void)
> nrips = false;
> }
>
> - enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
> -
> - if (enable_apicv) {
> - pr_info("AVIC enabled\n");
> -
> - amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
> - } else {
> - svm_x86_ops.vcpu_blocking = NULL;
> - svm_x86_ops.vcpu_unblocking = NULL;
> - }
> + enable_apicv = avic = avic && avic_hardware_setup(&svm_x86_ops);
>
> if (vls) {
> if (!npt_enabled ||
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index daa8ca84afcc..59d91b969bd7 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -573,6 +573,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
>
> #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL
>
> +bool avic_hardware_setup(struct kvm_x86_ops *ops);
> int avic_ga_log_notifier(u32 ga_tag);
> void avic_vm_destroy(struct kvm *kvm);
> int avic_vm_init(struct kvm *kvm);
> @@ -593,8 +594,6 @@ int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec);
> bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu);
> int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
> uint32_t guest_irq, bool set);
> -void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
> -void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
>
> /* sev.c */
>
> --
>
>
On Wed, Feb 02, 2022, Suthikulpanit, Suravee wrote:
> As I mentioned, the APM will be corrected to remove the word "x2APIC".
Ah, I misunderstood what part was being amended.
> Essentially, it will be changed to:
>
> * 7:0 - For systems w/ max APIC ID upto 255 (a.k.a old system)
> * 11:8 - For systems w/ max APIC ID 256 and above (a.k.a new system). Otherwise, reserved and should be zero.
>
> As for the required number of bits, there is no good way to tell what's the max
> APIC ID would be on a particular system. Hence, we utilize the apic_get_max_phys_apicid()
> to figure out how to properly program the table (which is leaving the reserved field
> alone when making change to the table).
>
> The avic_host_physical_id_mask is not just for protecting APIC ID larger than
> the allowed fields. It is also currently used for clearing the old physical APIC ID table entry
> before programing it with the new APIC ID.
Just clear 11:0 unconditionally, the reserved bits are Should Be Zero.
> So, What if we use the following logic:
>
> + u32 count = get_count_order(apic_get_max_phys_apicid());
> +
> + /*
> + * Depending on the maximum host physical APIC ID available
> + * on the system, AVIC can support upto 8-bit or 12-bit host
> + * physical APIC ID.
> + */
> + if (count <= 8)
> + avic_host_physical_id_mask = GENMASK(7, 0);
> + else if (count <= 12)
> + avic_host_physical_id_mask = GENMASK(11, 0);
> + else
> + /* Warn and Disable AVIC here due to unable to satisfy APIC ID requirement */
I still don't see the point. It's using the max APIC ID to verify that the max
APIC ID is valid. Either we trust hardware to not screw up assigning APIC IDs,
or we don't use AVIC.
On 2/1/2022 7:58 PM, Suthikulpanit, Suravee wrote:
>> Anyways, for the new larger mask, IMO dynamically computing the mask based on what
>> APIC IDs were enumerated to the kernel is pointless. If the AVIC doesn't support
>> using bits 11:0 to address APIC IDs then KVM is silently hosed no matter what if
>> any APIC ID is >255.
>
> The reason for dynamic mask is to protect the reserved bits, which varies between
> the current platform (i.e 11:8) vs. newer platform (i.e. 11:10), in which
> there is no good way to tell except to check the max_physical_apicid (see below).
>
>> Ideally, there would be a feature flag enumerating the larger AVIC support so we
>> could do:
>>
>> if (!x2apic_mode || !boot_cpu_has(X86_FEATURE_FANCY_NEW_AVIC))
>> avic_host_physical_id_mask = GENMASK(7:0);
>> else
>> avic_host_physical_id_mask = GENMASK(11:0);
>>
>> but since it sounds like that's not the case, and presumably hardware is smart
>> enough not to assign APIC IDs it can't address, this can simply be
>>
>> if (!x2apic_mode)
>> avic_host_physical_id_mask = GENMASK(7:0);
>> else
>> avic_host_physical_id_mask = GENMASK(11:0);
>>
>> and patch 01 to add+export apic_get_max_phys_apicid() goes away.
>
> Unfortunately, we do not have the "X86_FEATURE_FANCY_NEW_AVIC" CPUID bit :(
>
> Also, based on the previous comment, we can't use the x2APIC mode in the host
> to determine such condition. Hence, the need for dynamic mask based on
> the max_physical_apicid.
I recheck this part, and it should be safe to assume that AVIC HW can support
upto 8-bit (old platform) vs. 12-bit (new platform) depending on the maximum
host physical APIC ID available on the system.
I'll simplify this in v4.
Regards,
Suravee