v3:
* fixes off-by-one error in [2/4]
* moves phys_map changes from [2/4] to [4/4]; [2/4] doesn't carry
David's r-b to [v2 2/4] because of this change
* expands a comment in [4/4]
v2: http://www.spinics.net/lists/kvm/msg142414.html
> Removes the capability that let userspace know of changes.
v1: http://www.spinics.net/lists/kvm/msg141944.html
> The problem is described in [4/4].
>
> [1/4] is a prerequisite to allow a cleanup in [2/4].
> [2/4] hopefully makes [4/4] easier to understand.
> [3/4] fixes mistakes from dealing with the mixed mode.
> [4/4] allows the hotplug and adds a capability for it.
Radim Krčmář (4):
KVM: x86: use delivery to self in hyperv synic
KVM: x86: replace kvm_apic_id with kvm_{x,x2}apic_id
KVM: x86: make interrupt delivery fast and slow path behave the same
KVM: x86: allow hotplug of VCPU with APIC ID over 0xff
arch/x86/kvm/hyperv.c | 4 +--
arch/x86/kvm/lapic.c | 67 +++++++++++++++++++++++++++++++++++++--------------
arch/x86/kvm/lapic.h | 11 ---------
3 files changed, 51 insertions(+), 31 deletions(-)
--
2.11.0
Interrupt to self can be sent without knowing the APIC ID.
Reviewed-by: David Hildenbrand <[email protected]>
Signed-off-by: Radim Krčmář <[email protected]>
---
v2: r-b David
---
arch/x86/kvm/hyperv.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 99cde5220e07..313957ec9a9d 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -305,13 +305,13 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
return -ENOENT;
memset(&irq, 0, sizeof(irq));
- irq.dest_id = kvm_apic_id(vcpu->arch.apic);
+ irq.shorthand = APIC_DEST_SELF;
irq.dest_mode = APIC_DEST_PHYSICAL;
irq.delivery_mode = APIC_DM_FIXED;
irq.vector = vector;
irq.level = 1;
- ret = kvm_irq_delivery_to_apic(vcpu->kvm, NULL, &irq, NULL);
+ ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL);
trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
return ret;
}
--
2.11.0
There were three call sites:
- recalculate_apic_map and kvm_apic_match_physical_addr, where it would
only complicate implementation of x2APIC hotplug;
- in apic_debug, where it was still somewhat preserved, but keeping the
old function just for apic_debug was not worth it
Signed-off-by: Radim Krčmář <[email protected]>
---
v3:
* keep correct max_id = 255 [David]
* postpone phys_map changes to patch 4 [David]
---
arch/x86/kvm/lapic.c | 31 ++++++++++++++++++++++---------
arch/x86/kvm/lapic.h | 11 -----------
2 files changed, 22 insertions(+), 20 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 09edd32b8e42..81cc93580c40 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -115,6 +115,16 @@ static inline int apic_enabled(struct kvm_lapic *apic)
(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
+static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
+{
+ return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
+}
+
+static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
+{
+ return apic->vcpu->vcpu_id;
+}
+
static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
switch (map->mode) {
@@ -159,13 +169,13 @@ static void recalculate_apic_map(struct kvm *kvm)
struct kvm_apic_map *new, *old = NULL;
struct kvm_vcpu *vcpu;
int i;
- u32 max_id = 255;
+ u32 max_id = 255; /* enough space for any xAPIC ID */
mutex_lock(&kvm->arch.apic_map_lock);
kvm_for_each_vcpu(i, vcpu, kvm)
if (kvm_apic_present(vcpu))
- max_id = max(max_id, kvm_apic_id(vcpu->arch.apic));
+ max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
new = kvm_kvzalloc(sizeof(struct kvm_apic_map) +
sizeof(struct kvm_lapic *) * ((u64)max_id + 1));
@@ -184,12 +194,13 @@ static void recalculate_apic_map(struct kvm *kvm)
if (!kvm_apic_present(vcpu))
continue;
- aid = kvm_apic_id(apic);
- ldr = kvm_lapic_get_reg(apic, APIC_LDR);
-
+ aid = apic_x2apic_mode(apic) ? kvm_x2apic_id(apic)
+ : kvm_xapic_id(apic);
if (aid <= new->max_apic_id)
new->phys_map[aid] = apic;
+ ldr = kvm_lapic_get_reg(apic, APIC_LDR);
+
if (apic_x2apic_mode(apic)) {
new->mode |= KVM_APIC_MODE_X2APIC;
} else if (ldr) {
@@ -250,6 +261,8 @@ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
{
u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
+ WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
+
kvm_lapic_set_reg(apic, APIC_ID, id);
kvm_lapic_set_reg(apic, APIC_LDR, ldr);
recalculate_apic_map(apic->vcpu->kvm);
@@ -591,9 +604,9 @@ static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
return true;
if (apic_x2apic_mode(apic))
- return mda == kvm_apic_id(apic);
+ return mda == kvm_x2apic_id(apic);
- return mda == SET_APIC_DEST_FIELD(kvm_apic_id(apic));
+ return mda == SET_APIC_DEST_FIELD(kvm_xapic_id(apic));
}
static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
@@ -1907,9 +1920,9 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.apic_arb_prio = 0;
vcpu->arch.apic_attention = 0;
- apic_debug("%s: vcpu=%p, id=%d, base_msr="
+ apic_debug("%s: vcpu=%p, id=0x%x, base_msr="
"0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
- vcpu, kvm_apic_id(apic),
+ vcpu, kvm_lapic_get_reg(apic, APIC_ID),
vcpu->arch.apic_base, apic->base_address);
}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index e0c80233b3e1..cb16e6fd2330 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -202,17 +202,6 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
}
-static inline u32 kvm_apic_id(struct kvm_lapic *apic)
-{
- /* To avoid a race between apic_base and following APIC_ID update when
- * switching to x2apic_mode, the x2apic mode returns initial x2apic id.
- */
- if (apic_x2apic_mode(apic))
- return apic->vcpu->vcpu_id;
-
- return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
-}
-
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
void wait_lapic_expire(struct kvm_vcpu *vcpu);
--
2.11.0
LAPIC after reset is in xAPIC mode, which poses a problem for hotplug of
VCPUs with high APIC ID, because reset VCPU is waiting for INIT/SIPI,
but there is no way to uniquely address it using xAPIC.
From many possible options, we chose the one that also works on real
hardware: accepting interrupts addressed to LAPIC's x2APIC ID even in
xAPIC mode.
KVM intentionally differs from real hardware, because real hardware
(Knights Landing) does just "x2apic_id & 0xff" to decide whether to
accept the interrupt in xAPIC mode and it can deliver one interrupt to
more than one physical destination, e.g. 0x123 to 0x123 and 0x23.
Fixes: 682f732ecf73 ("KVM: x86: bump MAX_VCPUS to 288")
Signed-off-by: Radim Krčmář <[email protected]>
---
v3:
* move code that populated physical_map from patch 2 [David]
* extend a comment in kvm_apic_match_physical_addr()
v2:
* Do not add capability [David, Paolo]
* Tag it as a fix [David]
---
arch/x86/kvm/lapic.c | 30 +++++++++++++++++++++++++-----
1 file changed, 25 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 519b72bbf947..c6758bdc2c24 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -189,15 +189,26 @@ static void recalculate_apic_map(struct kvm *kvm)
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_lapic **cluster;
u16 mask;
- u32 ldr, aid;
+ u32 ldr;
+ u8 xapic_id;
+ u32 x2apic_id;
if (!kvm_apic_present(vcpu))
continue;
- aid = apic_x2apic_mode(apic) ? kvm_x2apic_id(apic)
- : kvm_xapic_id(apic);
- if (aid <= new->max_apic_id)
- new->phys_map[aid] = apic;
+ xapic_id = kvm_xapic_id(apic);
+ x2apic_id = kvm_x2apic_id(apic);
+
+ /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
+ if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
+ x2apic_id <= new->max_apic_id)
+ new->phys_map[x2apic_id] = apic;
+ /*
+ * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
+ * prevent them from masking VCPUs with APIC ID <= 0xff.
+ */
+ if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
+ new->phys_map[xapic_id] = apic;
ldr = kvm_lapic_get_reg(apic, APIC_LDR);
@@ -606,6 +617,15 @@ static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
if (apic_x2apic_mode(apic))
return mda == kvm_x2apic_id(apic);
+ /*
+ * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
+ * it were in x2APIC mode. Hotplugged VCPUs start in xAPIC mode and
+ * this allows unique addressing of VCPUs with APIC ID over 0xff.
+ * The 0xff condition is needed because the xAPIC ID is writeable.
+ */
+ if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
+ return true;
+
return mda == kvm_xapic_id(apic);
}
--
2.11.0
Slow path tried to prevent IPIs from x2APIC VCPUs from being delivered
to xAPIC VCPUs and vice-versa. Make slow path behave like fast path,
which never distinguished that.
Signed-off-by: Radim Krčmář <[email protected]>
---
arch/x86/kvm/lapic.c | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 81cc93580c40..519b72bbf947 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -606,7 +606,7 @@ static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
if (apic_x2apic_mode(apic))
return mda == kvm_x2apic_id(apic);
- return mda == SET_APIC_DEST_FIELD(kvm_xapic_id(apic));
+ return mda == kvm_xapic_id(apic);
}
static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
@@ -623,7 +623,6 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
&& (logical_id & mda & 0xffff) != 0;
logical_id = GET_APIC_LOGICAL_ID(logical_id);
- mda = GET_APIC_DEST_FIELD(mda);
switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
case APIC_DFR_FLAT:
@@ -640,9 +639,9 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
/* The KVM local APIC implementation has two quirks:
*
- * - the xAPIC MDA stores the destination at bits 24-31, while this
- * is not true of struct kvm_lapic_irq's dest_id field. This is
- * just a quirk in the API and is not problematic.
+ * - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
+ * in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
+ * KVM doesn't do that aliasing.
*
* - in-kernel IOAPIC messages have to be delivered directly to
* x2APIC, because the kernel does not support interrupt remapping.
@@ -658,13 +657,12 @@ static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
struct kvm_lapic *source, struct kvm_lapic *target)
{
bool ipi = source != NULL;
- bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
- !ipi && dest_id == APIC_BROADCAST && x2apic_mda)
+ !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
return X2APIC_BROADCAST;
- return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
+ return dest_id;
}
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
--
2.11.0
Am 15.12.2016 um 18:06 schrieb Radim Krčmář:
> There were three call sites:
> - recalculate_apic_map and kvm_apic_match_physical_addr, where it would
> only complicate implementation of x2APIC hotplug;
> - in apic_debug, where it was still somewhat preserved, but keeping the
> old function just for apic_debug was not worth it
>
> Signed-off-by: Radim Krčmář <[email protected]>
Reviewed-by: David Hildenbrand <[email protected]>
--
David
Am 15.12.2016 um 18:06 schrieb Radim Krčmář:
> Slow path tried to prevent IPIs from x2APIC VCPUs from being delivered
> to xAPIC VCPUs and vice-versa. Make slow path behave like fast path,
> which never distinguished that.
>
> Signed-off-by: Radim Krčmář <[email protected]>
Just to verify:
The GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST is still correct in
kvm_apic_broadcast() ?
Apart from that thing confusing me, this looks good to me :)
--
David
Am 15.12.2016 um 18:06 schrieb Radim Krčmář:
> LAPIC after reset is in xAPIC mode, which poses a problem for hotplug of
> VCPUs with high APIC ID, because reset VCPU is waiting for INIT/SIPI,
> but there is no way to uniquely address it using xAPIC.
>
> From many possible options, we chose the one that also works on real
> hardware: accepting interrupts addressed to LAPIC's x2APIC ID even in
> xAPIC mode.
>
> KVM intentionally differs from real hardware, because real hardware
> (Knights Landing) does just "x2apic_id & 0xff" to decide whether to
> accept the interrupt in xAPIC mode and it can deliver one interrupt to
> more than one physical destination, e.g. 0x123 to 0x123 and 0x23.
>
> Fixes: 682f732ecf73 ("KVM: x86: bump MAX_VCPUS to 288")
> Signed-off-by: Radim Krčmář <[email protected]>
Reviewed-by: David Hildenbrand <[email protected]>
--
David
On 16/12/2016 17:41, David Hildenbrand wrote:
>
> Just to verify:
>
> The GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST is still correct in
> kvm_apic_broadcast() ?
No, it seems wrong.
Paolo
2016-12-16 17:53+0100, Paolo Bonzini:
> On 16/12/2016 17:41, David Hildenbrand wrote:
>>
>> Just to verify:
>>
>> The GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST is still correct in
>> kvm_apic_broadcast() ?
>
> No, it seems wrong.
Definitely, sending v4 of this patch.
I'll also add broadcast address check into kvm-unit-tests ...
---8<---
Slow path tried to prevent IPIs from x2APIC VCPUs from being delivered
to xAPIC VCPUs and vice-versa. Make slow path behave like fast path,
which never distinguished that.
Signed-off-by: Radim Krčmář <[email protected]>
---
v4: fix xAPIC broadcast address detection [David]
---
arch/x86/kvm/lapic.c | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 81cc93580c40..dba19cb82c3e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -592,10 +592,8 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
{
- if (apic_x2apic_mode(apic))
- return mda == X2APIC_BROADCAST;
-
- return GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST;
+ return mda == (apic_x2apic_mode(apic) ?
+ X2APIC_BROADCAST : APIC_BROADCAST);
}
static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
@@ -606,7 +604,7 @@ static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
if (apic_x2apic_mode(apic))
return mda == kvm_x2apic_id(apic);
- return mda == SET_APIC_DEST_FIELD(kvm_xapic_id(apic));
+ return mda == kvm_xapic_id(apic);
}
static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
@@ -623,7 +621,6 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
&& (logical_id & mda & 0xffff) != 0;
logical_id = GET_APIC_LOGICAL_ID(logical_id);
- mda = GET_APIC_DEST_FIELD(mda);
switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
case APIC_DFR_FLAT:
@@ -640,9 +637,9 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
/* The KVM local APIC implementation has two quirks:
*
- * - the xAPIC MDA stores the destination at bits 24-31, while this
- * is not true of struct kvm_lapic_irq's dest_id field. This is
- * just a quirk in the API and is not problematic.
+ * - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
+ * in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
+ * KVM doesn't do that aliasing.
*
* - in-kernel IOAPIC messages have to be delivered directly to
* x2APIC, because the kernel does not support interrupt remapping.
@@ -658,13 +655,12 @@ static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
struct kvm_lapic *source, struct kvm_lapic *target)
{
bool ipi = source != NULL;
- bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
- !ipi && dest_id == APIC_BROADCAST && x2apic_mda)
+ !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
return X2APIC_BROADCAST;
- return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
+ return dest_id;
}
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
--
2.11.0
Am 16.12.2016 um 18:07 schrieb Radim Krčmář:
> 2016-12-16 17:53+0100, Paolo Bonzini:
>> On 16/12/2016 17:41, David Hildenbrand wrote:
>>>
>>> Just to verify:
>>>
>>> The GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST is still correct in
>>> kvm_apic_broadcast() ?
>>
>> No, it seems wrong.
>
> Definitely, sending v4 of this patch.
> I'll also add broadcast address check into kvm-unit-tests ...
> ---8<---
> Slow path tried to prevent IPIs from x2APIC VCPUs from being delivered
> to xAPIC VCPUs and vice-versa. Make slow path behave like fast path,
> which never distinguished that.
>
> Signed-off-by: Radim Krčmář <[email protected]>
> ---
> v4: fix xAPIC broadcast address detection [David]
> ---
> arch/x86/kvm/lapic.c | 20 ++++++++------------
> 1 file changed, 8 insertions(+), 12 deletions(-)
>
Looks good to me.
Reviewed-by: David Hildenbrand <[email protected]>
--
David