Tim Shearer reported that "There is a guest which is running a packet
forwarding app based on the DPDK (dpdk.org). The packet receive routine
writes to 0xc070 using glibc's "outw_p" function which does an additional
write to I/O port 0x80. It does this write for every packet that's
received, causing a flood of KVM userspace context switches". He uses
mpstat to observe a CPU performing L2 packet forwarding on a pinned
guest vCPU, the guest time is 95 percent when allowing I/O port 0x80
bypass, however, it is 65.78 percent when I/O port 0x80 bypass is
disabled.
This patchset introduces per-VM I/O permission bitmaps so that userspace
can disable the ioport intercept when it is more concerned with
performance than with security.
Cc: Paolo Bonzini <[email protected]>
Cc: Radim Krčmář <[email protected]>
Cc: Tim Shearer <[email protected]>
Cc: Liran Alon <[email protected]>
Wanpeng Li (3):
KVM: VMX: Introduce per-VM I/O permission bitmaps
KVM: X86: Allow userspace to disable ioport intercept
KVM: VMX: Allow I/O port 0x80 bypass when userspace prefer
Documentation/virtual/kvm/api.txt | 11 +++++++++++
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/kvm/vmx.c | 41 ++++++++++++++++++++++++++++++++++++---
arch/x86/kvm/x86.c | 5 +++++
include/uapi/linux/kvm.h | 1 +
5 files changed, 57 insertions(+), 3 deletions(-)
--
2.7.4
From: Wanpeng Li <[email protected]>
Tim Shearer reported that "There is a guest which is running a packet
forwarding app based on the DPDK (dpdk.org). The packet receive routine
writes to 0xc070 using glibc's "outw_p" function which does an additional
write to I/O port 0x80. It does this write for every packet that's received,
causing a flood of KVM userspace context switches". He uses mpstat to
observe a CPU performing L2 packet forwarding on a pinned guest vCPU,
the guest time is 95 percent when allowing I/O port 0x80 bypass, however,
it is 65.78 percent when I/O port 0x80 bypass is disabled.
This patch allows I/O port 0x80 bypass when userspace requests it.
Cc: Paolo Bonzini <[email protected]>
Cc: Radim Krčmář <[email protected]>
Cc: Tim Shearer <[email protected]>
Cc: Liran Alon <[email protected]>
Signed-off-by: Wanpeng Li <[email protected]>
---
arch/x86/kvm/vmx.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ebf1140..d3e5fef 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10118,6 +10118,13 @@ static int vmx_vm_init(struct kvm *kvm)
goto out;
memset(kvm_vmx->vmx_io_bitmap[i], 0xff, PAGE_SIZE);
}
+ if (kvm->arch.ioport_disable_intercept) {
+ /*
+ * Allow direct access to the PC debug port (it is often used for I/O
+ * delays, but the vmexits simply slow things down).
+ */
+ clear_bit(0x80, kvm_vmx->vmx_io_bitmap[VMX_IO_BITMAP_A]);
+ }
return 0;
out:
--
2.7.4
From: Wanpeng Li <[email protected]>
This patch introduces per-VM I/O permission bitmaps, which will be used by
later patches.
Cc: Paolo Bonzini <[email protected]>
Cc: Radim Krčmář <[email protected]>
Cc: Tim Shearer <[email protected]>
Cc: Liran Alon <[email protected]>
Signed-off-by: Wanpeng Li <[email protected]>
---
arch/x86/kvm/vmx.c | 34 +++++++++++++++++++++++++++++++---
1 file changed, 31 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5b49ad4..ebf1140 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -189,12 +189,19 @@ module_param(ple_window_max, uint, 0444);
extern const ulong vmx_return;
+enum {
+ VMX_IO_BITMAP_A,
+ VMX_IO_BITMAP_B,
+ VMX_IO_BITMAP_NR
+};
+
struct kvm_vmx {
struct kvm kvm;
unsigned int tss_addr;
bool ept_identity_pagetable_done;
gpa_t ept_identity_map_addr;
+ unsigned long *vmx_io_bitmap[VMX_IO_BITMAP_NR];
};
#define NR_AUTOLOAD_MSRS 8
@@ -3963,7 +3970,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
#endif
CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING |
- CPU_BASED_UNCOND_IO_EXITING |
+ CPU_BASED_USE_IO_BITMAPS |
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_USE_TSC_OFFSETING |
CPU_BASED_MWAIT_EXITING |
@@ -5940,6 +5947,12 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
#endif
int i;
+ /* I/O */
+ vmcs_write64(IO_BITMAP_A,
+ __pa(to_kvm_vmx(vmx->vcpu.kvm)->vmx_io_bitmap[VMX_IO_BITMAP_A]));
+ vmcs_write64(IO_BITMAP_B,
+ __pa(to_kvm_vmx(vmx->vcpu.kvm)->vmx_io_bitmap[VMX_IO_BITMAP_B]));
+
if (enable_shadow_vmcs) {
vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
@@ -10093,9 +10106,24 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
static int vmx_vm_init(struct kvm *kvm)
{
+ int i;
+ struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
+
if (!ple_gap)
kvm->arch.pause_in_guest = true;
+
+ for (i = 0; i < VMX_IO_BITMAP_NR; i++) {
+ kvm_vmx->vmx_io_bitmap[i] = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!kvm_vmx->vmx_io_bitmap[i])
+ goto out;
+ memset(kvm_vmx->vmx_io_bitmap[i], 0xff, PAGE_SIZE);
+ }
return 0;
+
+out:
+ for (i = 0; i < VMX_IO_BITMAP_NR; i++)
+ free_page((unsigned long)kvm_vmx->vmx_io_bitmap[i]);
+ return -ENOMEM;
}
static void __init vmx_check_processor_compat(void *rtn)
@@ -11128,8 +11156,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
}
/*
- * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
- * for I/O port accesses.
+ * Merging of IO bitmap not currently supported.
+ * Rather, exit every time.
*/
exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
exec_control |= CPU_BASED_UNCOND_IO_EXITING;
--
2.7.4
From: Wanpeng Li <[email protected]>
Allow userspace to disable ioport intercept.
Cc: Paolo Bonzini <[email protected]>
Cc: Radim Krčmář <[email protected]>
Cc: Tim Shearer <[email protected]>
Cc: Liran Alon <[email protected]>
Signed-off-by: Wanpeng Li <[email protected]>
---
Documentation/virtual/kvm/api.txt | 11 +++++++++++
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/kvm/x86.c | 5 +++++
include/uapi/linux/kvm.h | 1 +
4 files changed, 19 insertions(+)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 1c7958b..3d0488e 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4378,6 +4378,17 @@ all such vmexits.
Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
+7.14 KVM_CAP_IOPORT_DISABLE_INTERCEPT
+
+Architectures: x86
+Parameters: args[0] defines whether ioport intercept is disabled
+
+When intercept is disabled (args[0] == 1), some frequently accessed
+ioports will not be intercepted.
+
+When intercept is enabled (args[0] == 0), behavior is as if this facility
+is unsupported.
+
8. Other capabilities.
----------------------
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c25775f..2f29f64 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -808,6 +808,8 @@ struct kvm_arch {
bool hlt_in_guest;
bool pause_in_guest;
+ bool ioport_disable_intercept;
+
unsigned long irq_sources_bitmap;
s64 kvmclock_offset;
raw_spinlock_t tsc_write_lock;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 51ecd38..044e314 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2881,6 +2881,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_GET_MSR_FEATURES:
+ case KVM_CAP_IOPORT_DISABLE_INTERCEPT:
r = 1;
break;
case KVM_CAP_SYNC_REGS:
@@ -4250,6 +4251,10 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
kvm->arch.pause_in_guest = true;
r = 0;
break;
+ case KVM_CAP_IOPORT_DISABLE_INTERCEPT:
+ kvm->arch.ioport_disable_intercept = cap->args[0];
+ r = 0;
+ break;
default:
r = -EINVAL;
break;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 1065006..92730d8 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -941,6 +941,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_BPB 152
#define KVM_CAP_GET_MSR_FEATURES 153
#define KVM_CAP_HYPERV_EVENTFD 154
+#define KVM_CAP_IOPORT_DISABLE_INTERCEPT 155
#ifdef KVM_CAP_IRQ_ROUTING
--
2.7.4
2018-04-17 13:45 GMT+08:00 Wanpeng Li <[email protected]>:
> Tim Shearer reported that "There is a guest which is running a packet
> forwarding app based on the DPDK (dpdk.org). The packet receive routine
> writes to 0xc070 using glibc's "outw_p" function which does an additional
> write to I/O port 0x80. It does this write for every packet that's
> received, causing a flood of KVM userspace context switches". He uses
> mpstat to observe a CPU performing L2 packet forwarding on a pinned
> guest vCPU, the guest time is 95 percent when allowing I/O port 0x80
> bypass, however, it is 65.78 percent when I/O port 0x80 bypss is
> disabled.
>
> This patchset introduces per-VM I/O permission bitmaps, the userspace
> can disable the ioport intercept when they are more concern the
> performance than the security.
>
> Cc: Paolo Bonzini <[email protected]>
> Cc: Radim Krčmář <[email protected]>
> Cc: Tim Shearer <[email protected]>
> Cc: Liran Alon <[email protected]>
>
Hi Paolo,
Did you send the patch to glibc, or can this patchset still be considered?
Regards,
Wanpeng Li
> Wanpeng Li (3):
> KVM: VMX: Introduce per-VM I/O permission bitmaps
> KVM: X86: Allow userspace to disable ioport intercept
> KVM: VMX: Allow I/O port 0x80 bypass when userspace prefer
>
> Documentation/virtual/kvm/api.txt | 11 +++++++++++
> arch/x86/include/asm/kvm_host.h | 2 ++
> arch/x86/kvm/vmx.c | 41 ++++++++++++++++++++++++++++++++++++---
> arch/x86/kvm/x86.c | 5 +++++
> include/uapi/linux/kvm.h | 1 +
> 5 files changed, 57 insertions(+), 3 deletions(-)
>
> --
> 2.7.4
>
On 08/05/2018 09:55, Wanpeng Li wrote:
> 2018-04-17 13:45 GMT+08:00 Wanpeng Li <[email protected]>:
>> Tim Shearer reported that "There is a guest which is running a packet
>> forwarding app based on the DPDK (dpdk.org). The packet receive routine
>> writes to 0xc070 using glibc's "outw_p" function which does an additional
>> write to I/O port 0x80. It does this write for every packet that's
>> received, causing a flood of KVM userspace context switches". He uses
>> mpstat to observe a CPU performing L2 packet forwarding on a pinned
>> guest vCPU, the guest time is 95 percent when allowing I/O port 0x80
>> bypass, however, it is 65.78 percent when I/O port 0x80 bypss is
>> disabled.
>>
>> This patchset introduces per-VM I/O permission bitmaps, the userspace
>> can disable the ioport intercept when they are more concern the
>> performance than the security.
>>
>> Cc: Paolo Bonzini <[email protected]>
>> Cc: Radim Krčmář <[email protected]>
>> Cc: Tim Shearer <[email protected]>
>> Cc: Liran Alon <[email protected]>
>>
>
> Hi Paolo,
>
> Did you send the patch to glibc or the patchset still can be considered?
I haven't, but I'm still not sure about the usefulness of these patches.
Paolo
>
> Regards,
> Wanpeng Li
>
>> Wanpeng Li (3):
>> KVM: VMX: Introduce per-VM I/O permission bitmaps
>> KVM: X86: Allow userspace to disable ioport intercept
>> KVM: VMX: Allow I/O port 0x80 bypass when userspace prefer
>>
>> Documentation/virtual/kvm/api.txt | 11 +++++++++++
>> arch/x86/include/asm/kvm_host.h | 2 ++
>> arch/x86/kvm/vmx.c | 41 ++++++++++++++++++++++++++++++++++++---
>> arch/x86/kvm/x86.c | 5 +++++
>> include/uapi/linux/kvm.h | 1 +
>> 5 files changed, 57 insertions(+), 3 deletions(-)
>>
>> --
>> 2.7.4
>>
> /*
> - * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
> - * for I/O port accesses.
> + * Merging of IO bitmap not currently supported.
s/not/is not/
And with that
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
Thank you!
> + * Rather, exit every time.
> */
> exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
> exec_control |= CPU_BASED_UNCOND_IO_EXITING;
> --
> 2.7.4
>
On Mon, Apr 16, 2018 at 10:45:59PM -0700, Wanpeng Li wrote:
> Tim Shearer reported that "There is a guest which is running a packet
> forwarding app based on the DPDK (dpdk.org). The packet receive routine
> writes to 0xc070 using glibc's "outw_p" function which does an additional
> write to I/O port 0x80. It does this write for every packet that's
> received, causing a flood of KVM userspace context switches". He uses
> mpstat to observe a CPU performing L2 packet forwarding on a pinned
> guest vCPU, the guest time is 95 percent when allowing I/O port 0x80
> bypass, however, it is 65.78 percent when I/O port 0x80 bypss is
> disabled.
>
> This patchset introduces per-VM I/O permission bitmaps, the userspace
> can disable the ioport intercept when they are more concern the
> performance than the security.
Could you kindly also add:
Suggested-by: Konrad Rzeszutek Wilk <[email protected]>
Thank you.
On Mon, Apr 16, 2018 at 10:46:01PM -0700, Wanpeng Li wrote:
> From: Wanpeng Li <[email protected]>
>
> Allow to disable ioport intercept by userspace.
>
> Cc: Paolo Bonzini <[email protected]>
> Cc: Radim Krčmář <[email protected]>
> Cc: Tim Shearer <[email protected]>
> Cc: Liran Alon <[email protected]>
> Signed-off-by: Wanpeng Li <[email protected]>
> ---
> Documentation/virtual/kvm/api.txt | 11 +++++++++++
> arch/x86/include/asm/kvm_host.h | 2 ++
> arch/x86/kvm/x86.c | 5 +++++
> include/uapi/linux/kvm.h | 1 +
> 4 files changed, 19 insertions(+)
>
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index 1c7958b..3d0488e 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -4378,6 +4378,17 @@ all such vmexits.
>
> Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
>
> +7.14 KVM_CAP_IOPORT_DISABLE_INTERCEPT
> +
> +Architectures: x86
> +Parameters: args[0] defines whether ioport intercept
"defines whether ioport intercept" -> "defines whether to intercept the provided ioport"
> +
> +When disable intercept (args[0] == 1), some ioports which frequently
When args[0] is one (that is, intercept is disabled), some ioports..
> +access will not be intercepted.
Why not provide args[1], which would hold the ioport in question? Or an array
of them?
> +
> +When enable intercept (args[0] == 0), behavior is as if this facility
> +is unsupported.
> +
> 8. Other capabilities.
> ----------------------
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index c25775f..2f29f64 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -808,6 +808,8 @@ struct kvm_arch {
> bool hlt_in_guest;
> bool pause_in_guest;
>
> + bool ioport_disable_intercept;
> +
> unsigned long irq_sources_bitmap;
> s64 kvmclock_offset;
> raw_spinlock_t tsc_write_lock;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 51ecd38..044e314 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2881,6 +2881,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> case KVM_CAP_SPLIT_IRQCHIP:
> case KVM_CAP_IMMEDIATE_EXIT:
> case KVM_CAP_GET_MSR_FEATURES:
> + case KVM_CAP_IOPORT_DISABLE_INTERCEPT:
> r = 1;
> break;
> case KVM_CAP_SYNC_REGS:
> @@ -4250,6 +4251,10 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
> kvm->arch.pause_in_guest = true;
> r = 0;
> break;
> + case KVM_CAP_IOPORT_DISABLE_INTERCEPT:
> + kvm->arch.ioport_disable_intercept = cap->args[0];
> + r = 0;
> + break;
> default:
> r = -EINVAL;
> break;
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 1065006..92730d8 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -941,6 +941,7 @@ struct kvm_ppc_resize_hpt {
> #define KVM_CAP_S390_BPB 152
> #define KVM_CAP_GET_MSR_FEATURES 153
> #define KVM_CAP_HYPERV_EVENTFD 154
> +#define KVM_CAP_IOPORT_DISABLE_INTERCEPT 155
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> --
> 2.7.4
>
On Fri, May 11, 2018 at 11:42:46AM -0400, Konrad Rzeszutek Wilk wrote:
> On Mon, Apr 16, 2018 at 10:46:01PM -0700, Wanpeng Li wrote:
> > From: Wanpeng Li <[email protected]>
> >
> > Allow to disable ioport intercept by userspace.
> >
> > Cc: Paolo Bonzini <[email protected]>
> > Cc: Radim Krčmář <[email protected]>
> > Cc: Tim Shearer <[email protected]>
> > Cc: Liran Alon <[email protected]>
> > Signed-off-by: Wanpeng Li <[email protected]>
> > ---
> > Documentation/virtual/kvm/api.txt | 11 +++++++++++
> > arch/x86/include/asm/kvm_host.h | 2 ++
> > arch/x86/kvm/x86.c | 5 +++++
> > include/uapi/linux/kvm.h | 1 +
> > 4 files changed, 19 insertions(+)
> >
> > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> > index 1c7958b..3d0488e 100644
> > --- a/Documentation/virtual/kvm/api.txt
> > +++ b/Documentation/virtual/kvm/api.txt
> > @@ -4378,6 +4378,17 @@ all such vmexits.
> >
> > Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
> >
> > +7.14 KVM_CAP_IOPORT_DISABLE_INTERCEPT
> > +
> > +Architectures: x86
And to be exact: Intel only..
On Mon, Apr 16, 2018 at 10:46:02PM -0700, Wanpeng Li wrote:
> From: Wanpeng Li <[email protected]>
>
> Tim Shearer reported that "There is a guest which is running a packet
> forwarding app based on the DPDK (dpdk.org). The packet receive routine
> writes to 0xc070 using glibc's "outw_p" function which does an additional
> write to I/O port 80. It does this write for every packet that's received,
> causing a flood of KVM userspace context switches". He uses mpstat to
> observe a CPU performing L2 packet forwarding on a pinned guest vCPU,
> the guest time is 95 percent when allowing I/O port 0x80 bypass, however,
> it is 65.78 percent when I/O port 0x80 bypss is disabled.
>
> This patch allows I/O port 0x80 bypass when userspace prefer.
s/prefer/requests it/
>
Perhaps:
Reported-by: Tim Shearer as well?
> Cc: Paolo Bonzini <[email protected]>
> Cc: Radim Krčmář <[email protected]>
> Cc: Tim Shearer <[email protected]>
> Cc: Liran Alon <[email protected]>
> Signed-off-by: Wanpeng Li <[email protected]>
> ---
> arch/x86/kvm/vmx.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index ebf1140..d3e5fef 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -10118,6 +10118,13 @@ static int vmx_vm_init(struct kvm *kvm)
> goto out;
> memset(kvm_vmx->vmx_io_bitmap[i], 0xff, PAGE_SIZE);
> }
> + if (kvm->arch.ioport_disable_intercept) {
> + /*
> + * Allow direct access to the PC debug port (it is often used for I/O
> + * delays, but the vmexits simply slow things down).
> + */
> + clear_bit(0x80, kvm_vmx->vmx_io_bitmap[VMX_IO_BITMAP_A]);
> + }
> return 0;
>
> out:
> --
> 2.7.4
>
2018-05-11 23:40 GMT+08:00 Konrad Rzeszutek Wilk <[email protected]>:
> On Mon, Apr 16, 2018 at 10:45:59PM -0700, Wanpeng Li wrote:
>> Tim Shearer reported that "There is a guest which is running a packet
>> forwarding app based on the DPDK (dpdk.org). The packet receive routine
>> writes to 0xc070 using glibc's "outw_p" function which does an additional
>> write to I/O port 0x80. It does this write for every packet that's
>> received, causing a flood of KVM userspace context switches". He uses
>> mpstat to observe a CPU performing L2 packet forwarding on a pinned
>> guest vCPU, the guest time is 95 percent when allowing I/O port 0x80
>> bypass, however, it is 65.78 percent when I/O port 0x80 bypss is
>> disabled.
>>
>> This patchset introduces per-VM I/O permission bitmaps, the userspace
>> can disable the ioport intercept when they are more concern the
>> performance than the security.
>
> Could you kindly also add:
>
> Suggested-by: Konrad Rzeszutek Wilk <[email protected]>
Yeah, both you and Liran gave the original idea. :) Tim and Liran, any
review for the patchset?
Regards,
Wanpeng Li
> From: Wanpeng Li <[email protected]>
> Sent: Friday, May 11, 2018 9:03 PM
>
> Yeah, both you and Liran give the original idea. :) Tim and Liran, any
> review for the patchset?
>
> +Architectures: x86
> +Parameters: args[0] defines whether ioport intercept
Agree with Konrad that the flexibility of specifying the ioport via args[1] could be valuable, otherwise this looks good to me. Thank you!
Reviewed-by: Tim Shearer <[email protected]>
Since these pages are typically not used, can we allocate them conditionally?
On Fri, May 11, 2018 at 8:39 AM, Konrad Rzeszutek Wilk
<[email protected]> wrote:
>> /*
>> - * A vmexit (to either L1 hypervisor or L0 userspace) is always needed
>> - * for I/O port accesses.
>> + * Merging of IO bitmap not currently supported.
>
> s/not/is not/
>
> And with that
>
> Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
>
> Thank you!
>> + * Rather, exit every time.
>> */
>> exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
>> exec_control |= CPU_BASED_UNCOND_IO_EXITING;
>> --
>> 2.7.4
>>
I'd prefer to let the kvm module have the final say as to whether or
not to allow userspace to do this.
On Fri, May 11, 2018 at 8:43 AM, Konrad Rzeszutek Wilk
<[email protected]> wrote:
> On Fri, May 11, 2018 at 11:42:46AM -0400, Konrad Rzeszutek Wilk wrote:
>> On Mon, Apr 16, 2018 at 10:46:01PM -0700, Wanpeng Li wrote:
>> > From: Wanpeng Li <[email protected]>
>> >
>> > Allow to disable ioport intercept by userspace.
>> >
>> > Cc: Paolo Bonzini <[email protected]>
>> > Cc: Radim Krčmář <[email protected]>
>> > Cc: Tim Shearer <[email protected]>
>> > Cc: Liran Alon <[email protected]>
>> > Signed-off-by: Wanpeng Li <[email protected]>
>> > ---
>> > Documentation/virtual/kvm/api.txt | 11 +++++++++++
>> > arch/x86/include/asm/kvm_host.h | 2 ++
>> > arch/x86/kvm/x86.c | 5 +++++
>> > include/uapi/linux/kvm.h | 1 +
>> > 4 files changed, 19 insertions(+)
>> >
>> > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
>> > index 1c7958b..3d0488e 100644
>> > --- a/Documentation/virtual/kvm/api.txt
>> > +++ b/Documentation/virtual/kvm/api.txt
>> > @@ -4378,6 +4378,17 @@ all such vmexits.
>> >
>> > Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
>> >
>> > +7.14 KVM_CAP_IOPORT_DISABLE_INTERCEPT
>> > +
>> > +Architectures: x86
>
> And to be exact: Intel only..
This does seem to allow a DoS from userspace if userspace prefers it.
That doesn't seem wise.
On Fri, May 11, 2018 at 8:44 AM, Konrad Rzeszutek Wilk
<[email protected]> wrote:
> On Mon, Apr 16, 2018 at 10:46:02PM -0700, Wanpeng Li wrote:
>> From: Wanpeng Li <[email protected]>
>>
>> Tim Shearer reported that "There is a guest which is running a packet
>> forwarding app based on the DPDK (dpdk.org). The packet receive routine
>> writes to 0xc070 using glibc's "outw_p" function which does an additional
>> write to I/O port 80. It does this write for every packet that's received,
>> causing a flood of KVM userspace context switches". He uses mpstat to
>> observe a CPU performing L2 packet forwarding on a pinned guest vCPU,
>> the guest time is 95 percent when allowing I/O port 0x80 bypass, however,
>> it is 65.78 percent when I/O port 0x80 bypss is disabled.
>>
>> This patch allows I/O port 0x80 bypass when userspace prefer.
>
> s/prefer/requests it/
>>
>
> Perhaps:
>
> Reported-by: Tim Shearer as well?
>
>> Cc: Paolo Bonzini <[email protected]>
>> Cc: Radim Krčmář <[email protected]>
>> Cc: Tim Shearer <[email protected]>
>> Cc: Liran Alon <[email protected]>
>> Signed-off-by: Wanpeng Li <[email protected]>
>> ---
>> arch/x86/kvm/vmx.c | 7 +++++++
>> 1 file changed, 7 insertions(+)
>>
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index ebf1140..d3e5fef 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -10118,6 +10118,13 @@ static int vmx_vm_init(struct kvm *kvm)
>> goto out;
>> memset(kvm_vmx->vmx_io_bitmap[i], 0xff, PAGE_SIZE);
>> }
>> + if (kvm->arch.ioport_disable_intercept) {
>> + /*
>> + * Allow direct access to the PC debug port (it is often used for I/O
>> + * delays, but the vmexits simply slow things down).
>> + */
>> + clear_bit(0x80, kvm_vmx->vmx_io_bitmap[VMX_IO_BITMAP_A]);
>> + }
>> return 0;
>>
>> out:
>> --
>> 2.7.4
>>
2018-05-16 5:56 GMT+08:00 Jim Mattson <[email protected]>:
> I'd prefer to let the kvm module have the final say as to whether or
> not to allow userspace to do this.
I had an IRC discussion with Paolo; it seems that he didn't like the
original idea of the patchset.
Regards,
Wanpeng Li
>
> On Fri, May 11, 2018 at 8:43 AM, Konrad Rzeszutek Wilk
> <[email protected]> wrote:
>> On Fri, May 11, 2018 at 11:42:46AM -0400, Konrad Rzeszutek Wilk wrote:
>>> On Mon, Apr 16, 2018 at 10:46:01PM -0700, Wanpeng Li wrote:
>>> > From: Wanpeng Li <[email protected]>
>>> >
>>> > Allow to disable ioport intercept by userspace.
>>> >
>>> > Cc: Paolo Bonzini <[email protected]>
>>> > Cc: Radim Krčmář <[email protected]>
>>> > Cc: Tim Shearer <[email protected]>
>>> > Cc: Liran Alon <[email protected]>
>>> > Signed-off-by: Wanpeng Li <[email protected]>
>>> > ---
>>> > Documentation/virtual/kvm/api.txt | 11 +++++++++++
>>> > arch/x86/include/asm/kvm_host.h | 2 ++
>>> > arch/x86/kvm/x86.c | 5 +++++
>>> > include/uapi/linux/kvm.h | 1 +
>>> > 4 files changed, 19 insertions(+)
>>> >
>>> > diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
>>> > index 1c7958b..3d0488e 100644
>>> > --- a/Documentation/virtual/kvm/api.txt
>>> > +++ b/Documentation/virtual/kvm/api.txt
>>> > @@ -4378,6 +4378,17 @@ all such vmexits.
>>> >
>>> > Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
>>> >
>>> > +7.14 KVM_CAP_IOPORT_DISABLE_INTERCEPT
>>> > +
>>> > +Architectures: x86
>>
>> And to be exact: Intel only..