2008-08-04 18:33:01

by Dave Hansen

[permalink] [raw]
Subject: [PATCH 1/4] reduce kvm stack usage in kvm_arch_vm_ioctl()

On my machine with gcc 3.4, kvm uses ~2k of stack in a few
select functions. This is mostly because gcc fails to
notice that the different case: statements could have their
stack usage combined. The stack overflows very nicely if interrupts
happen during one of these large uses.

This patch uses two methods for reducing stack usage.
1. dynamically allocate large objects instead of putting
them on the stack.
2. Use a union{} member for all of the case variables. This
tricks gcc into combining them all into a single stack
allocation.
---
arch/x86/kvm/x86.c | 116 ++++++++++++++++++++++++++++++++--------------------
1 files changed, 72 insertions(+), 44 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0d682fc..9d77da1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1624,12 +1624,73 @@ out:
return r;
}

+static inline int kvm_arch_vm_irqchip_ioctl(struct kvm *kvm, void *argp,
+ unsigned int ioctl)
+{
+ int ret = 0;
+ struct kvm_irqchip *chip = kmalloc(sizeof(struct kvm_irqchip), GFP_KERNEL);
+
+ if (!chip)
+ return -ENOMEM;
+
+ /* cheaper than the copy, so do this first */
+ if (!irqchip_in_kernel(kvm)) {
+ ret = -ENXIO;
+ goto out;
+ }
+ if (copy_from_user(chip, argp, sizeof(struct kvm_irqchip))) {
+ ret = -EFAULT;
+ goto out;
+ }
+ switch (ioctl) {
+ case KVM_GET_IRQCHIP:
+ ret = kvm_vm_ioctl_get_irqchip(kvm, chip);
+ if (ret)
+ goto out;
+ ret = copy_to_user(argp, chip, sizeof(struct kvm_irqchip));
+ if (ret) {
+ ret = -EFAULT;
+ goto out;
+ }
+ break;
+ case KVM_SET_IRQCHIP:
+ ret = kvm_vm_ioctl_set_irqchip(kvm, chip);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+out:
+ kfree(chip);
+ return ret;
+}
+
+
+static inline int x86_kvm_vm_ioctl_set_memory_region(struct kvm *kvm, void *argp)
+{
+ struct kvm_memory_region kvm_mem;
+ struct kvm_userspace_memory_region kvm_userspace_mem;
+
+ if (copy_from_user(&kvm_mem, argp, sizeof(struct kvm_memory_region)))
+ return -EFAULT;
+ kvm_userspace_mem.slot = kvm_mem.slot;
+ kvm_userspace_mem.flags = kvm_mem.flags;
+ kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
+ kvm_userspace_mem.memory_size = kvm_mem.memory_size;
+ return kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
struct kvm *kvm = filp->private_data;
void __user *argp = (void __user *)arg;
int r = -EINVAL;
+ union {
+ /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
+ struct kvm_pit_state ps;
+ struct kvm_memory_alias alias;
+ } u;

switch (ioctl) {
case KVM_SET_TSS_ADDR:
@@ -1661,17 +1722,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
case KVM_GET_NR_MMU_PAGES:
r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
break;
- case KVM_SET_MEMORY_ALIAS: {
- struct kvm_memory_alias alias;
-
+ case KVM_SET_MEMORY_ALIAS:
r = -EFAULT;
- if (copy_from_user(&alias, argp, sizeof alias))
+ if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias)))
goto out;
- r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
+ r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias);
if (r)
goto out;
break;
- }
case KVM_CREATE_IRQCHIP:
r = -ENOMEM;
kvm->arch.vpic = kvm_create_pic(kvm);
@@ -1711,67 +1769,37 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
break;
}
- case KVM_GET_IRQCHIP: {
- /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
- struct kvm_irqchip chip;
-
- r = -EFAULT;
- if (copy_from_user(&chip, argp, sizeof chip))
- goto out;
- r = -ENXIO;
- if (!irqchip_in_kernel(kvm))
- goto out;
- r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
- if (r)
- goto out;
- r = -EFAULT;
- if (copy_to_user(argp, &chip, sizeof chip))
- goto out;
- r = 0;
- break;
- }
- case KVM_SET_IRQCHIP: {
- /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
- struct kvm_irqchip chip;
-
- r = -EFAULT;
- if (copy_from_user(&chip, argp, sizeof chip))
- goto out;
- r = -ENXIO;
- if (!irqchip_in_kernel(kvm))
- goto out;
- r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
+ case KVM_GET_IRQCHIP:
+ case KVM_SET_IRQCHIP:
+ r = kvm_arch_vm_irqchip_ioctl(kvm, argp, ioctl);
if (r)
goto out;
r = 0;
break;
- }
case KVM_GET_PIT: {
- struct kvm_pit_state ps;
r = -EFAULT;
- if (copy_from_user(&ps, argp, sizeof ps))
+ if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
goto out;
r = -ENXIO;
if (!kvm->arch.vpit)
goto out;
- r = kvm_vm_ioctl_get_pit(kvm, &ps);
+ r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
if (r)
goto out;
r = -EFAULT;
- if (copy_to_user(argp, &ps, sizeof ps))
+ if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
goto out;
r = 0;
break;
}
case KVM_SET_PIT: {
- struct kvm_pit_state ps;
r = -EFAULT;
- if (copy_from_user(&ps, argp, sizeof ps))
+ if (copy_from_user(&u.ps, argp, sizeof u.ps))
goto out;
r = -ENXIO;
if (!kvm->arch.vpit)
goto out;
- r = kvm_vm_ioctl_set_pit(kvm, &ps);
+ r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
if (r)
goto out;
r = 0;
--
1.5.4.3


2008-08-04 18:32:13

by Dave Hansen

[permalink] [raw]
Subject: [PATCH 2/4] reduce stack usage in kvm_vcpu_ioctl()

Same as the last one, but this time we use kmalloc()
for all of the uses.

Note that the kfree()s take advantage of the fact that
kfree() is OK on NULL.

Signed-off-by: Dave Hansen <[email protected]>
---
virt/kvm/kvm_main.c | 48 ++++++++++++++++++++++++++++++------------------
1 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7dd9b0b..70bf180 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1118,6 +1118,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
int r;
+ struct kvm_fpu *fpu = NULL;
+ struct kvm_sregs *kvm_sregs = NULL;
+

if (vcpu->kvm->mm != current->mm)
return -EIO;
@@ -1165,25 +1168,29 @@ out_free2:
break;
}
case KVM_GET_SREGS: {
- struct kvm_sregs kvm_sregs;
-
- memset(&kvm_sregs, 0, sizeof kvm_sregs);
- r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs);
+ kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
+ r = -ENOMEM;
+ if (!kvm_sregs)
+ goto out;
+ memset(kvm_sregs, 0, sizeof(struct kvm_sregs));
+ r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs);
if (r)
goto out;
r = -EFAULT;
- if (copy_to_user(argp, &kvm_sregs, sizeof kvm_sregs))
+ if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs)))
goto out;
r = 0;
break;
}
case KVM_SET_SREGS: {
- struct kvm_sregs kvm_sregs;
-
+ kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
+ r = -ENOMEM;
+ if (!kvm_sregs)
+ goto out;
r = -EFAULT;
- if (copy_from_user(&kvm_sregs, argp, sizeof kvm_sregs))
+ if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs)))
goto out;
- r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, &kvm_sregs);
+ r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs);
if (r)
goto out;
r = 0;
@@ -1264,25 +1271,28 @@ out_free2:
break;
}
case KVM_GET_FPU: {
- struct kvm_fpu fpu;
-
- memset(&fpu, 0, sizeof fpu);
- r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, &fpu);
+ fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
+ r = -ENOMEM;
+ if (!fpu)
+ goto out;
+ r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu);
if (r)
goto out;
r = -EFAULT;
- if (copy_to_user(argp, &fpu, sizeof fpu))
+ if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu)))
goto out;
r = 0;
break;
}
case KVM_SET_FPU: {
- struct kvm_fpu fpu;
-
+ fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
+ r = -ENOMEM;
+ if (!fpu)
+ goto out;
r = -EFAULT;
- if (copy_from_user(&fpu, argp, sizeof fpu))
+ if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu)))
goto out;
- r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, &fpu);
+ r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
if (r)
goto out;
r = 0;
@@ -1292,6 +1302,8 @@ out_free2:
r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
}
out:
+ kfree(fpu);
+ kfree(kvm_sregs);
return r;
}

--
1.5.4.3

2008-08-04 18:32:36

by Dave Hansen

[permalink] [raw]
Subject: [PATCH 3/4] reduce stack usage in kvm_arch_vcpu_ioctl()

This time it is kvm_arch_vcpu_ioctl(). Use dynamic
allocations to reduce its stack usage.

Signed-off-by: Dave Hansen <[email protected]>
---
arch/x86/kvm/x86.c | 20 +++++++++++++-------
1 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9d77da1..3bbd123 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1303,13 +1303,16 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
int r;
+ struct kvm_lapic_state *lapic = NULL;

switch (ioctl) {
case KVM_GET_LAPIC: {
- struct kvm_lapic_state lapic;
+ lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);

- memset(&lapic, 0, sizeof lapic);
- r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
+ r = -ENOMEM;
+ if (!lapic)
+ goto out;
+ r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic);
if (r)
goto out;
r = -EFAULT;
@@ -1319,12 +1322,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
}
case KVM_SET_LAPIC: {
- struct kvm_lapic_state lapic;
-
+ lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
+ r = -ENOMEM;
+ if (!lapic)
+ goto out;
r = -EFAULT;
- if (copy_from_user(&lapic, argp, sizeof lapic))
+ if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state)))
goto out;
- r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);;
+ r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic);
if (r)
goto out;
r = 0;
@@ -1422,6 +1427,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EINVAL;
}
out:
+ kfree(lapic);
return r;
}

--
1.5.4.3

2008-08-04 18:33:33

by Dave Hansen

[permalink] [raw]
Subject: [PATCH 4/4] kvm: reduce stack usage in kvm_pv_mmu_op()

This time, we're in a hot path. We can't use kmalloc() because
it might impact performance. So, we just stick the buffer that
we need into the kvm_vcpu_arch structure. This is used very
often, so it is not really a waste.

We also have to move the buffer structure's definition to the
arch-specific x86 kvm header.

Signed-off-by: Dave Hansen <[email protected]>
---
arch/x86/kvm/mmu.c | 23 ++++++++---------------
include/asm-x86/kvm_host.h | 10 ++++++++++
2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0bfe2bd..c2ce32c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -135,13 +135,6 @@ module_param(dbg, bool, 0644);
#define ACC_USER_MASK PT_USER_MASK
#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)

-struct kvm_pv_mmu_op_buffer {
- void *ptr;
- unsigned len;
- unsigned processed;
- char buf[512] __aligned(sizeof(long));
-};
-
struct kvm_rmap_desc {
u64 *shadow_ptes[RMAP_EXT];
struct kvm_rmap_desc *more;
@@ -2287,18 +2280,18 @@ int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
gpa_t addr, unsigned long *ret)
{
int r;
- struct kvm_pv_mmu_op_buffer buffer;
+ struct kvm_pv_mmu_op_buffer *buffer = &vcpu->arch.mmu_op_buffer;

- buffer.ptr = buffer.buf;
- buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf);
- buffer.processed = 0;
+ buffer->ptr = buffer->buf;
+ buffer->len = min_t(unsigned long, bytes, sizeof buffer->buf);
+ buffer->processed = 0;

- r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len);
+ r = kvm_read_guest(vcpu->kvm, addr, buffer->buf, buffer->len);
if (r)
goto out;

- while (buffer.len) {
- r = kvm_pv_mmu_op_one(vcpu, &buffer);
+ while (buffer->len) {
+ r = kvm_pv_mmu_op_one(vcpu, buffer);
if (r < 0)
goto out;
if (r == 0)
@@ -2307,7 +2300,7 @@ int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,

r = 1;
out:
- *ret = buffer.processed;
+ *ret = buffer->processed;
return r;
}

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 0f3c531..ab0b351 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -195,6 +195,13 @@ struct kvm_mmu_page {
};
};

+struct kvm_pv_mmu_op_buffer {
+ void *ptr;
+ unsigned len;
+ unsigned processed;
+ char buf[512] __aligned(sizeof(long));
+};
+
/*
* x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
* 32-bit). The kvm_mmu structure abstracts the details of the current mmu
@@ -237,6 +244,9 @@ struct kvm_vcpu_arch {
bool tpr_access_reporting;

struct kvm_mmu mmu;
+ /* only needed in kvm_pv_mmu_op() path, but it's hot so
+ * put it here to avoid allocation */
+ struct kvm_pv_mmu_op_buffer mmu_op_buffer;

struct kvm_mmu_memory_cache mmu_pte_chain_cache;
struct kvm_mmu_memory_cache mmu_rmap_desc_cache;
--
1.5.4.3

2008-08-11 09:29:39

by Avi Kivity

[permalink] [raw]
Subject: Re: [PATCH 1/4] reduce kvm stack usage in kvm_arch_vm_ioctl()

(Please use [email protected], not [email protected])

Dave Hansen wrote:
> On my machine with gcc 3.4, kvm uses ~2k of stack in a few
> select functions. This is mostly because gcc fails to
> notice that the different case: statements could have their
> stack usage combined. It overflows very nicely if interrupts
> happen during one of these large uses.
>
> This patch uses two methods for reducing stack usage.
> 1. dynamically allocate large objects instead of putting
> on the stack.
> 2. Use a union{} member for all of the case variables. This
> tricks gcc into combining them all into a single stack
> allocation.
>

Missing signoff.

> ---
> arch/x86/kvm/x86.c | 116 ++++++++++++++++++++++++++++++++--------------------
> 1 files changed, 72 insertions(+), 44 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 0d682fc..9d77da1 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1624,12 +1624,73 @@ out:
> return r;
> }
>
> +static inline int kvm_arch_vm_irqchip_ioctl(struct kvm *kvm, void *argp,
> + unsigned int ioctl)
> +{
> + int ret = 0;
> + struct kvm_irqchip *chip = kmalloc(sizeof(struct kvm_irqchip), GFP_KERNEL);
> +
> + if (!chip)
> + return -ENOMEM;
> +
> + /* cheaper than the copy, so do this first */
> + if (!irqchip_in_kernel(kvm)) {
> + ret = -ENXIO;
> + goto out;
> + }
> + if (copy_from_user(chip, argp, sizeof(struct kvm_irqchip))) {
> + ret = -EFAULT;
> + goto out;
> + }
> + switch (ioctl) {
> + case KVM_GET_IRQCHIP:
> + ret = kvm_vm_ioctl_get_irqchip(kvm, chip);
> + if (ret)
> + goto out;
> + ret = copy_to_user(argp, chip, sizeof(struct kvm_irqchip));
> + if (ret) {
> + ret = -EFAULT;
> + goto out;
> + }
> + break;
> + case KVM_SET_IRQCHIP:
> + ret = kvm_vm_ioctl_set_irqchip(kvm, chip);
> + break;
> + default:
> + ret = -EINVAL;
> + break;
> + }
> +out:
> + kfree(chip);
> + return ret;
> +}
> +
> +
>

Please fold this back into the parent function. It will cause a bit of
code duplication, but I'd like to keep the patch small and obvious since
it needs to be backported. Later patches can refactor the code to
reduce the duplication (these won't be backported, obviously).

> +static inline int x86_kvm_vm_ioctl_set_memory_region(struct kvm *kvm, void *argp)
> +{
> + struct kvm_memory_region kvm_mem;
> + struct kvm_userspace_memory_region kvm_userspace_mem;
> +
> + if (copy_from_user(&kvm_mem, argp, sizeof(struct kvm_memory_region)))
> + return -EFAULT;
> + kvm_userspace_mem.slot = kvm_mem.slot;
> + kvm_userspace_mem.flags = kvm_mem.flags;
> + kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
> + kvm_userspace_mem.memory_size = kvm_mem.memory_size;
> + return kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
> +}
>

Ditto.




--
error compiling committee.c: too many arguments to function

2008-08-11 09:41:45

by Avi Kivity

[permalink] [raw]
Subject: Re: [PATCH 2/4] reduce stack usage in kvm_vcpu_ioctl()

Dave Hansen wrote:
> Same as the last one, but this time we use kmalloc()
> for all of the uses.
>
> Note that the kfree()s take advantage of the fact that
> kfree() is OK on NULL.
>
> Signed-off-by: Dave Hansen <[email protected]>
> ---
> virt/kvm/kvm_main.c | 48 ++++++++++++++++++++++++++++++------------------
> 1 files changed, 30 insertions(+), 18 deletions(-)
>
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 7dd9b0b..70bf180 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1118,6 +1118,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
> struct kvm_vcpu *vcpu = filp->private_data;
> void __user *argp = (void __user *)arg;
> int r;
> + struct kvm_fpu *fpu = NULL;
> + struct kvm_sregs *kvm_sregs = NULL;
> +
>

Spurious blank line.

>
> if (vcpu->kvm->mm != current->mm)
> return -EIO;
> @@ -1165,25 +1168,29 @@ out_free2:
> break;
> }
> case KVM_GET_SREGS: {
> - struct kvm_sregs kvm_sregs;
> -
> - memset(&kvm_sregs, 0, sizeof kvm_sregs);
> - r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, &kvm_sregs);
> + kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
> + r = -ENOMEM;
> + if (!kvm_sregs)
> + goto out;
> + memset(kvm_sregs, 0, sizeof(struct kvm_sregs));
>

memset unneeded after kzalloc.


btw, this is a generic problem, and could be handled generically:

struct ioctl_handler_entry {
u32 ioctl;
union {
long (*handler_parg)(void *arg);
long (*handler_larg)(long arg);
};
};

void process_ioctl(struct ioctl_handler_entry *ioctls)
{
search for correct entry;
allocate memory (get size from ioctl number);
copy from user (if _IOW);
call handler;
copy to user (if _IOR, and no error);
free memory;
}

I imagine this can be used to simplify many ioctls.

--
error compiling committee.c: too many arguments to function