2012-08-29 23:31:32

by Mathias Krause

Subject: [PATCH 0/8] KVM: minor cleanups and optimizations

Hi Avi, Marcelo,

This patch series implements a few micro-optimizations for the x86 KVM
code base. The two major changes are constification of variables and an
optimization of the SSE emulation. The former gives the compiler more
opportunities to optimize and ensures the read-only data is not placed
in a cache line together with data that gets written to. The latter
should speed up the emulation of SSE register moves, as the aligned
variant of MOVDQ (MOVDQA) has lower latency and higher throughput than
the unaligned one (MOVDQU).
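
As a rough, hypothetical illustration of the constification point (the
names below are invented, not from the series): without const an
initialized table is emitted into .data, next to writable state, while
with const it moves into .rodata:

static unsigned writable_table[4]       = { 1, 2, 3, 4 }; /* .data   */
static const unsigned readonly_table[4] = { 1, 2, 3, 4 }; /* .rodata */

unsigned lookup(unsigned i)
{
	/* "objdump -t lookup.o" shows the tables in different sections */
	return writable_table[i & 3] + readonly_table[i & 3];
}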

Regards,

Mathias Krause (8):
KVM: x86: minor size optimization
KVM: x86 emulator: use aligned variants of SSE register ops
KVM: x86: mark opcode tables const
KVM: x86: constify emulate_ops
KVM: x86: constify read_write_emulator_ops
KVM: x86: more constification
KVM: VMX: constify lookup tables
KVM: SVM: constify lookup tables

arch/x86/include/asm/kvm_emulate.h | 2 +-
arch/x86/kvm/cpuid.c | 12 ++--
arch/x86/kvm/emulate.c | 126 ++++++++++++++++++------------------
arch/x86/kvm/lapic.c | 2 +-
arch/x86/kvm/svm.c | 8 +--
arch/x86/kvm/vmx.c | 14 ++--
arch/x86/kvm/x86.c | 12 ++--
7 files changed, 88 insertions(+), 88 deletions(-)

--
1.7.10.4


2012-08-29 23:31:45

by Mathias Krause

Subject: [PATCH 1/8] KVM: x86: minor size optimization

Some fields can be constified and/or made static to reduce code and data
size.

Numbers for a 32-bit build:

           text    data     bss     dec     hex  filename
before:    3351      80       0    3431     d67  cpuid.o
after:     3391       0       0    3391     d3f  cpuid.o
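
A minimal sketch (not taken from the kernel) of where the savings come
from: a non-static local array has to be rebuilt on the stack on every
call, whereas a static const one is emitted once into .rodata:

#include <stdint.h>
#include <string.h>

uint32_t sig_stack(void)
{
	char signature[12] = "KVMKVMKVM\0\0"; /* copied to the stack per call */
	uint32_t sig;

	memcpy(&sig, signature, sizeof(sig));
	return sig;
}

uint32_t sig_rodata(void)
{
	static const char signature[12] = "KVMKVMKVM\0\0"; /* one copy, .rodata */
	uint32_t sig;

	memcpy(&sig, signature, sizeof(sig));
	return sig;
}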

Signed-off-by: Mathias Krause <[email protected]>
---
arch/x86/kvm/cpuid.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b496da6..ec79e77 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -397,8 +397,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
break;
}
case KVM_CPUID_SIGNATURE: {
- char signature[12] = "KVMKVMKVM\0\0";
- u32 *sigptr = (u32 *)signature;
+ static const char signature[12] = "KVMKVMKVM\0\0";
+ const u32 *sigptr = (const u32 *)signature;
entry->eax = KVM_CPUID_FEATURES;
entry->ebx = sigptr[0];
entry->ecx = sigptr[1];
@@ -484,10 +484,10 @@ struct kvm_cpuid_param {
u32 func;
u32 idx;
bool has_leaf_count;
- bool (*qualifier)(struct kvm_cpuid_param *param);
+ bool (*qualifier)(const struct kvm_cpuid_param *param);
};

-static bool is_centaur_cpu(struct kvm_cpuid_param *param)
+static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
{
return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
}
@@ -498,7 +498,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 *cpuid_entries;
int limit, nent = 0, r = -E2BIG, i;
u32 func;
- static struct kvm_cpuid_param param[] = {
+ static const struct kvm_cpuid_param param[] = {
{ .func = 0, .has_leaf_count = true },
{ .func = 0x80000000, .has_leaf_count = true },
{ .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true },
@@ -517,7 +517,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,

r = 0;
for (i = 0; i < ARRAY_SIZE(param); i++) {
- struct kvm_cpuid_param *ent = &param[i];
+ const struct kvm_cpuid_param *ent = &param[i];

if (ent->qualifier && !ent->qualifier(ent))
continue;
--
1.7.10.4

2012-08-29 23:32:34

by Mathias Krause

Subject: [PATCH 8/8] KVM: SVM: constify lookup tables

We never modify direct_access_msrs[], msrpm_ranges[],
svm_exit_handlers[], or x86_intercept_map[] at runtime.
Mark them r/o.
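
The declarator used for svm_exit_handlers[] is easy to misread, so here
is a reduced, stand-alone sketch of the pattern: an array of const
pointers to functions, which lets the whole table live in .rodata:

static int handle_nop(int exit_info)
{
	return exit_info;
}

/* array of const function pointers -- no slot can be overwritten */
static int (*const handlers[])(int) = {
	handle_nop,
};

int dispatch(unsigned exit_code, int exit_info)
{
	if (exit_code >= sizeof(handlers) / sizeof(handlers[0]))
		return -1;
	return handlers[exit_code](exit_info);
}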

Signed-off-by: Mathias Krause <[email protected]>
Cc: Joerg Roedel <[email protected]>
---
arch/x86/kvm/svm.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 31be4a5..611c728 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -163,7 +163,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio);

#define MSR_INVALID 0xffffffffU

-static struct svm_direct_access_msrs {
+static const struct svm_direct_access_msrs {
u32 index; /* Index of the MSR */
bool always; /* True if intercept is always on */
} direct_access_msrs[] = {
@@ -400,7 +400,7 @@ struct svm_init_data {
int r;
};

-static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
+static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};

#define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
#define MSRS_RANGE_SIZE 2048
@@ -3267,7 +3267,7 @@ static int pause_interception(struct vcpu_svm *svm)
return 1;
}

-static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
+static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_CR0] = cr_interception,
[SVM_EXIT_READ_CR3] = cr_interception,
[SVM_EXIT_READ_CR4] = cr_interception,
@@ -4068,7 +4068,7 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
#define POST_MEM(exit) { .exit_code = (exit), \
.stage = X86_ICPT_POST_MEMACCESS, }

-static struct __x86_intercept {
+static const struct __x86_intercept {
u32 exit_code;
enum x86_intercept_stage stage;
} x86_intercept_map[] = {
--
1.7.10.4

2012-08-29 23:32:32

by Mathias Krause

Subject: [PATCH 7/8] KVM: VMX: constify lookup tables

We use vmcs_field_to_offset_table[], kvm_vmx_segment_fields[] and
kvm_vmx_exit_handlers[] as lookup tables only -- make them r/o.

Signed-off-by: Mathias Krause <[email protected]>
---
arch/x86/kvm/vmx.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 248c2b4..d62b413 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -450,7 +450,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
#define FIELD64(number, name) [number] = VMCS12_OFFSET(name), \
[number##_HIGH] = VMCS12_OFFSET(name)+4

-static unsigned short vmcs_field_to_offset_table[] = {
+static const unsigned short vmcs_field_to_offset_table[] = {
FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
FIELD(GUEST_ES_SELECTOR, guest_es_selector),
FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
@@ -666,7 +666,7 @@ static struct vmx_capability {
.ar_bytes = GUEST_##seg##_AR_BYTES, \
}

-static struct kvm_vmx_segment_field {
+static const struct kvm_vmx_segment_field {
unsigned selector;
unsigned base;
unsigned limit;
@@ -2695,7 +2695,7 @@ static __exit void hardware_unsetup(void)

static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save)
{
- struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+ const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
struct kvm_segment tmp = *save;

if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) {
@@ -2764,7 +2764,7 @@ static gva_t rmode_tss_base(struct kvm *kvm)

static void fix_rmode_seg(int seg, struct kvm_segment *save)
{
- struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+ const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];

vmcs_write16(sf->selector, save->base >> 4);
vmcs_write32(sf->base, save->base & 0xffff0);
@@ -3202,7 +3202,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+ const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
u32 ar;

vmx_segment_cache_clear(vmx);
@@ -3572,7 +3572,7 @@ out:

static void seg_setup(int seg)
{
- struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+ const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
unsigned int ar;

vmcs_write16(sf->selector, 0);
@@ -5655,7 +5655,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
* to be done to userspace and return 0.
*/
-static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
+static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_EXCEPTION_NMI] = handle_exception,
[EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
[EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
--
1.7.10.4

2012-08-29 23:32:30

by Mathias Krause

Subject: [PATCH 6/8] KVM: x86: more constification

Constify apic_lvt_mask[] and emulated_msrs[] as well; neither is
modified at runtime.

Signed-off-by: Mathias Krause <[email protected]>
---
arch/x86/kvm/lapic.c | 2 +-
arch/x86/kvm/x86.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 18d149d..07ad628 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -198,7 +198,7 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic)
return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
}

-static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
+static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */
LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
LVT_MASK | APIC_MODE_MASK, /* LVTPC */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 79ac03c..d981008 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -817,7 +817,7 @@ static u32 msrs_to_save[] = {

static unsigned num_msrs_to_save;

-static u32 emulated_msrs[] = {
+static const u32 emulated_msrs[] = {
MSR_IA32_TSCDEADLINE,
MSR_IA32_MISC_ENABLE,
MSR_IA32_MCG_STATUS,
--
1.7.10.4

2012-08-29 23:32:28

by Mathias Krause

Subject: [PATCH 5/8] KVM: x86: constify read_write_emulator_ops

We never change read_emultor or write_emultor at runtime, so make them
r/o.

Signed-off-by: Mathias Krause <[email protected]>
---
arch/x86/kvm/x86.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f8b0148..79ac03c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3773,14 +3773,14 @@ static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
return X86EMUL_CONTINUE;
}

-static struct read_write_emulator_ops read_emultor = {
+static const struct read_write_emulator_ops read_emultor = {
.read_write_prepare = read_prepare,
.read_write_emulate = read_emulate,
.read_write_mmio = vcpu_mmio_read,
.read_write_exit_mmio = read_exit_mmio,
};

-static struct read_write_emulator_ops write_emultor = {
+static const struct read_write_emulator_ops write_emultor = {
.read_write_emulate = write_emulate,
.read_write_mmio = write_mmio,
.read_write_exit_mmio = write_exit_mmio,
@@ -3791,7 +3791,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
unsigned int bytes,
struct x86_exception *exception,
struct kvm_vcpu *vcpu,
- struct read_write_emulator_ops *ops)
+ const struct read_write_emulator_ops *ops)
{
gpa_t gpa;
int handled, ret;
@@ -3840,7 +3840,7 @@ mmio:
int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
void *val, unsigned int bytes,
struct x86_exception *exception,
- struct read_write_emulator_ops *ops)
+ const struct read_write_emulator_ops *ops)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
gpa_t gpa;
--
1.7.10.4

2012-08-29 23:32:26

by Mathias Krause

Subject: [PATCH 4/8] KVM: x86: constify emulate_ops

We never change emulate_ops at runtime, so it should be r/o.
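
Schematically (hypothetical, pared-down types, not the real KVM
definitions), the change makes the context hold a pointer-to-const,
which in turn allows the single shared emulate_ops instance to be
const:

struct x86_emulate_ctxt;

struct x86_emulate_ops {
	unsigned long (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg);
};

struct x86_emulate_ctxt {
	const struct x86_emulate_ops *ops;	/* was: struct x86_emulate_ops * */
};

static unsigned long dummy_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
{
	return reg;	/* stand-in; KVM wires up the real emulator callbacks */
}

static const struct x86_emulate_ops emulate_ops = {
	.read_gpr = dummy_read_gpr,
};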

Signed-off-by: Mathias Krause <[email protected]>
---
arch/x86/include/asm/kvm_emulate.h | 2 +-
arch/x86/kvm/emulate.c | 22 +++++++++++-----------
arch/x86/kvm/x86.c | 2 +-
3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 282aee5..b5bb73a 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -250,7 +250,7 @@ struct read_cache {
};

struct x86_emulate_ctxt {
- struct x86_emulate_ops *ops;
+ const struct x86_emulate_ops *ops;

/* Register state before/after emulation. */
unsigned long eflags;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index fd06f9d..663e958 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1325,7 +1325,7 @@ static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
u16 selector, struct desc_ptr *dt)
{
- struct x86_emulate_ops *ops = ctxt->ops;
+ const struct x86_emulate_ops *ops = ctxt->ops;

if (selector & 1 << 2) {
struct desc_struct desc;
@@ -1747,7 +1747,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)

static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
{
- struct x86_emulate_ops *ops = ctxt->ops;
+ const struct x86_emulate_ops *ops = ctxt->ops;
int rc;
struct desc_ptr dt;
gva_t cs_addr;
@@ -2129,7 +2129,7 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt)

static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
{
- struct x86_emulate_ops *ops = ctxt->ops;
+ const struct x86_emulate_ops *ops = ctxt->ops;
u32 eax, ebx, ecx, edx;

/*
@@ -2173,7 +2173,7 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)

static int em_syscall(struct x86_emulate_ctxt *ctxt)
{
- struct x86_emulate_ops *ops = ctxt->ops;
+ const struct x86_emulate_ops *ops = ctxt->ops;
struct desc_struct cs, ss;
u64 msr_data;
u16 cs_sel, ss_sel;
@@ -2231,7 +2231,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)

static int em_sysenter(struct x86_emulate_ctxt *ctxt)
{
- struct x86_emulate_ops *ops = ctxt->ops;
+ const struct x86_emulate_ops *ops = ctxt->ops;
struct desc_struct cs, ss;
u64 msr_data;
u16 cs_sel, ss_sel;
@@ -2294,7 +2294,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)

static int em_sysexit(struct x86_emulate_ctxt *ctxt)
{
- struct x86_emulate_ops *ops = ctxt->ops;
+ const struct x86_emulate_ops *ops = ctxt->ops;
struct desc_struct cs, ss;
u64 msr_data;
int usermode;
@@ -2357,7 +2357,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
u16 port, u16 len)
{
- struct x86_emulate_ops *ops = ctxt->ops;
+ const struct x86_emulate_ops *ops = ctxt->ops;
struct desc_struct tr_seg;
u32 base3;
int r;
@@ -2476,7 +2476,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
u16 tss_selector, u16 old_tss_sel,
ulong old_tss_base, struct desc_struct *new_desc)
{
- struct x86_emulate_ops *ops = ctxt->ops;
+ const struct x86_emulate_ops *ops = ctxt->ops;
struct tss_segment_16 tss_seg;
int ret;
u32 new_tss_base = get_desc_base(new_desc);
@@ -2623,7 +2623,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
u16 tss_selector, u16 old_tss_sel,
ulong old_tss_base, struct desc_struct *new_desc)
{
- struct x86_emulate_ops *ops = ctxt->ops;
+ const struct x86_emulate_ops *ops = ctxt->ops;
struct tss_segment_32 tss_seg;
int ret;
u32 new_tss_base = get_desc_base(new_desc);
@@ -2667,7 +2667,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
u16 tss_selector, int idt_index, int reason,
bool has_error_code, u32 error_code)
{
- struct x86_emulate_ops *ops = ctxt->ops;
+ const struct x86_emulate_ops *ops = ctxt->ops;
struct desc_struct curr_tss_desc, next_tss_desc;
int ret;
u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
@@ -4339,7 +4339,7 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,

int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
- struct x86_emulate_ops *ops = ctxt->ops;
+ const struct x86_emulate_ops *ops = ctxt->ops;
int rc = X86EMUL_CONTINUE;
int saved_dst_type = ctxt->dst.type;

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e00050c..f8b0148 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4323,7 +4323,7 @@ static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulon
kvm_register_write(emul_to_vcpu(ctxt), reg, val);
}

-static struct x86_emulate_ops emulate_ops = {
+static const struct x86_emulate_ops emulate_ops = {
.read_gpr = emulator_read_gpr,
.write_gpr = emulator_write_gpr,
.read_std = kvm_read_guest_virt_system,
--
1.7.10.4

2012-08-29 23:32:22

by Mathias Krause

Subject: [PATCH 3/8] KVM: x86: mark opcode tables const

The opcode tables never change at runtime, therefore mark them const.
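
Note why the union members inside struct opcode must change along with
the tables: once a sub-table is const, storing its address requires a
pointer-to-const member. A reduced sketch (hypothetical fields):

struct opcode {
	unsigned flags;
	union {
		int (*execute)(void *ctxt);
		const struct opcode *group;	/* was: struct opcode *group */
	} u;
};

static const struct opcode group_rm1[8];	/* a const sub-table */

static const struct opcode entry = {
	.u.group = group_rm1,	/* needs the const-qualified member to compile */
};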

Signed-off-by: Mathias Krause <[email protected]>
---
arch/x86/kvm/emulate.c | 40 ++++++++++++++++++++--------------------
1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5a0fee1..fd06f9d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -161,9 +161,9 @@ struct opcode {
u64 intercept : 8;
union {
int (*execute)(struct x86_emulate_ctxt *ctxt);
- struct opcode *group;
- struct group_dual *gdual;
- struct gprefix *gprefix;
+ const struct opcode *group;
+ const struct group_dual *gdual;
+ const struct gprefix *gprefix;
} u;
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
};
@@ -3574,13 +3574,13 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \
I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)

-static struct opcode group7_rm1[] = {
+static const struct opcode group7_rm1[] = {
DI(SrcNone | Priv, monitor),
DI(SrcNone | Priv, mwait),
N, N, N, N, N, N,
};

-static struct opcode group7_rm3[] = {
+static const struct opcode group7_rm3[] = {
DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
II(SrcNone | Prot | VendorSpecific, em_vmmcall, vmmcall),
DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
@@ -3591,13 +3591,13 @@ static struct opcode group7_rm3[] = {
DIP(SrcNone | Prot | Priv, invlpga, check_svme),
};

-static struct opcode group7_rm7[] = {
+static const struct opcode group7_rm7[] = {
N,
DIP(SrcNone, rdtscp, check_rdtsc),
N, N, N, N, N, N,
};

-static struct opcode group1[] = {
+static const struct opcode group1[] = {
I(Lock, em_add),
I(Lock | PageTable, em_or),
I(Lock, em_adc),
@@ -3608,11 +3608,11 @@ static struct opcode group1[] = {
I(0, em_cmp),
};

-static struct opcode group1A[] = {
+static const struct opcode group1A[] = {
I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,
};

-static struct opcode group3[] = {
+static const struct opcode group3[] = {
I(DstMem | SrcImm, em_test),
I(DstMem | SrcImm, em_test),
I(DstMem | SrcNone | Lock, em_not),
@@ -3623,13 +3623,13 @@ static struct opcode group3[] = {
I(SrcMem, em_idiv_ex),
};

-static struct opcode group4[] = {
+static const struct opcode group4[] = {
I(ByteOp | DstMem | SrcNone | Lock, em_grp45),
I(ByteOp | DstMem | SrcNone | Lock, em_grp45),
N, N, N, N, N, N,
};

-static struct opcode group5[] = {
+static const struct opcode group5[] = {
I(DstMem | SrcNone | Lock, em_grp45),
I(DstMem | SrcNone | Lock, em_grp45),
I(SrcMem | Stack, em_grp45),
@@ -3639,7 +3639,7 @@ static struct opcode group5[] = {
I(SrcMem | Stack, em_grp45), N,
};

-static struct opcode group6[] = {
+static const struct opcode group6[] = {
DI(Prot, sldt),
DI(Prot, str),
II(Prot | Priv | SrcMem16, em_lldt, lldt),
@@ -3647,7 +3647,7 @@ static struct opcode group6[] = {
N, N, N, N,
};

-static struct group_dual group7 = { {
+static const struct group_dual group7 = { {
II(Mov | DstMem | Priv, em_sgdt, sgdt),
II(Mov | DstMem | Priv, em_sidt, sidt),
II(SrcMem | Priv, em_lgdt, lgdt),
@@ -3664,7 +3664,7 @@ static struct group_dual group7 = { {
EXT(0, group7_rm7),
} };

-static struct opcode group8[] = {
+static const struct opcode group8[] = {
N, N, N, N,
I(DstMem | SrcImmByte, em_bt),
I(DstMem | SrcImmByte | Lock | PageTable, em_bts),
@@ -3672,26 +3672,26 @@ static struct opcode group8[] = {
I(DstMem | SrcImmByte | Lock | PageTable, em_btc),
};

-static struct group_dual group9 = { {
+static const struct group_dual group9 = { {
N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
}, {
N, N, N, N, N, N, N, N,
} };

-static struct opcode group11[] = {
+static const struct opcode group11[] = {
I(DstMem | SrcImm | Mov | PageTable, em_mov),
X7(D(Undefined)),
};

-static struct gprefix pfx_0f_6f_0f_7f = {
+static const struct gprefix pfx_0f_6f_0f_7f = {
I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
};

-static struct gprefix pfx_vmovntpx = {
+static const struct gprefix pfx_vmovntpx = {
I(0, em_mov), N, N, N,
};

-static struct opcode opcode_table[256] = {
+static const struct opcode opcode_table[256] = {
/* 0x00 - 0x07 */
I6ALU(Lock, em_add),
I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
@@ -3808,7 +3808,7 @@ static struct opcode opcode_table[256] = {
D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
};

-static struct opcode twobyte_table[256] = {
+static const struct opcode twobyte_table[256] = {
/* 0x00 - 0x0F */
G(0, group6), GD(0, &group7), N, N,
N, I(ImplicitOps | VendorSpecific, em_syscall),
--
1.7.10.4

2012-08-29 23:32:19

by Mathias Krause

Subject: [PATCH 2/8] KVM: x86 emulator: use aligned variants of SSE register ops

As the compiler ensures that the memory operand is always aligned to a
16-byte memory location, use the aligned variant of MOVDQ (MOVDQA) for
read_sse_reg() and write_sse_reg().
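
The guarantee comes from the declared type of the operand: assuming
sse128_t carries a 16-byte alignment attribute, as the commit message
implies, the compiler must place every instance on a 16-byte boundary,
which is exactly what MOVDQA requires. A stand-alone sketch:

/* hypothetical stand-in for the kernel's sse128_t */
typedef struct {
	unsigned long long lo, hi;
} __attribute__((aligned(16))) sse128_t;

static void read_xmm0(sse128_t *data)
{
	/* aligned 128-bit store; would fault if *data were misaligned */
	asm("movdqa %%xmm0, %0" : "=m"(*data));
}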

Signed-off-by: Mathias Krause <[email protected]>
---
arch/x86/kvm/emulate.c | 64 ++++++++++++++++++++++++------------------------
1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1451cff..5a0fee1 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -909,23 +909,23 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
{
ctxt->ops->get_fpu(ctxt);
switch (reg) {
- case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break;
- case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break;
- case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break;
- case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break;
- case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break;
- case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break;
- case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break;
- case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break;
+ case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
+ case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
+ case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
+ case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
+ case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
+ case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
+ case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
+ case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
#ifdef CONFIG_X86_64
- case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break;
- case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break;
- case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break;
- case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break;
- case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break;
- case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break;
- case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break;
- case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break;
+ case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
+ case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
+ case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
+ case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
+ case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
+ case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
+ case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
+ case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
#endif
default: BUG();
}
@@ -937,23 +937,23 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
{
ctxt->ops->get_fpu(ctxt);
switch (reg) {
- case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break;
- case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break;
- case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break;
- case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break;
- case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break;
- case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break;
- case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break;
- case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break;
+ case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
+ case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
+ case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
+ case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
+ case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
+ case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
+ case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
+ case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
#ifdef CONFIG_X86_64
- case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break;
- case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break;
- case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break;
- case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break;
- case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break;
- case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break;
- case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break;
- case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break;
+ case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
+ case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
+ case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
+ case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
+ case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
+ case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
+ case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
+ case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
#endif
default: BUG();
}
--
1.7.10.4