While initializing emulation context structure, kvm memsets to 0 a
number of fields some of which are redundant since they get set
eventually in x86_decode_insn. Cleanup unnecessary initializations
and remove some fields.
This is on top of Paolo's RFC
KVM: x86: speedups for emulator memory accesses
https://lkml.org/lkml/2014/4/1/494
Here are the new realmode.flat numbers with improvement
wrt unpatched kernel -
639 cycles/emulated jump instruction (4.3%)
776 cycles/emulated move instruction (7.5%)
791 cycles/emulated arithmetic instruction (11%)
943 cycles/emulated memory load instruction (5.2%)
948 cycles/emulated memory store instruction (7.6%)
929 cycles/emulated memory RMW instruction (9.0%)
v1 numbers -
639 cycles/emulated jump instruction
786 cycles/emulated move instruction
802 cycles/emulated arithmetic instruction
936 cycles/emulated memory load instruction
970 cycles/emulated memory store instruction
1000 cycles/emulated memory RMW instruction
v3:
Minor changes as proposed in review
- 3/6 - cleanup typos
- 6/6 - change comment in struct x86_emulate_ctxt and add back a missing
if in decode_modrm
v2:
All thanks and credit to Paolo!
- 1/6 - no change
- 2/6 - new patch, inercept and check_perm replaced with checks for bits in ctxt->d
- 3/6 - new patch, remove if condition in decode_rm and rearrange bit operations
- 4/6 - remove else conditions from v1 and misc cleanups
- 5/6 - new patch, remove seg_override and related fields and functions
- 6/6 - new patch, remove memopp and move rip_relative to a local variable in
decode_modrm
Bandan Das (6):
KVM: emulate: move init_decode_cache to emulate.c
KVM: emulate: Remove ctxt->intercept and ctxt->check_perm checks
KVM: emulate: cleanup decode_modrm
KVM: emulate: clean up initializations in init_decode_cache
KVM: emulate: rework seg_override
KVM: emulate: remove memopp and rip_relative
arch/x86/include/asm/kvm_emulate.h | 26 +++++++-----
arch/x86/kvm/emulate.c | 85 ++++++++++++++++++--------------------
arch/x86/kvm/x86.c | 13 ------
3 files changed, 56 insertions(+), 68 deletions(-)
--
1.8.3.1
The same information can be gleaned from ctxt->d and avoids having
to zero/NULL initialize intercept and check_perm
Signed-off-by: Bandan Das <[email protected]>
---
arch/x86/kvm/emulate.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8e2b866..a6d9892 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4615,7 +4615,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
fetch_possible_mmx_operand(ctxt, &ctxt->dst);
}
- if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
+ if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_PRE_EXCEPT);
if (rc != X86EMUL_CONTINUE)
@@ -4635,13 +4635,13 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
}
/* Do instruction specific permission checks */
- if (ctxt->check_perm) {
+ if (ctxt->d & CheckPerm) {
rc = ctxt->check_perm(ctxt);
if (rc != X86EMUL_CONTINUE)
goto done;
}
- if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
+ if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_POST_EXCEPT);
if (rc != X86EMUL_CONTINUE)
@@ -4685,7 +4685,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
special_insn:
- if (unlikely(ctxt->guest_mode) && ctxt->intercept) {
+ if (unlikely(ctxt->guest_mode) && (ctxt->d & Intercept)) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_POST_MEMACCESS);
if (rc != X86EMUL_CONTINUE)
--
1.8.3.1
x86_decode_insn already sets a default for seg_override,
so remove it from the zeroed area. Also replace set/get functions
with direct access to the field.
Signed-off-by: Bandan Das <[email protected]>
---
arch/x86/include/asm/kvm_emulate.h | 3 +--
arch/x86/kvm/emulate.c | 41 +++++++++++++++-----------------------
2 files changed, 17 insertions(+), 27 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index d2ec45d..08a7696 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -322,12 +322,10 @@ struct x86_emulate_ctxt {
struct operand dst;
int (*execute)(struct x86_emulate_ctxt *ctxt);
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
- bool has_seg_override;
bool rip_relative;
u8 rex_prefix;
u8 lock_prefix;
u8 rep_prefix;
- u8 seg_override;
/* bitmaps of registers in _regs[] that can be read */
u32 regs_valid;
/* bitmaps of registers in _regs[] that have been written */
@@ -338,6 +336,7 @@ struct x86_emulate_ctxt {
u8 modrm_reg;
u8 modrm_rm;
u8 modrm_seg;
+ u8 seg_override;
u64 d;
unsigned long _eip;
struct operand memop;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e459c4f..2b146b7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -512,12 +512,6 @@ static u32 desc_limit_scaled(struct desc_struct *desc)
return desc->g ? (limit << 12) | 0xfff : limit;
}
-static void set_seg_override(struct x86_emulate_ctxt *ctxt, int seg)
-{
- ctxt->has_seg_override = true;
- ctxt->seg_override = seg;
-}
-
static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
@@ -526,14 +520,6 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
return ctxt->ops->get_cached_segment_base(ctxt, seg);
}
-static unsigned seg_override(struct x86_emulate_ctxt *ctxt)
-{
- if (!ctxt->has_seg_override)
- return 0;
-
- return ctxt->seg_override;
-}
-
static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
u32 error, bool valid)
{
@@ -4192,7 +4178,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
op->addr.mem.ea =
register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
- op->addr.mem.seg = seg_override(ctxt);
+ op->addr.mem.seg = ctxt->seg_override;
op->val = 0;
op->count = 1;
break;
@@ -4203,7 +4189,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
register_address(ctxt,
reg_read(ctxt, VCPU_REGS_RBX) +
(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
- op->addr.mem.seg = seg_override(ctxt);
+ op->addr.mem.seg = ctxt->seg_override;
op->val = 0;
break;
case OpImmFAddr:
@@ -4250,6 +4236,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
int mode = ctxt->mode;
int def_op_bytes, def_ad_bytes, goffset, simd_prefix;
bool op_prefix = false;
+ bool has_seg_override = false;
struct opcode opcode;
ctxt->memop.type = OP_NONE;
@@ -4303,11 +4290,13 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
case 0x2e: /* CS override */
case 0x36: /* SS override */
case 0x3e: /* DS override */
- set_seg_override(ctxt, (ctxt->b >> 3) & 3);
+ has_seg_override = true;
+ ctxt->seg_override = (ctxt->b >> 3) & 3;
break;
case 0x64: /* FS override */
case 0x65: /* GS override */
- set_seg_override(ctxt, ctxt->b & 7);
+ has_seg_override = true;
+ ctxt->seg_override = ctxt->b & 7;
break;
case 0x40 ... 0x4f: /* REX */
if (mode != X86EMUL_MODE_PROT64)
@@ -4438,17 +4427,19 @@ done_prefixes:
/* ModRM and SIB bytes. */
if (ctxt->d & ModRM) {
rc = decode_modrm(ctxt, &ctxt->memop);
- if (!ctxt->has_seg_override)
- set_seg_override(ctxt, ctxt->modrm_seg);
+ if (!has_seg_override) {
+ has_seg_override = true;
+ ctxt->seg_override = ctxt->modrm_seg;
+ }
} else if (ctxt->d & MemAbs)
rc = decode_abs(ctxt, &ctxt->memop);
if (rc != X86EMUL_CONTINUE)
goto done;
- if (!ctxt->has_seg_override)
- set_seg_override(ctxt, VCPU_SREG_DS);
+ if (!has_seg_override)
+ ctxt->seg_override = VCPU_SREG_DS;
- ctxt->memop.addr.mem.seg = seg_override(ctxt);
+ ctxt->memop.addr.mem.seg = ctxt->seg_override;
if (ctxt->memop.type == OP_MEM && ctxt->ad_bytes != 8)
ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
@@ -4550,8 +4541,8 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
- memset(&ctxt->has_seg_override, 0,
- (void *)&ctxt->modrm - (void *)&ctxt->has_seg_override);
+ memset(&ctxt->rip_relative, 0,
+ (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);
ctxt->io_read.pos = 0;
ctxt->io_read.end = 0;
--
1.8.3.1
Move typecast on "out of range value" of mem.ea to
decode_modrm. rip_relative is only set in decode_modrm,
change it to a local var
Signed-off-by: Bandan Das <[email protected]>
---
arch/x86/include/asm/kvm_emulate.h | 8 +++++---
arch/x86/kvm/emulate.c | 20 +++++++++-----------
2 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 08a7696..085d688 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -322,7 +322,11 @@ struct x86_emulate_ctxt {
struct operand dst;
int (*execute)(struct x86_emulate_ctxt *ctxt);
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
- bool rip_relative;
+ /*
+ * The following five fields are cleared together,
+ * the rest are initialized unconditionally in x86_decode_insn
+ * or elsewhere
+ */
u8 rex_prefix;
u8 lock_prefix;
u8 rep_prefix;
@@ -340,9 +344,7 @@ struct x86_emulate_ctxt {
u64 d;
unsigned long _eip;
struct operand memop;
- /* Fields above regs are cleared together. */
unsigned long _regs[NR_VCPU_REGS];
- struct operand *memopp;
struct fetch_cache fetch;
struct read_cache io_read;
struct read_cache mem_read;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 2b146b7..b9881e7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1053,6 +1053,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
int index_reg, base_reg, scale;
int rc = X86EMUL_CONTINUE;
ulong modrm_ea = 0;
+ bool rip_relative = false;
ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
@@ -1155,7 +1156,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
modrm_ea += reg_read(ctxt, index_reg) << scale;
} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
if (ctxt->mode == X86EMUL_MODE_PROT64)
- ctxt->rip_relative = 1;
+ rip_relative = true;
} else {
base_reg = ctxt->modrm_rm;
modrm_ea += reg_read(ctxt, base_reg);
@@ -1175,6 +1176,11 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
}
}
op->addr.mem.ea = modrm_ea;
+ if (ctxt->ad_bytes != 8)
+ ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
+ if (rip_relative)
+ ctxt->memop.addr.mem.ea += ctxt->_eip;
+
done:
return rc;
}
@@ -4088,7 +4094,6 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
ctxt->memop.bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
mem_common:
*op = ctxt->memop;
- ctxt->memopp = op;
if (ctxt->d & BitOp)
fetch_bit_operand(ctxt);
op->orig_val = op->val;
@@ -4240,7 +4245,6 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
struct opcode opcode;
ctxt->memop.type = OP_NONE;
- ctxt->memopp = NULL;
ctxt->_eip = ctxt->eip;
ctxt->fetch.start = ctxt->_eip;
ctxt->fetch.end = ctxt->fetch.start + insn_len;
@@ -4441,9 +4445,6 @@ done_prefixes:
ctxt->memop.addr.mem.seg = ctxt->seg_override;
- if (ctxt->memop.type == OP_MEM && ctxt->ad_bytes != 8)
- ctxt->memop.addr.mem.ea = (u32)ctxt->memop.addr.mem.ea;
-
/*
* Decode and fetch the source operand: register, memory
* or immediate.
@@ -4464,9 +4465,6 @@ done_prefixes:
rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
done:
- if (ctxt->memopp && ctxt->memopp->type == OP_MEM && ctxt->rip_relative)
- ctxt->memopp->addr.mem.ea += ctxt->_eip;
-
return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
}
@@ -4541,8 +4539,8 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
- memset(&ctxt->rip_relative, 0,
- (void *)&ctxt->modrm - (void *)&ctxt->rip_relative);
+ memset(&ctxt->rex_prefix, 0,
+ (void *)&ctxt->modrm - (void *)&ctxt->rex_prefix);
ctxt->io_read.pos = 0;
ctxt->io_read.end = 0;
--
1.8.3.1
A lot of initializations are unnecessary as they get set to
appropriate values before actually being used. Optimize
placement of fields in x86_emulate_ctxt
Signed-off-by: Bandan Das <[email protected]>
---
arch/x86/include/asm/kvm_emulate.h | 20 +++++++++++---------
arch/x86/kvm/emulate.c | 7 ++-----
2 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index ad4cca8..d2ec45d 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -315,30 +315,32 @@ struct x86_emulate_ctxt {
u8 opcode_len;
u8 b;
u8 intercept;
- u8 lock_prefix;
- u8 rep_prefix;
u8 op_bytes;
u8 ad_bytes;
- u8 rex_prefix;
struct operand src;
struct operand src2;
struct operand dst;
- bool has_seg_override;
- u8 seg_override;
- u64 d;
int (*execute)(struct x86_emulate_ctxt *ctxt);
int (*check_perm)(struct x86_emulate_ctxt *ctxt);
+ bool has_seg_override;
+ bool rip_relative;
+ u8 rex_prefix;
+ u8 lock_prefix;
+ u8 rep_prefix;
+ u8 seg_override;
+ /* bitmaps of registers in _regs[] that can be read */
+ u32 regs_valid;
+ /* bitmaps of registers in _regs[] that have been written */
+ u32 regs_dirty;
/* modrm */
u8 modrm;
u8 modrm_mod;
u8 modrm_reg;
u8 modrm_rm;
u8 modrm_seg;
- bool rip_relative;
+ u64 d;
unsigned long _eip;
struct operand memop;
- u32 regs_valid; /* bitmaps of registers in _regs[] that can be read */
- u32 regs_dirty; /* bitmaps of registers in _regs[] that have been written */
/* Fields above regs are cleared together. */
unsigned long _regs[NR_VCPU_REGS];
struct operand *memopp;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e12ac87..e459c4f 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4550,14 +4550,11 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
void init_decode_cache(struct x86_emulate_ctxt *ctxt)
{
- memset(&ctxt->opcode_len, 0,
- (void *)&ctxt->_regs - (void *)&ctxt->opcode_len);
+ memset(&ctxt->has_seg_override, 0,
+ (void *)&ctxt->modrm - (void *)&ctxt->has_seg_override);
- ctxt->fetch.start = 0;
- ctxt->fetch.end = 0;
ctxt->io_read.pos = 0;
ctxt->io_read.end = 0;
- ctxt->mem_read.pos = 0;
ctxt->mem_read.end = 0;
}
--
1.8.3.1
Core emulator functions all belong in emulator.c,
x86 should have no knowledge of emulator internals
Signed-off-by: Bandan Das <[email protected]>
---
arch/x86/include/asm/kvm_emulate.h | 1 +
arch/x86/kvm/emulate.c | 13 +++++++++++++
arch/x86/kvm/x86.c | 13 -------------
3 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index d085f73..ad4cca8 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -436,6 +436,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
#define EMULATION_OK 0
#define EMULATION_RESTART 1
#define EMULATION_INTERCEPTED 2
+void init_decode_cache(struct x86_emulate_ctxt *ctxt);
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
u16 tss_selector, int idt_index, int reason,
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8ca292c..8e2b866 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4550,6 +4550,19 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *))
return X86EMUL_CONTINUE;
}
+void init_decode_cache(struct x86_emulate_ctxt *ctxt)
+{
+ memset(&ctxt->opcode_len, 0,
+ (void *)&ctxt->_regs - (void *)&ctxt->opcode_len);
+
+ ctxt->fetch.start = 0;
+ ctxt->fetch.end = 0;
+ ctxt->io_read.pos = 0;
+ ctxt->io_read.end = 0;
+ ctxt->mem_read.pos = 0;
+ ctxt->mem_read.end = 0;
+}
+
int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
{
const struct x86_emulate_ops *ops = ctxt->ops;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4fad05d..122410d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4951,19 +4951,6 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
kvm_queue_exception(vcpu, ctxt->exception.vector);
}
-static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
-{
- memset(&ctxt->opcode_len, 0,
- (void *)&ctxt->_regs - (void *)&ctxt->opcode_len);
-
- ctxt->fetch.start = 0;
- ctxt->fetch.end = 0;
- ctxt->io_read.pos = 0;
- ctxt->io_read.end = 0;
- ctxt->mem_read.pos = 0;
- ctxt->mem_read.end = 0;
-}
-
static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
{
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
--
1.8.3.1
Remove the if conditional - that will help us avoid
an "else initialize to 0" Also, rearrange operators
for slightly better code.
Signed-off-by: Bandan Das <[email protected]>
---
arch/x86/kvm/emulate.c | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a6d9892..e12ac87 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1064,19 +1064,17 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
struct operand *op)
{
u8 sib;
- int index_reg = 0, base_reg = 0, scale;
+ int index_reg, base_reg, scale;
int rc = X86EMUL_CONTINUE;
ulong modrm_ea = 0;
- if (ctxt->rex_prefix) {
- ctxt->modrm_reg = (ctxt->rex_prefix & 4) << 1; /* REX.R */
- index_reg = (ctxt->rex_prefix & 2) << 2; /* REX.X */
- ctxt->modrm_rm = base_reg = (ctxt->rex_prefix & 1) << 3; /* REG.B */
- }
+ ctxt->modrm_reg = ((ctxt->rex_prefix << 1) & 8); /* REX.R */
+ index_reg = (ctxt->rex_prefix << 2) & 8; /* REX.X */
+ base_reg = (ctxt->rex_prefix << 3) & 8; /* REX.B */
- ctxt->modrm_mod |= (ctxt->modrm & 0xc0) >> 6;
+ ctxt->modrm_mod = (ctxt->modrm & 0xc0) >> 6;
ctxt->modrm_reg |= (ctxt->modrm & 0x38) >> 3;
- ctxt->modrm_rm |= (ctxt->modrm & 0x07);
+ ctxt->modrm_rm = base_reg | (ctxt->modrm & 0x07);
ctxt->modrm_seg = VCPU_SREG_DS;
if (ctxt->modrm_mod == 3) {
--
1.8.3.1
Il 16/04/2014 18:46, Bandan Das ha scritto:
> While initializing emulation context structure, kvm memsets to 0 a
> number of fields some of which are redundant since they get set
> eventually in x86_decode_insn. Cleanup unnecessary initializations
> and remove some fields.
>
> This is on top of Paolo's RFC
> KVM: x86: speedups for emulator memory accesses
> https://lkml.org/lkml/2014/4/1/494
>
> Here are the new realmode.flat numbers with improvement
> wrt unpatched kernel -
>
> 639 cycles/emulated jump instruction (4.3%)
> 776 cycles/emulated move instruction (7.5%)
> 791 cycles/emulated arithmetic instruction (11%)
> 943 cycles/emulated memory load instruction (5.2%)
> 948 cycles/emulated memory store instruction (7.6%)
> 929 cycles/emulated memory RMW instruction (9.0%)
>
> v1 numbers -
> 639 cycles/emulated jump instruction
> 786 cycles/emulated move instruction
> 802 cycles/emulated arithmetic instruction
> 936 cycles/emulated memory load instruction
> 970 cycles/emulated memory store instruction
> 1000 cycles/emulated memory RMW instruction
>
> v3:
> Minor changes as proposed in review
> - 3/6 - cleanup typos
> - 6/6 - change comment in struct x86_emulate_ctxt and add back a missing
> if in decode_modrm
>
> v2:
> All thanks and credit to Paolo!
> - 1/6 - no change
> - 2/6 - new patch, inercept and check_perm replaced with checks for bits in ctxt->d
> - 3/6 - new patch, remove if condition in decode_rm and rearrange bit operations
> - 4/6 - remove else conditions from v1 and misc cleanups
> - 5/6 - new patch, remove seg_override and related fields and functions
> - 6/6 - new patch, remove memopp and move rip_relative to a local variable in
> decode_modrm
>
> Bandan Das (6):
> KVM: emulate: move init_decode_cache to emulate.c
> KVM: emulate: Remove ctxt->intercept and ctxt->check_perm checks
> KVM: emulate: cleanup decode_modrm
> KVM: emulate: clean up initializations in init_decode_cache
> KVM: emulate: rework seg_override
> KVM: emulate: remove memopp and rip_relative
>
> arch/x86/include/asm/kvm_emulate.h | 26 +++++++-----
> arch/x86/kvm/emulate.c | 85 ++++++++++++++++++--------------------
> arch/x86/kvm/x86.c | 13 ------
> 3 files changed, 56 insertions(+), 68 deletions(-)
>
Applied to a private branch collecting all the emulator speedups. Thanks!
Paolo