Subject: [RFC v2-fix-v2 1/2] x86/sev-es: Abstract out MMIO instruction decoding

From: "Kirill A. Shutemov" <[email protected]>

For regular virtual machine, MMIO is handled by the VMM: KVM
emulates instruction that caused MMIO. But, this model doesn't
work for a secure VMs (like SEV or TDX) as VMM doesn't have
access to the guest memory and register state. VMM needs
assistance in handling MMIO: it induces exception in the guest.
Guest has to decode the instruction and handle it on its own.

Instruction decoding logic is similar between AMD SEV and TDX
code. So extract the decoding code to insn-eval.c where it can
be used by both SEV and TDX.

This code adds no functional changes. It is only build-tested
for SEV.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: Tom Lendacky <[email protected]>
Cc: Joerg Roedel <[email protected]>
---
arch/x86/include/asm/insn-eval.h | 13 +++
arch/x86/kernel/sev.c | 171 ++++++++-----------------------
arch/x86/lib/insn-eval.c | 102 ++++++++++++++++++
3 files changed, 155 insertions(+), 131 deletions(-)

diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index 91d7182ad2d6..4a4ca7e7be66 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -19,6 +19,7 @@ bool insn_has_rep_prefix(struct insn *insn);
void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs);
int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs);
+void *insn_get_modrm_reg_ptr(struct insn *insn, struct pt_regs *regs);
unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
int insn_get_code_seg_params(struct pt_regs *regs);
int insn_fetch_from_user(struct pt_regs *regs,
@@ -28,4 +29,16 @@ int insn_fetch_from_user_inatomic(struct pt_regs *regs,
bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs,
unsigned char buf[MAX_INSN_SIZE], int buf_size);

+enum mmio_type {
+ MMIO_DECODE_FAILED,
+ MMIO_WRITE,
+ MMIO_WRITE_IMM,
+ MMIO_READ,
+ MMIO_READ_ZERO_EXTEND,
+ MMIO_READ_SIGN_EXTEND,
+ MMIO_MOVS,
+};
+
+enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes);
+
#endif /* _ASM_X86_INSN_EVAL_H */
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 651b81cd648e..f7a743d122eb 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -789,22 +789,6 @@ static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
do_early_exception(ctxt->regs, trapnr);
}

-static long *vc_insn_get_reg(struct es_em_ctxt *ctxt)
-{
- long *reg_array;
- int offset;
-
- reg_array = (long *)ctxt->regs;
- offset = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs);
-
- if (offset < 0)
- return NULL;
-
- offset /= sizeof(long);
-
- return reg_array + offset;
-}
-
static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
{
long *reg_array;
@@ -852,76 +836,6 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
}

-static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb,
- struct es_em_ctxt *ctxt)
-{
- struct insn *insn = &ctxt->insn;
- unsigned int bytes = 0;
- enum es_result ret;
- int sign_byte;
- long *reg_data;
-
- switch (insn->opcode.bytes[1]) {
- /* MMIO Read w/ zero-extension */
- case 0xb6:
- bytes = 1;
- fallthrough;
- case 0xb7:
- if (!bytes)
- bytes = 2;
-
- ret = vc_do_mmio(ghcb, ctxt, bytes, true);
- if (ret)
- break;
-
- /* Zero extend based on operand size */
- reg_data = vc_insn_get_reg(ctxt);
- if (!reg_data)
- return ES_DECODE_FAILED;
-
- memset(reg_data, 0, insn->opnd_bytes);
-
- memcpy(reg_data, ghcb->shared_buffer, bytes);
- break;
-
- /* MMIO Read w/ sign-extension */
- case 0xbe:
- bytes = 1;
- fallthrough;
- case 0xbf:
- if (!bytes)
- bytes = 2;
-
- ret = vc_do_mmio(ghcb, ctxt, bytes, true);
- if (ret)
- break;
-
- /* Sign extend based on operand size */
- reg_data = vc_insn_get_reg(ctxt);
- if (!reg_data)
- return ES_DECODE_FAILED;
-
- if (bytes == 1) {
- u8 *val = (u8 *)ghcb->shared_buffer;
-
- sign_byte = (*val & 0x80) ? 0xff : 0x00;
- } else {
- u16 *val = (u16 *)ghcb->shared_buffer;
-
- sign_byte = (*val & 0x8000) ? 0xff : 0x00;
- }
- memset(reg_data, sign_byte, insn->opnd_bytes);
-
- memcpy(reg_data, ghcb->shared_buffer, bytes);
- break;
-
- default:
- ret = ES_UNSUPPORTED;
- }
-
- return ret;
-}
-
/*
* The MOVS instruction has two memory operands, which raises the
* problem that it is not known whether the access to the source or the
@@ -989,83 +903,78 @@ static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
return ES_RETRY;
}

-static enum es_result vc_handle_mmio(struct ghcb *ghcb,
- struct es_em_ctxt *ctxt)
+static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
struct insn *insn = &ctxt->insn;
unsigned int bytes = 0;
+ enum mmio_type mmio;
enum es_result ret;
+ u8 sign_byte;
long *reg_data;

- switch (insn->opcode.bytes[0]) {
- /* MMIO Write */
- case 0x88:
- bytes = 1;
- fallthrough;
- case 0x89:
- if (!bytes)
- bytes = insn->opnd_bytes;
+ mmio = insn_decode_mmio(insn, &bytes);
+ if (mmio == MMIO_DECODE_FAILED)
+ return ES_DECODE_FAILED;

- reg_data = vc_insn_get_reg(ctxt);
+ if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
+ reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs);
if (!reg_data)
return ES_DECODE_FAILED;
+ }

+ switch (mmio) {
+ case MMIO_WRITE:
memcpy(ghcb->shared_buffer, reg_data, bytes);
-
ret = vc_do_mmio(ghcb, ctxt, bytes, false);
break;
-
- case 0xc6:
- bytes = 1;
- fallthrough;
- case 0xc7:
- if (!bytes)
- bytes = insn->opnd_bytes;
-
+ case MMIO_WRITE_IMM:
memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
-
ret = vc_do_mmio(ghcb, ctxt, bytes, false);
break;
-
- /* MMIO Read */
- case 0x8a:
- bytes = 1;
- fallthrough;
- case 0x8b:
- if (!bytes)
- bytes = insn->opnd_bytes;
-
+ case MMIO_READ:
ret = vc_do_mmio(ghcb, ctxt, bytes, true);
if (ret)
break;

- reg_data = vc_insn_get_reg(ctxt);
- if (!reg_data)
- return ES_DECODE_FAILED;
-
/* Zero-extend for 32-bit operation */
if (bytes == 4)
*reg_data = 0;

memcpy(reg_data, ghcb->shared_buffer, bytes);
break;
+ case MMIO_READ_ZERO_EXTEND:
+ ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+ if (ret)
+ break;
+
+ memset(reg_data, 0, insn->opnd_bytes);
+ memcpy(reg_data, ghcb->shared_buffer, bytes);
+ break;
+ case MMIO_READ_SIGN_EXTEND:
+ ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+ if (ret)
+ break;

- /* MOVS instruction */
- case 0xa4:
- bytes = 1;
- fallthrough;
- case 0xa5:
- if (!bytes)
- bytes = insn->opnd_bytes;
+ if (bytes == 1) {
+ u8 *val = (u8 *)ghcb->shared_buffer;

- ret = vc_handle_mmio_movs(ctxt, bytes);
+ sign_byte = (*val & 0x80) ? 0xff : 0x00;
+ } else {
+ u16 *val = (u16 *)ghcb->shared_buffer;
+
+ sign_byte = (*val & 0x8000) ? 0xff : 0x00;
+ }
+
+ /* Sign extend based on operand size */
+ memset(reg_data, sign_byte, insn->opnd_bytes);
+ memcpy(reg_data, ghcb->shared_buffer, bytes);
break;
- /* Two-Byte Opcodes */
- case 0x0f:
- ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt);
+ case MMIO_MOVS:
+ ret = vc_handle_mmio_movs(ctxt, bytes);
break;
default:
ret = ES_UNSUPPORTED;
+ break;
}

return ret;
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index a67afd74232c..81e8fe0bdc39 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -850,6 +850,26 @@ int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs)
return get_reg_offset(insn, regs, REG_TYPE_REG);
}

+/**
+ * insn_get_modrm_reg_ptr() - Obtain register pointer based on ModRM byte
+ * @insn: Instruction containing the ModRM byte
+ * @regs: Register values as seen when entering kernel mode
+ *
+ * Returns:
+ *
+ * The register indicated by the reg part of the ModRM byte.
+ * The register is obtained as an pointer within pt_regs.
+ */
+void *insn_get_modrm_reg_ptr(struct insn *insn, struct pt_regs *regs)
+{
+ int offset;
+
+ offset = insn_get_modrm_reg_off(insn, regs);
+ if (offset < 0)
+ return NULL;
+ return (void *)regs + offset;
+}
+
/**
* get_seg_base_limit() - obtain base address and limit of a segment
* @insn: Instruction. Must be valid.
@@ -1539,3 +1559,85 @@ bool insn_decode_from_regs(struct insn *insn, struct pt_regs *regs,

return true;
}
+
+/**
+ * insn_decode() - Decode a MMIO instruction
+ * @insn: Structure to store decoded instruction
+ * @bytes: Returns size of memory operand
+ *
+ * Decodes instruction that used for Memory-mapped I/O.
+ *
+ * Returns:
+ *
+ * Type of the instruction. Size of memory operand is stored in @bytes.
+ * If decode failed, MMIO_DECODE_FAILED returned.
+ */
+enum mmio_type insn_decode_mmio(struct insn *insn, int *bytes)
+{
+ int type = MMIO_DECODE_FAILED;
+
+ *bytes = 0;
+
+ insn_get_opcode(insn);
+ switch (insn->opcode.bytes[0]) {
+ case 0x88: /* MOV m8,r8 */
+ *bytes = 1;
+ fallthrough;
+ case 0x89: /* MOV m16/m32/m64, r16/m32/m64 */
+ if (!*bytes)
+ *bytes = insn->opnd_bytes;
+ type = MMIO_WRITE;
+ break;
+
+ case 0xc6: /* MOV m8, imm8 */
+ *bytes = 1;
+ fallthrough;
+ case 0xc7: /* MOV m16/m32/m64, imm16/imm32/imm64 */
+ if (!*bytes)
+ *bytes = insn->opnd_bytes;
+ type = MMIO_WRITE_IMM;
+ break;
+
+ case 0x8a: /* MOV r8, m8 */
+ *bytes = 1;
+ fallthrough;
+ case 0x8b: /* MOV r16/r32/r64, m16/m32/m64 */
+ if (!*bytes)
+ *bytes = insn->opnd_bytes;
+ type = MMIO_READ;
+ break;
+
+ case 0xa4: /* MOVS m8, m8 */
+ *bytes = 1;
+ fallthrough;
+ case 0xa5: /* MOVS m16/m32/m64, m16/m32/m64 */
+ if (!*bytes)
+ *bytes = insn->opnd_bytes;
+ type = MMIO_MOVS;
+ break;
+
+ case 0x0f: /* Two-byte instruction */
+ switch (insn->opcode.bytes[1]) {
+ case 0xb6: /* MOVZX r16/r32/r64, m8 */
+ *bytes = 1;
+ fallthrough;
+ case 0xb7: /* MOVZX r32/r64, m16 */
+ if (!*bytes)
+ *bytes = 2;
+ type = MMIO_READ_ZERO_EXTEND;
+ break;
+
+ case 0xbe: /* MOVSX r16/r32/r64, m8 */
+ *bytes = 1;
+ fallthrough;
+ case 0xbf: /* MOVSX r32/r64, m16 */
+ if (!*bytes)
+ *bytes = 2;
+ type = MMIO_READ_SIGN_EXTEND;
+ break;
+ }
+ break;
+ }
+
+ return type;
+}
--
2.25.1


2021-06-05 22:00:16

by Dan Williams

[permalink] [raw]
Subject: Re: [RFC v2-fix-v2 1/2] x86/sev-es: Abstract out MMIO instruction decoding

On Wed, Jun 2, 2021 at 12:42 PM Kuppuswamy Sathyanarayanan
<[email protected]> wrote:
>
> From: "Kirill A. Shutemov" <[email protected]>
>

Lead in with the what because I read 2 paragraphs to figure out that
this was a prep patch.

"In preparation for sharing MMIO instruction decode between SEV-ES and
TDX factor out the common decode into a new insn_decode_mmio()
helper."

> For regular virtual machine, MMIO is handled by the VMM: KVM
> emulates instruction that caused MMIO. But, this model doesn't
> work for a secure VMs (like SEV or TDX) as VMM doesn't have
> access to the guest memory and register state. VMM needs
> assistance in handling MMIO: it induces exception in the guest.
> Guest has to decode the instruction and handle it on its own.
>
> Instruction decoding logic is similar between AMD SEV and TDX
> code. So extract the decoding code to insn-eval.c where it can
> be used by both SEV and TDX.
>
> This code adds no functional changes. It is only build-tested
> for SEV.

The diff is such that I could not verify "no functional change" change
without doing more careful analysis. Typically with non-trivial
refactoring they are split out over a few patches with a final removal
of replaced infra at the end. This does the entire conversion all at
once.

How about an approach that has vc_handle_mmio() handle
MMIO_DECODE_FAILED for missing support in the common helper until the
final patch that can do:

> + mmio = insn_decode_mmio(insn, &bytes);
> + if (mmio == MMIO_DECODE_FAILED)
> + return ES_DECODE_FAILED;

...i.e. insn_decode_mmio() is finally prepared to handle all scenarios
and vc_handle_mmio_twobyte_ops() can finally be deleted. This also
helps a future bisect that finds "whoops, 'no functional changes' was
incorrect".

>
> Signed-off-by: Kirill A. Shutemov <[email protected]>
> Cc: Tom Lendacky <[email protected]>
> Cc: Joerg Roedel <[email protected]>

Missing Sathya signed-off-by...

Subject: [RFC v2-fix-v3 0/4] x86/tdx: Handle in-kernel MMIO

This patchset addresses the review comments in the patch titled
"x86/tdx: Handle in-kernel MMIO". Since it requires
patch split, sending these together.

Changes since RFC-v2-fix-v2:
* Further divided patch "x86/sev-es: Abstract out MMIO instruction
decoding", and moved generic APIs common to both TDX/SEV into
seperate patches. Also introduced new patch for AMD code to utilize
the generic APIs.
* Fixed commit log as per review comments.

Changes since RFC v2-fix:
* Introduced "x86/sev-es: Abstract out MMIO instruction
decoding" patch for sharing common code between TDX
and SEV.
* Modified TDX MMIO code to utilize common shared functions.
* Modified commit log to reflect latest changes and to
address review comments.

Changes since RFC v2:
* Fixed commit log as per Dave's review.

Kirill A. Shutemov (4):
x86/insn-eval: Introduce insn_get_modrm_reg_ptr()
x86/insn-eval: Introduce insn_decode_mmio()
x86/sev-es: Use insn_decode_mmio() for MMIO implementation
x86/tdx: Handle in-kernel MMIO

arch/x86/include/asm/insn-eval.h | 13 +++
arch/x86/kernel/sev.c | 171 ++++++++-----------------------
arch/x86/kernel/tdx.c | 109 ++++++++++++++++++++
arch/x86/lib/insn-eval.c | 102 ++++++++++++++++++
4 files changed, 264 insertions(+), 131 deletions(-)

--
2.25.1

Subject: [RFC v2-fix-v3 3/4] x86/sev-es: Use insn_decode_mmio() for MMIO implementation

From: "Kirill A. Shutemov" <[email protected]>

Switch SEV implementation to insn_decode_mmio(). The helper is going
to be used by TDX too.

No functional changes. It is only build-tested.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Cc: Tom Lendacky <[email protected]>
Cc: Joerg Roedel <[email protected]>
Signed-off-by: Kuppuswamy Sathyanarayanan <[email protected]>
---
arch/x86/kernel/sev.c | 171 ++++++++++--------------------------------
1 file changed, 40 insertions(+), 131 deletions(-)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index c6b0ee3e2345..cdb5c453e21c 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -790,22 +790,6 @@ static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
do_early_exception(ctxt->regs, trapnr);
}

-static long *vc_insn_get_reg(struct es_em_ctxt *ctxt)
-{
- long *reg_array;
- int offset;
-
- reg_array = (long *)ctxt->regs;
- offset = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs);
-
- if (offset < 0)
- return NULL;
-
- offset /= sizeof(long);
-
- return reg_array + offset;
-}
-
static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
{
long *reg_array;
@@ -853,76 +837,6 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
}

-static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb,
- struct es_em_ctxt *ctxt)
-{
- struct insn *insn = &ctxt->insn;
- unsigned int bytes = 0;
- enum es_result ret;
- int sign_byte;
- long *reg_data;
-
- switch (insn->opcode.bytes[1]) {
- /* MMIO Read w/ zero-extension */
- case 0xb6:
- bytes = 1;
- fallthrough;
- case 0xb7:
- if (!bytes)
- bytes = 2;
-
- ret = vc_do_mmio(ghcb, ctxt, bytes, true);
- if (ret)
- break;
-
- /* Zero extend based on operand size */
- reg_data = vc_insn_get_reg(ctxt);
- if (!reg_data)
- return ES_DECODE_FAILED;
-
- memset(reg_data, 0, insn->opnd_bytes);
-
- memcpy(reg_data, ghcb->shared_buffer, bytes);
- break;
-
- /* MMIO Read w/ sign-extension */
- case 0xbe:
- bytes = 1;
- fallthrough;
- case 0xbf:
- if (!bytes)
- bytes = 2;
-
- ret = vc_do_mmio(ghcb, ctxt, bytes, true);
- if (ret)
- break;
-
- /* Sign extend based on operand size */
- reg_data = vc_insn_get_reg(ctxt);
- if (!reg_data)
- return ES_DECODE_FAILED;
-
- if (bytes == 1) {
- u8 *val = (u8 *)ghcb->shared_buffer;
-
- sign_byte = (*val & 0x80) ? 0xff : 0x00;
- } else {
- u16 *val = (u16 *)ghcb->shared_buffer;
-
- sign_byte = (*val & 0x8000) ? 0xff : 0x00;
- }
- memset(reg_data, sign_byte, insn->opnd_bytes);
-
- memcpy(reg_data, ghcb->shared_buffer, bytes);
- break;
-
- default:
- ret = ES_UNSUPPORTED;
- }
-
- return ret;
-}
-
/*
* The MOVS instruction has two memory operands, which raises the
* problem that it is not known whether the access to the source or the
@@ -990,83 +904,78 @@ static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
return ES_RETRY;
}

-static enum es_result vc_handle_mmio(struct ghcb *ghcb,
- struct es_em_ctxt *ctxt)
+static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
struct insn *insn = &ctxt->insn;
unsigned int bytes = 0;
+ enum mmio_type mmio;
enum es_result ret;
+ u8 sign_byte;
long *reg_data;

- switch (insn->opcode.bytes[0]) {
- /* MMIO Write */
- case 0x88:
- bytes = 1;
- fallthrough;
- case 0x89:
- if (!bytes)
- bytes = insn->opnd_bytes;
+ mmio = insn_decode_mmio(insn, &bytes);
+ if (mmio == MMIO_DECODE_FAILED)
+ return ES_DECODE_FAILED;

- reg_data = vc_insn_get_reg(ctxt);
+ if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
+ reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs);
if (!reg_data)
return ES_DECODE_FAILED;
+ }

+ switch (mmio) {
+ case MMIO_WRITE:
memcpy(ghcb->shared_buffer, reg_data, bytes);
-
ret = vc_do_mmio(ghcb, ctxt, bytes, false);
break;
-
- case 0xc6:
- bytes = 1;
- fallthrough;
- case 0xc7:
- if (!bytes)
- bytes = insn->opnd_bytes;
-
+ case MMIO_WRITE_IMM:
memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
-
ret = vc_do_mmio(ghcb, ctxt, bytes, false);
break;
-
- /* MMIO Read */
- case 0x8a:
- bytes = 1;
- fallthrough;
- case 0x8b:
- if (!bytes)
- bytes = insn->opnd_bytes;
-
+ case MMIO_READ:
ret = vc_do_mmio(ghcb, ctxt, bytes, true);
if (ret)
break;

- reg_data = vc_insn_get_reg(ctxt);
- if (!reg_data)
- return ES_DECODE_FAILED;
-
/* Zero-extend for 32-bit operation */
if (bytes == 4)
*reg_data = 0;

memcpy(reg_data, ghcb->shared_buffer, bytes);
break;
+ case MMIO_READ_ZERO_EXTEND:
+ ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+ if (ret)
+ break;
+
+ memset(reg_data, 0, insn->opnd_bytes);
+ memcpy(reg_data, ghcb->shared_buffer, bytes);
+ break;
+ case MMIO_READ_SIGN_EXTEND:
+ ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+ if (ret)
+ break;

- /* MOVS instruction */
- case 0xa4:
- bytes = 1;
- fallthrough;
- case 0xa5:
- if (!bytes)
- bytes = insn->opnd_bytes;
+ if (bytes == 1) {
+ u8 *val = (u8 *)ghcb->shared_buffer;

- ret = vc_handle_mmio_movs(ctxt, bytes);
+ sign_byte = (*val & 0x80) ? 0xff : 0x00;
+ } else {
+ u16 *val = (u16 *)ghcb->shared_buffer;
+
+ sign_byte = (*val & 0x8000) ? 0xff : 0x00;
+ }
+
+ /* Sign extend based on operand size */
+ memset(reg_data, sign_byte, insn->opnd_bytes);
+ memcpy(reg_data, ghcb->shared_buffer, bytes);
break;
- /* Two-Byte Opcodes */
- case 0x0f:
- ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt);
+ case MMIO_MOVS:
+ ret = vc_handle_mmio_movs(ctxt, bytes);
break;
default:
ret = ES_UNSUPPORTED;
+ break;
}

return ret;
--
2.25.1

Subject: [RFC v2-fix-v3 4/4] x86/tdx: Handle in-kernel MMIO

From: "Kirill A. Shutemov" <[email protected]>

In traditional VMs, MMIO is usually implemented by giving a
guest access to a mapping which will cause a VMEXIT on access
and then the VMM emulating the access. That's not possible in
TDX guest because VMEXIT will expose the register state to the
host. TDX guests don't trust the host and can't have its state
exposed to the host. In TDX the MMIO regions are instead
configured to trigger a #VE exception in the guest. The guest #VE
handler then emulates the MMIO instruction inside the guest and
converts them into a controlled TDCALL to the host, rather than
completely exposing the state to the host.

Currently, we only support MMIO for instructions that are known
to come from io.h macros (build_mmio_read/write()). For drivers
that don't use the io.h macros or uses structure overlay to do
MMIO are currently not supported in TDX guest (for example the
MMIO based XAPIC is disable at runtime for TDX).

This way of handling is similar to AMD SEV.

Also, reasons for supporting #VE based MMIO in TDX guest are,

* MMIO is widely used and we'll have more drivers in the future.
* We don't want to annotate every TDX specific MMIO readl/writel etc.
* If we didn't annotate we would need to add an alternative to every
  MMIO access in the kernel (even though 99.9% will never be used on
  TDX) which would be a complete waste and incredible binary bloat
  for nothing.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Signed-off-by: Kuppuswamy Sathyanarayanan <[email protected]>
---
arch/x86/kernel/tdx.c | 109 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 109 insertions(+)

diff --git a/arch/x86/kernel/tdx.c b/arch/x86/kernel/tdx.c
index 48a0cc2663ea..053e69782e3d 100644
--- a/arch/x86/kernel/tdx.c
+++ b/arch/x86/kernel/tdx.c
@@ -5,6 +5,9 @@

#include <asm/tdx.h>
#include <asm/vmx.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>
+#include <linux/sched/signal.h> /* force_sig_fault() */

#include <linux/cpu.h>
#include <linux/protected_guest.h>
@@ -226,6 +229,104 @@ static void tdg_handle_io(struct pt_regs *regs, u32 exit_qual)
}
}

+static unsigned long tdg_mmio(int size, bool write, unsigned long addr,
+ unsigned long *val)
+{
+ struct tdx_hypercall_output out = {0};
+ u64 err;
+
+ err = __tdx_hypercall(EXIT_REASON_EPT_VIOLATION, size, write,
+ addr, *val, &out);
+ *val = out.r11;
+ return err;
+}
+
+static int tdg_handle_mmio(struct pt_regs *regs, struct ve_info *ve)
+{
+ struct insn insn = {};
+ char buffer[MAX_INSN_SIZE];
+ enum mmio_type mmio;
+ unsigned long *reg;
+ int size, ret;
+ u8 sign_byte;
+ unsigned long val;
+
+ if (user_mode(regs)) {
+ ret = insn_fetch_from_user(regs, buffer);
+ if (!ret)
+ return -EFAULT;
+ if (!insn_decode_from_regs(&insn, regs, buffer, ret))
+ return -EFAULT;
+ } else {
+ ret = copy_from_kernel_nofault(buffer, (void *)regs->ip,
+ MAX_INSN_SIZE);
+ if (ret)
+ return -EFAULT;
+ insn_init(&insn, buffer, MAX_INSN_SIZE, 1);
+ insn_get_length(&insn);
+ }
+
+ mmio = insn_decode_mmio(&insn, &size);
+ if (mmio == MMIO_DECODE_FAILED)
+ return -EFAULT;
+
+ if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
+ reg = insn_get_modrm_reg_ptr(&insn, regs);
+ if (!reg)
+ return -EFAULT;
+ }
+
+ switch (mmio) {
+ case MMIO_WRITE:
+ memcpy(&val, reg, size);
+ ret = tdg_mmio(size, true, ve->gpa, &val);
+ break;
+ case MMIO_WRITE_IMM:
+ val = insn.immediate.value;
+ ret = tdg_mmio(size, true, ve->gpa, &val);
+ break;
+ case MMIO_READ:
+ ret = tdg_mmio(size, false, ve->gpa, &val);
+ if (ret)
+ break;
+ /* Zero-extend for 32-bit operation */
+ if (size == 4)
+ *reg = 0;
+ memcpy(reg, &val, size);
+ break;
+ case MMIO_READ_ZERO_EXTEND:
+ ret = tdg_mmio(size, false, ve->gpa, &val);
+ if (ret)
+ break;
+
+ /* Zero extend based on operand size */
+ memset(reg, 0, insn.opnd_bytes);
+ memcpy(reg, &val, size);
+ break;
+ case MMIO_READ_SIGN_EXTEND:
+ ret = tdg_mmio(size, false, ve->gpa, &val);
+ if (ret)
+ break;
+
+ if (size == 1)
+ sign_byte = (val & 0x80) ? 0xff : 0x00;
+ else
+ sign_byte = (val & 0x8000) ? 0xff : 0x00;
+
+ /* Sign extend based on operand size */
+ memset(reg, sign_byte, insn.opnd_bytes);
+ memcpy(reg, &val, size);
+ break;
+ case MMIO_MOVS:
+ case MMIO_DECODE_FAILED:
+ return -EFAULT;
+ }
+
+ if (ret)
+ return -EFAULT;
+ return insn.length;
+}
+
unsigned long tdg_get_ve_info(struct ve_info *ve)
{
u64 ret;
@@ -275,6 +376,14 @@ int tdg_handle_virtualization_exception(struct pt_regs *regs,
case EXIT_REASON_IO_INSTRUCTION:
tdg_handle_io(regs, ve->exit_qual);
break;
+ case EXIT_REASON_EPT_VIOLATION:
+ /* Currently only MMIO triggers EPT violation */
+ ve->instr_len = tdg_handle_mmio(regs, ve);
+ if (ve->instr_len < 0) {
+ pr_warn_once("MMIO failed\n");
+ return -EFAULT;
+ }
+ break;
default:
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
return -EFAULT;
--
2.25.1

Subject: [RFC v2-fix-v3 1/4] x86/insn-eval: Introduce insn_get_modrm_reg_ptr()

From: "Kirill A. Shutemov" <[email protected]>

The helper returns a pointer to the register indicated by
ModRM byte.

It's going to replace vc_insn_get_reg() in the SEV MMIO
implementation. TDX MMIO implementation will also use it.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Signed-off-by: Kuppuswamy Sathyanarayanan <[email protected]>
---
arch/x86/include/asm/insn-eval.h | 1 +
arch/x86/lib/insn-eval.c | 20 ++++++++++++++++++++
2 files changed, 21 insertions(+)

diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index 91d7182ad2d6..041f399153b9 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -19,6 +19,7 @@ bool insn_has_rep_prefix(struct insn *insn);
void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs);
int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs);
+void *insn_get_modrm_reg_ptr(struct insn *insn, struct pt_regs *regs);
unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
int insn_get_code_seg_params(struct pt_regs *regs);
int insn_fetch_from_user(struct pt_regs *regs,
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index a67afd74232c..9069d0703881 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -850,6 +850,26 @@ int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs)
return get_reg_offset(insn, regs, REG_TYPE_REG);
}

+/**
+ * insn_get_modrm_reg_ptr() - Obtain register pointer based on ModRM byte
+ * @insn: Instruction containing the ModRM byte
+ * @regs: Register values as seen when entering kernel mode
+ *
+ * Returns:
+ *
+ * The register indicated by the reg part of the ModRM byte.
+ * The register is obtained as a pointer within pt_regs.
+ */
+void *insn_get_modrm_reg_ptr(struct insn *insn, struct pt_regs *regs)
+{
+ int offset;
+
+ offset = insn_get_modrm_reg_off(insn, regs);
+ if (offset < 0)
+ return NULL;
+ return (void *)regs + offset;
+}
+
/**
* get_seg_base_limit() - obtain base address and limit of a segment
* @insn: Instruction. Must be valid.
--
2.25.1