2015-05-07 09:08:19

by Alex Bennée

[permalink] [raw]
Subject: [PATCH v3 09/12] KVM: arm64: guest debug, HW assisted debug support

This adds support for userspace to control the HW debug registers for
guest debug. In the debug ioctl we copy the IMPDEF defined number of
registers into a new register set called external_debug_state. There is
now a new vcpu parameter called debug_ptr which selects which register
set is to be copied into the real registers when a world switch occurs.

I've moved some helper functions into the hw_breakpoint.h header for
re-use.

As with single step we need to tweak the guest registers to enable the
exceptions so we need to save and restore those bits.

Two new capabilities have been added to the KVM_CHECK_EXTENSION ioctl to
allow userspace to query the number of hardware breakpoints and
watchpoints available on the host hardware.

Signed-off-by: Alex Bennée <[email protected]>

---
v2
- switched to C setup
- replace host debug registers directly into context
- minor tweak to api docs
- setup right register for debug
- add FAR_EL2 to debug exit structure
- add support for trapping debug register access
v3
- remove stray trace statement
- fix spacing around operators (various)
- clean-up usage of trap_debug
- introduce debug_ptr, replace excessive memcpy stuff
- don't use memcpy in ioctl, just assign
- update cap ioctl documentation
- reword a number of comments
- rename host_debug_state->external_debug_state

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 5ef937c..419f7a8 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2668,7 +2668,7 @@ The top 16 bits of the control field are architecture specific control
flags which can include the following:

- KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86, arm64]
- - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390]
+ - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390, arm64]
- KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86]
- KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86]
- KVM_GUESTDBG_EXIT_PENDING: trigger an immediate guest exit [s390]
@@ -2683,6 +2683,11 @@ updated to the correct (supplied) values.
The second part of the structure is architecture specific and
typically contains a set of debug registers.

+For arm64 the number of debug registers is implementation defined and
+can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and
+KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which returns a +ve number
+indicating the number of supported registers.
+
When debug events exit the main run loop with the reason
KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
structure containing architecture specific debug information.
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9b3ed6d..2920185 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -279,6 +279,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);

+ /* Set the debug registers to be the guests */
+ vcpu->arch.debug_ptr = (struct kvm_guest_debug_arch *)
+ &vcpu_sys_reg(vcpu, DBGBCR0_EL1);
+
return 0;
}

@@ -304,6 +308,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)

#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
KVM_GUESTDBG_USE_SW_BP | \
+ KVM_GUESTDBG_USE_HW_BP | \
KVM_GUESTDBG_SINGLESTEP)

/**
@@ -324,6 +329,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,

if (dbg->control & KVM_GUESTDBG_ENABLE) {
vcpu->guest_debug = dbg->control;
+
+ /* Hardware assisted Break and Watch points */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
+ vcpu->arch.external_debug_state = dbg->arch;
+ }
+
} else {
/* If not enabled clear all flags */
vcpu->guest_debug = 0;
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 52b484b..c450552 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -130,6 +130,18 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task)
}
#endif

+/* Determine number of BRP registers available. */
+static inline int get_num_brps(void)
+{
+ return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
+}
+
+/* Determine number of WRP registers available. */
+static inline int get_num_wrps(void)
+{
+ return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
+}
+
extern struct pmu perf_ops_bp;

#endif /* __KERNEL__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index b60fa7a..a44fb32 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -108,9 +108,18 @@ struct kvm_vcpu_arch {
/* Exception Information */
struct kvm_vcpu_fault_info fault;

- /* Debug state */
+ /* Guest debug state */
u64 debug_flags;

+ /*
+ * For debugging the guest we need to keep a set of debug
+ * registers which can override the guests own debug state
+ * while being used. These are set via the KVM_SET_GUEST_DEBUG
+ * ioctl.
+ */
+ struct kvm_guest_debug_arch *debug_ptr;
+ struct kvm_guest_debug_arch external_debug_state;
+
/* Pointer to host CPU context */
kvm_cpu_context_t *host_cpu_context;

diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 04957d7..98e82ef 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -121,7 +121,7 @@ struct kvm_guest_debug_arch {

struct kvm_debug_exit_arch {
__u32 hsr;
- __u64 far;
+ __u64 far; /* used for watchpoints */
};

struct kvm_sync_regs {
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index ce7b7dd..671ab13 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -116,6 +116,7 @@ int main(void)
DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags));
+ DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr));
DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr));
DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr));
DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr));
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index e7d934d..3a41bbf 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -49,18 +49,6 @@ static DEFINE_PER_CPU(int, stepping_kernel_bp);
static int core_num_brps;
static int core_num_wrps;

-/* Determine number of BRP registers available. */
-static int get_num_brps(void)
-{
- return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
-}
-
-/* Determine number of WRP registers available. */
-static int get_num_wrps(void)
-{
- return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
-}
-
int hw_breakpoint_slots(int type)
{
/*
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 19346e8..1ab63dd 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -99,12 +99,6 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
MDCR_EL2_TDRA |
MDCR_EL2_TDOSA);

- /* Trap on access to debug registers? */
- if (trap_debug)
- vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
- else
- vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDA;
-
/* Is Guest debugging in effect? */
if (vcpu->guest_debug) {
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
@@ -128,14 +122,54 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS;
}

+ /*
+ * HW Break/Watch points
+ *
+ * We simply switch the debug_ptr to point to our new
+ * external_debug_state which has been populated by the
+ * debug ioctl. The existing KVM_ARM64_DEBUG_DIRTY
+ * mechanism ensures the registers are updated on the
+ * world switch.
+ */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
+
+ vcpu_sys_reg(vcpu, MDSCR_EL1) |=
+ (DBG_MDSCR_KDE | DBG_MDSCR_MDE);
+
+ vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
+ vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
+ trap_debug = true;
+ }
+
} else {
/* Debug operations can go straight to the guest */
vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDE;
}
+
+ /*
+ * If the guest debug register state is dirty (the guest is
+ * actively accessing them), then we context-switch the
+ * registers in EL2. Otherwise, we trap-and-emulate all guest
+ * accesses to them.
+ */
+ if (trap_debug)
+ vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
+ else
+ vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDA;
}

void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
{
- if (vcpu->guest_debug)
+ if (vcpu->guest_debug) {
restore_guest_debug_regs(vcpu);
+
+ /*
+ * If we were using HW debug we need to restore the
+ * debug_ptr to the guest debug state.
+ */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
+ vcpu->arch.debug_ptr = (struct kvm_guest_debug_arch *)
+ &vcpu_sys_reg(vcpu, DBGBCR0_EL1);
+ }
+ }
}
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index e9de13e..68a0759 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -103,7 +103,11 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
run->debug.arch.hsr = hsr;

switch (hsr >> ESR_ELx_EC_SHIFT) {
+ case ESR_ELx_EC_WATCHPT_LOW:
+ run->debug.arch.far = vcpu->arch.fault.far_el2;
+ /* fall through */
case ESR_ELx_EC_SOFTSTP_LOW:
+ case ESR_ELx_EC_BREAKPT_LOW:
case ESR_ELx_EC_BKPT32:
case ESR_ELx_EC_BRK64:
break;
@@ -132,6 +136,8 @@ static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort,
[ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort,
[ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
+ [ESR_ELx_EC_WATCHPT_LOW]= kvm_handle_guest_debug,
+ [ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug,
[ESR_ELx_EC_BKPT32] = kvm_handle_guest_debug,
[ESR_ELx_EC_BRK64] = kvm_handle_guest_debug,
};
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index dd51fb1..921d248 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -706,7 +706,8 @@ ENTRY(__kvm_vcpu_run)
bl __restore_fpsimd

skip_debug_state x3, 1f
- add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+ ldr x3, [x0, #VCPU_DEBUG_PTR]
+ kern_hyp_va x3
bl __restore_debug
1:
restore_guest_32bit_state
@@ -727,7 +728,8 @@ __kvm_vcpu_return:
bl __save_sysregs

skip_debug_state x3, 1f
- add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
+ ldr x3, [x0, #VCPU_DEBUG_PTR]
+ kern_hyp_va x3
bl __save_debug
1:
save_guest_32bit_state
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 0b43265..21d5a62 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -56,6 +56,12 @@ static bool cpu_has_32bit_el1(void)
return !!(pfr0 & 0x20);
}

+/**
+ * kvm_arch_dev_ioctl_check_extension
+ *
+ * We currently assume that the number of HW registers is uniform
+ * across all CPUs (see cpuinfo_sanity_check).
+ */
int kvm_arch_dev_ioctl_check_extension(long ext)
{
int r;
@@ -64,6 +70,12 @@ int kvm_arch_dev_ioctl_check_extension(long ext)
case KVM_CAP_ARM_EL1_32BIT:
r = cpu_has_32bit_el1();
break;
+ case KVM_CAP_GUEST_DEBUG_HW_BPS:
+ r = get_num_brps();
+ break;
+ case KVM_CAP_GUEST_DEBUG_HW_WPS:
+ r = get_num_wrps();
+ break;
default:
r = 0;
}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 3b6252e..923c2aa 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -825,6 +825,8 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_S390_INJECT_IRQ 113
#define KVM_CAP_S390_IRQ_STATE 114
#define KVM_CAP_PPC_HWRNG 115
+#define KVM_CAP_GUEST_DEBUG_HW_BPS 116
+#define KVM_CAP_GUEST_DEBUG_HW_WPS 117

#ifdef KVM_CAP_IRQ_ROUTING

--
2.3.5


2015-05-08 16:32:48

by Christoffer Dall

[permalink] [raw]
Subject: Re: [PATCH v3 09/12] KVM: arm64: guest debug, HW assisted debug support

On Thu, May 07, 2015 at 10:07:12AM +0100, Alex Bennée wrote:
> This adds support for userspace to control the HW debug registers for
> guest debug. In the debug ioctl we copy the IMPDEF defined number of
> registers into a new register set called host_debug_state. There is now
> a new vcpu parameter called debug_ptr which selects which register set
> is to copied into the real registers when world switch occurs.
>
> I've moved some helper functions into the hw_breakpoint.h header for
> re-use.
>
> As with single step we need to tweak the guest registers to enable the
> exceptions so we need to save and restore those bits.
>
> Two new capabilities have been added to the KVM_EXTENSION ioctl to allow
> userspace to query the number of hardware break and watch points
> available on the host hardware.
>
> Signed-off-by: Alex Bennée <[email protected]>
>
> ---
> v2
> - switched to C setup
> - replace host debug registers directly into context
> - minor tweak to api docs
> - setup right register for debug
> - add FAR_EL2 to debug exit structure
> - add support for trapping debug register access
> v3
> - remove stray trace statement
> - fix spacing around operators (various)
> - clean-up usage of trap_debug
> - introduce debug_ptr, replace excessive memcpy stuff
> - don't use memcpy in ioctl, just assign
> - update cap ioctl documentation
> - reword a number comments
> - rename host_debug_state->external_debug_state
>
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index 5ef937c..419f7a8 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -2668,7 +2668,7 @@ The top 16 bits of the control field are architecture specific control
> flags which can include the following:
>
> - KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86, arm64]
> - - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390]
> + - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390, arm64]
> - KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86]
> - KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86]
> - KVM_GUESTDBG_EXIT_PENDING: trigger an immediate guest exit [s390]
> @@ -2683,6 +2683,11 @@ updated to the correct (supplied) values.
> The second part of the structure is architecture specific and
> typically contains a set of debug registers.
>
> +For arm64 the number of debug registers is implementation defined and
> +can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and
> +KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which returns a +ve number

s/returns/return/
s/+ve/positive/

> +indicating the number of supported registers.
> +
> When debug events exit the main run loop with the reason
> KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
> structure containing architecture specific debug information.
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 9b3ed6d..2920185 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -279,6 +279,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> /* Set up the timer */
> kvm_timer_vcpu_init(vcpu);
>
> + /* Set the debug registers to be the guests */
> + vcpu->arch.debug_ptr = (struct kvm_guest_debug_arch *)
> + &vcpu_sys_reg(vcpu, DBGBCR0_EL1);
> +

Yikes, I don't like this cast. How bad would it be to get rid of the
debug registers in the sys_regs array?

Also, pretty sure this is part of the breakage for the 32-bit build...

> return 0;
> }
>
> @@ -304,6 +308,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>
> #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \
> KVM_GUESTDBG_USE_SW_BP | \
> + KVM_GUESTDBG_USE_HW_BP | \
> KVM_GUESTDBG_SINGLESTEP)
>
> /**
> @@ -324,6 +329,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>
> if (dbg->control & KVM_GUESTDBG_ENABLE) {
> vcpu->guest_debug = dbg->control;
> +
> + /* Hardware assisted Break and Watch points */
> + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {

is this only breakpoints or breakpoints and watch points?

> + vcpu->arch.external_debug_state = dbg->arch;
> + }
> +
> } else {
> /* If not enabled clear all flags */
> vcpu->guest_debug = 0;
> diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
> index 52b484b..c450552 100644
> --- a/arch/arm64/include/asm/hw_breakpoint.h
> +++ b/arch/arm64/include/asm/hw_breakpoint.h
> @@ -130,6 +130,18 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task)
> }
> #endif
>
> +/* Determine number of BRP registers available. */
> +static inline int get_num_brps(void)
> +{
> + return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
> +}
> +
> +/* Determine number of WRP registers available. */
> +static inline int get_num_wrps(void)
> +{
> + return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
> +}
> +

I will need an ack from Catalin/Will to merge this. It may be better to
move these functions in a separate patch.

> extern struct pmu perf_ops_bp;
>
> #endif /* __KERNEL__ */
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index b60fa7a..a44fb32 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -108,9 +108,18 @@ struct kvm_vcpu_arch {
> /* Exception Information */
> struct kvm_vcpu_fault_info fault;
>
> - /* Debug state */
> + /* Guest debug state */
> u64 debug_flags;
>
> + /*
> + * For debugging the guest we need to keep a set of debug
> + * registers which can override the guests own debug state

s/guests/guest's/

> + * while being used. These are set via the KVM_SET_GUEST_DEBUG
> + * ioctl.
> + */
> + struct kvm_guest_debug_arch *debug_ptr;
> + struct kvm_guest_debug_arch external_debug_state;
> +
> /* Pointer to host CPU context */
> kvm_cpu_context_t *host_cpu_context;
>
> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> index 04957d7..98e82ef 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -121,7 +121,7 @@ struct kvm_guest_debug_arch {
>
> struct kvm_debug_exit_arch {
> __u32 hsr;
> - __u64 far;
> + __u64 far; /* used for watchpoints */

seems strange to amend this now?

> };
>
> struct kvm_sync_regs {
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index ce7b7dd..671ab13 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -116,6 +116,7 @@ int main(void)
> DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2));
> DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2));
> DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags));
> + DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr));
> DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr));
> DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr));
> DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr));
> diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
> index e7d934d..3a41bbf 100644
> --- a/arch/arm64/kernel/hw_breakpoint.c
> +++ b/arch/arm64/kernel/hw_breakpoint.c
> @@ -49,18 +49,6 @@ static DEFINE_PER_CPU(int, stepping_kernel_bp);
> static int core_num_brps;
> static int core_num_wrps;
>
> -/* Determine number of BRP registers available. */
> -static int get_num_brps(void)
> -{
> - return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
> -}
> -
> -/* Determine number of WRP registers available. */
> -static int get_num_wrps(void)
> -{
> - return ((read_cpuid(ID_AA64DFR0_EL1) >> 20) & 0xf) + 1;
> -}
> -
> int hw_breakpoint_slots(int type)
> {
> /*
> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
> index 19346e8..1ab63dd 100644
> --- a/arch/arm64/kvm/debug.c
> +++ b/arch/arm64/kvm/debug.c
> @@ -99,12 +99,6 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
> MDCR_EL2_TDRA |
> MDCR_EL2_TDOSA);
>
> - /* Trap on access to debug registers? */
> - if (trap_debug)
> - vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
> - else
> - vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDA;
> -
> /* Is Guest debugging in effect? */
> if (vcpu->guest_debug) {
> vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
> @@ -128,14 +122,54 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
> vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS;
> }
>
> + /*
> + * HW Break/Watch points
> + *
> + * We simply switch the debug_ptr to point to our new
> + * external_debug_state which has been populated by the
> + * debug ioctl. The existing KVM_ARM64_DEBUG_DIRTY
> + * mechanism ensures the registers are updated on the
> + * world switch.
> + */
> + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
> +
> + vcpu_sys_reg(vcpu, MDSCR_EL1) |=
> + (DBG_MDSCR_KDE | DBG_MDSCR_MDE);

Why do we need to set these two bits? Is it obvious or does it deserve
a comment?

> +
> + vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
> + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
> + trap_debug = true;
> + }
> +
> } else {
> /* Debug operations can go straight to the guest */
> vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDE;
> }
> +
> + /*
> + * If the guest debug register state is dirty (the guest is
> + * actively accessing them), then we context-switch the
> + * registers in EL2. Otherwise, we trap-and-emulate all guest
> + * accesses to them.
> + */

I think this comment now feels strange, because it was explaining why we
would set the trap_debug variable when the dirty flag was set, but the
code just sets TDA when trap_debug is set. So you should either move
this comment to the top of the function and have it above a separate
line that sets trap_debug based on KVM_ARM64_DEBUG_DIRTY (instead of
initializing at declaration), or you should explain which conditions set
trap_debug (guest is using the regs or we are debugging the guest), or
just get rid of the comment.

> + if (trap_debug)
> + vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
> + else
> + vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDA;

still don't need the else.

> }
>
> void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
> {
> - if (vcpu->guest_debug)
> + if (vcpu->guest_debug) {
> restore_guest_debug_regs(vcpu);
> +
> + /*
> + * If we were using HW debug we need to restore the
> + * debug_ptr to the guest debug state.
> + */
> + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
> + vcpu->arch.debug_ptr = (struct kvm_guest_debug_arch *)
> + &vcpu_sys_reg(vcpu, DBGBCR0_EL1);
> + }

I would find it easier to follow the code if you only configured the
debug_ptr in kvm_arm_setup_debug(), because it feels like you're setting
up state here which will not be used for a very long time (after
handle_exit, exit to userspace etc.).

> + }
> }
> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
> index e9de13e..68a0759 100644
> --- a/arch/arm64/kvm/handle_exit.c
> +++ b/arch/arm64/kvm/handle_exit.c
> @@ -103,7 +103,11 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
> run->debug.arch.hsr = hsr;
>
> switch (hsr >> ESR_ELx_EC_SHIFT) {
> + case ESR_ELx_EC_WATCHPT_LOW:
> + run->debug.arch.far = vcpu->arch.fault.far_el2;
> + /* fall through */
> case ESR_ELx_EC_SOFTSTP_LOW:
> + case ESR_ELx_EC_BREAKPT_LOW:
> case ESR_ELx_EC_BKPT32:
> case ESR_ELx_EC_BRK64:
> break;
> @@ -132,6 +136,8 @@ static exit_handle_fn arm_exit_handlers[] = {
> [ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort,
> [ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort,
> [ESR_ELx_EC_SOFTSTP_LOW]= kvm_handle_guest_debug,
> + [ESR_ELx_EC_WATCHPT_LOW]= kvm_handle_guest_debug,
> + [ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug,
> [ESR_ELx_EC_BKPT32] = kvm_handle_guest_debug,
> [ESR_ELx_EC_BRK64] = kvm_handle_guest_debug,
> };
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index dd51fb1..921d248 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -706,7 +706,8 @@ ENTRY(__kvm_vcpu_run)
> bl __restore_fpsimd
>
> skip_debug_state x3, 1f
> - add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
> + ldr x3, [x0, #VCPU_DEBUG_PTR]
> + kern_hyp_va x3
> bl __restore_debug
> 1:
> restore_guest_32bit_state
> @@ -727,7 +728,8 @@ __kvm_vcpu_return:
> bl __save_sysregs
>
> skip_debug_state x3, 1f
> - add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
> + ldr x3, [x0, #VCPU_DEBUG_PTR]
> + kern_hyp_va x3
> bl __save_debug
> 1:
> save_guest_32bit_state
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index 0b43265..21d5a62 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -56,6 +56,12 @@ static bool cpu_has_32bit_el1(void)
> return !!(pfr0 & 0x20);
> }
>
> +/**
> + * kvm_arch_dev_ioctl_check_extension
> + *
> + * We currently assume that the number of HW registers is uniform
> + * across all CPUs (see cpuinfo_sanity_check).
> + */
> int kvm_arch_dev_ioctl_check_extension(long ext)
> {
> int r;
> @@ -64,6 +70,12 @@ int kvm_arch_dev_ioctl_check_extension(long ext)
> case KVM_CAP_ARM_EL1_32BIT:
> r = cpu_has_32bit_el1();
> break;
> + case KVM_CAP_GUEST_DEBUG_HW_BPS:
> + r = get_num_brps();
> + break;
> + case KVM_CAP_GUEST_DEBUG_HW_WPS:
> + r = get_num_wrps();
> + break;
> default:
> r = 0;
> }
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 3b6252e..923c2aa 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -825,6 +825,8 @@ struct kvm_ppc_smmu_info {
> #define KVM_CAP_S390_INJECT_IRQ 113
> #define KVM_CAP_S390_IRQ_STATE 114
> #define KVM_CAP_PPC_HWRNG 115
> +#define KVM_CAP_GUEST_DEBUG_HW_BPS 116
> +#define KVM_CAP_GUEST_DEBUG_HW_WPS 117
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> --
> 2.3.5
>
Thanks,
-Christoffer