This series implements Sstc extension[1] support which was ratified recently.
Before the Sstc extension, an SBI call is necessary to generate timer
interrupts as only M-mode has access to the timecompare registers. Thus,
there is significant latency to generate timer interrupts at kernel.
For virtualized environments, it's even worse as the KVM handles the SBI call
and uses a software timer to emulate the timecompare register.
Sstc extension solves both these problems by defining a stimecmp/vstimecmp
at supervisor (host/guest) level. It allows the kernel to program a timer and
receive an interrupt without supervisor execution environment (M-mode/HS mode)
intervention.
KVM directly updates the vstimecmp as well if the guest kernel invokes the SBI
call instead of updating stimecmp directly. This is required because KVM will
enable sstc extension if the hardware supports it unless the VMM explicitly
disables it for that guest. The hardware is expected to compare the
vstimecmp at every cycle if sstc is enabled and any stale value in vstimecmp
will lead to spurious timer interrupts. This also helps maintain
backward compatibility with older kernels.
Similarly, the M-mode firmware (OpenSBI) uses stimecmp for older kernels
without sstc support as the STIP bit in mip is read only for hardware with sstc.
PATCH 1 & 2 enable the basic infrastructure around the Sstc extension while
PATCH 3 lets the kernel use the Sstc extension if it is available in hardware.
PATCH 4 implements the Sstc extension in KVM.
This series has been tested on Qemu(RV32 & RV64) with additional patches in
Qemu[2]. This series can also be found at [3].
Changes from v5->v6:
1. Moved SSTC extension enum below SVPBMT.
Changes from v4->v5:
1. Added RB tag.
2. Changed the pr-format.
3. Rebased on 5.19-rc7 and kvm-queue.
4. Moved the henvcfg modification from hardware enable to vcpu_load.
Changes from v3->v4:
1. Rebased on 5.18-rc6
2. Unified vstimecmp & next_cycles.
3. Addressed comments in PATCH 3 & 4.
Changes from v2->v3:
1. Dropped unrelated KVM fixes from this series.
2. Rebased on 5.18-rc3.
Changes from v1->v2:
1. Separate the static key from kvm usage
2. Made the sstc specific static key local to the driver/clocksource
3. Moved the vstimecmp update code to the vcpu_timer
4. Used function pointers instead of static key to invoke vstimecmp vs
hrtimer at the run time. This will help in future for migration of vms
from/to sstc enabled hardware to non-sstc enabled hardware.
5. Unified the vstimer & timer to 1 timer as only one of them will be used
at runtime.
[1] https://drive.google.com/file/d/1m84Re2yK8m_vbW7TspvevCDR82MOBaSX/view
[2] https://github.com/atishp04/qemu/tree/sstc_v5
[3] https://github.com/atishp04/linux/tree/sstc_v6
Atish Patra (4):
RISC-V: Add SSTC extension CSR details
RISC-V: Enable sstc extension parsing from DT
RISC-V: Prefer sstc extension if available
RISC-V: KVM: Support sstc extension
arch/riscv/include/asm/csr.h | 5 +
arch/riscv/include/asm/hwcap.h | 1 +
arch/riscv/include/asm/kvm_vcpu_timer.h | 7 ++
arch/riscv/include/uapi/asm/kvm.h | 1 +
arch/riscv/kernel/cpu.c | 1 +
arch/riscv/kernel/cpufeature.c | 1 +
arch/riscv/kvm/vcpu.c | 8 +-
arch/riscv/kvm/vcpu_timer.c | 144 +++++++++++++++++++++++-
drivers/clocksource/timer-riscv.c | 24 +++-
9 files changed, 184 insertions(+), 8 deletions(-)
--
2.25.1
RISC-V ISA has the sstc extension which allows updating the next clock event
via a CSR (stimecmp) instead of an SBI call. This should happen dynamically
if the sstc extension is available. Otherwise, it will fall back to the SBI call
to maintain backward compatibility.
Reviewed-by: Anup Patel <[email protected]>
Signed-off-by: Atish Patra <[email protected]>
---
drivers/clocksource/timer-riscv.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)
diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c
index 593d5a957b69..3f100fb53d82 100644
--- a/drivers/clocksource/timer-riscv.c
+++ b/drivers/clocksource/timer-riscv.c
@@ -7,6 +7,9 @@
* either be read from the "time" and "timeh" CSRs, and can use the SBI to
* setup events, or directly accessed using MMIO registers.
*/
+
+#define pr_fmt(fmt) "riscv-timer: " fmt
+
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/cpu.h>
@@ -23,11 +26,24 @@
#include <asm/sbi.h>
#include <asm/timex.h>
+static DEFINE_STATIC_KEY_FALSE(riscv_sstc_available);
+
static int riscv_clock_next_event(unsigned long delta,
struct clock_event_device *ce)
{
+ u64 next_tval = get_cycles64() + delta;
+
csr_set(CSR_IE, IE_TIE);
- sbi_set_timer(get_cycles64() + delta);
+ if (static_branch_likely(&riscv_sstc_available)) {
+#if defined(CONFIG_32BIT)
+ csr_write(CSR_STIMECMP, next_tval & 0xFFFFFFFF);
+ csr_write(CSR_STIMECMPH, next_tval >> 32);
+#else
+ csr_write(CSR_STIMECMP, next_tval);
+#endif
+ } else
+ sbi_set_timer(next_tval);
+
return 0;
}
@@ -165,6 +181,12 @@ static int __init riscv_timer_init_dt(struct device_node *n)
if (error)
pr_err("cpu hp setup state failed for RISCV timer [%d]\n",
error);
+
+ if (riscv_isa_extension_available(NULL, SSTC)) {
+ pr_info("Timer interrupt in S-mode is available via sstc extension\n");
+ static_branch_enable(&riscv_sstc_available);
+ }
+
return error;
}
--
2.25.1
This patch just introduces the required CSR fields related to the
SSTC extension.
Reviewed-by: Anup Patel <[email protected]>
Signed-off-by: Atish Patra <[email protected]>
---
arch/riscv/include/asm/csr.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 17516afc389a..0e571f6483d9 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -247,6 +247,9 @@
#define CSR_SIP 0x144
#define CSR_SATP 0x180
+#define CSR_STIMECMP 0x14D
+#define CSR_STIMECMPH 0x15D
+
#define CSR_VSSTATUS 0x200
#define CSR_VSIE 0x204
#define CSR_VSTVEC 0x205
@@ -256,6 +259,8 @@
#define CSR_VSTVAL 0x243
#define CSR_VSIP 0x244
#define CSR_VSATP 0x280
+#define CSR_VSTIMECMP 0x24D
+#define CSR_VSTIMECMPH 0x25D
#define CSR_HSTATUS 0x600
#define CSR_HEDELEG 0x602
--
2.25.1
The ISA extension framework now allows parsing any multi-letter
ISA extension.
Enable that for sstc extension.
Reviewed-by: Anup Patel <[email protected]>
Signed-off-by: Atish Patra <[email protected]>
---
arch/riscv/include/asm/hwcap.h | 1 +
arch/riscv/kernel/cpu.c | 1 +
arch/riscv/kernel/cpufeature.c | 1 +
3 files changed, 3 insertions(+)
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 4e2486881840..b186fff75198 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -53,6 +53,7 @@ extern unsigned long elf_hwcap;
enum riscv_isa_ext_id {
RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE,
RISCV_ISA_EXT_SVPBMT,
+ RISCV_ISA_EXT_SSTC,
RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
};
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index fba9e9f46a8c..0016d9337fe0 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -89,6 +89,7 @@ int riscv_of_parent_hartid(struct device_node *node)
static struct riscv_isa_ext_data isa_ext_arr[] = {
__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
+ __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
__RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
};
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 12b05ce164bb..034bdbd189d0 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -199,6 +199,7 @@ void __init riscv_fill_hwcap(void)
} else {
SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
+ SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
}
#undef SET_ISA_EXT_MAP
}
--
2.25.1
The Sstc extension allows the guest to program the vstimecmp CSR directly
instead of making an SBI call to the hypervisor to program the next
event. The timer interrupt is also directly injected to the guest by
the hardware in this case. To maintain backward compatibility, the
hypervisor also updates the vstimecmp in an SBI set_timer call if
the hardware supports it. Thus, older kernels in the guest also
take advantage of the sstc extension.
Reviewed-by: Anup Patel <[email protected]>
Signed-off-by: Atish Patra <[email protected]>
---
arch/riscv/include/asm/kvm_vcpu_timer.h | 7 ++
arch/riscv/include/uapi/asm/kvm.h | 1 +
arch/riscv/kvm/vcpu.c | 8 +-
arch/riscv/kvm/vcpu_timer.c | 144 +++++++++++++++++++++++-
4 files changed, 153 insertions(+), 7 deletions(-)
diff --git a/arch/riscv/include/asm/kvm_vcpu_timer.h b/arch/riscv/include/asm/kvm_vcpu_timer.h
index 50138e2eb91b..0d8fdb8ec63a 100644
--- a/arch/riscv/include/asm/kvm_vcpu_timer.h
+++ b/arch/riscv/include/asm/kvm_vcpu_timer.h
@@ -28,6 +28,11 @@ struct kvm_vcpu_timer {
u64 next_cycles;
/* Underlying hrtimer instance */
struct hrtimer hrt;
+
+ /* Flag to check if sstc is enabled or not */
+ bool sstc_enabled;
+ /* A function pointer to switch between stimecmp or hrtimer at runtime */
+ int (*timer_next_event)(struct kvm_vcpu *vcpu, u64 ncycles);
};
int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles);
@@ -40,5 +45,7 @@ int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu);
int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu);
void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu);
void kvm_riscv_guest_timer_init(struct kvm *kvm);
+void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu);
+bool kvm_riscv_vcpu_timer_pending(struct kvm_vcpu *vcpu);
#endif
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index 24b2a6e27698..7351417afd62 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -97,6 +97,7 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_I,
KVM_RISCV_ISA_EXT_M,
KVM_RISCV_ISA_EXT_SVPBMT,
+ KVM_RISCV_ISA_EXT_SSTC,
KVM_RISCV_ISA_EXT_MAX,
};
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 5d271b597613..d0f08d5b4282 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -52,6 +52,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
RISCV_ISA_EXT_i,
RISCV_ISA_EXT_m,
RISCV_ISA_EXT_SVPBMT,
+ RISCV_ISA_EXT_SSTC,
};
static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
@@ -85,6 +86,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
case KVM_RISCV_ISA_EXT_C:
case KVM_RISCV_ISA_EXT_I:
case KVM_RISCV_ISA_EXT_M:
+ case KVM_RISCV_ISA_EXT_SSTC:
return false;
default:
break;
@@ -203,7 +205,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- return kvm_riscv_vcpu_has_interrupts(vcpu, 1UL << IRQ_VS_TIMER);
+ return kvm_riscv_vcpu_timer_pending(vcpu);
}
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
@@ -785,6 +787,8 @@ static void kvm_riscv_vcpu_update_config(const unsigned long *isa)
if (__riscv_isa_extension_available(isa, RISCV_ISA_EXT_SVPBMT))
henvcfg |= ENVCFG_PBMTE;
+ if (__riscv_isa_extension_available(isa, RISCV_ISA_EXT_SSTC))
+ henvcfg |= ENVCFG_STCE;
csr_write(CSR_HENVCFG, henvcfg);
#ifdef CONFIG_32BIT
csr_write(CSR_HENVCFGH, henvcfg >> 32);
@@ -828,6 +832,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->arch.isa);
kvm_riscv_vcpu_host_fp_restore(&vcpu->arch.host_context);
+ kvm_riscv_vcpu_timer_save(vcpu);
+
csr->vsstatus = csr_read(CSR_VSSTATUS);
csr->vsie = csr_read(CSR_VSIE);
csr->vstvec = csr_read(CSR_VSTVEC);
diff --git a/arch/riscv/kvm/vcpu_timer.c b/arch/riscv/kvm/vcpu_timer.c
index 595043857049..16f50c46ba39 100644
--- a/arch/riscv/kvm/vcpu_timer.c
+++ b/arch/riscv/kvm/vcpu_timer.c
@@ -69,7 +69,18 @@ static int kvm_riscv_vcpu_timer_cancel(struct kvm_vcpu_timer *t)
return 0;
}
-int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles)
+static int kvm_riscv_vcpu_update_vstimecmp(struct kvm_vcpu *vcpu, u64 ncycles)
+{
+#if defined(CONFIG_32BIT)
+ csr_write(CSR_VSTIMECMP, ncycles & 0xFFFFFFFF);
+ csr_write(CSR_VSTIMECMPH, ncycles >> 32);
+#else
+ csr_write(CSR_VSTIMECMP, ncycles);
+#endif
+ return 0;
+}
+
+static int kvm_riscv_vcpu_update_hrtimer(struct kvm_vcpu *vcpu, u64 ncycles)
{
struct kvm_vcpu_timer *t = &vcpu->arch.timer;
struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
@@ -88,6 +99,65 @@ int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles)
return 0;
}
+int kvm_riscv_vcpu_timer_next_event(struct kvm_vcpu *vcpu, u64 ncycles)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+
+ return t->timer_next_event(vcpu, ncycles);
+}
+
+static enum hrtimer_restart kvm_riscv_vcpu_vstimer_expired(struct hrtimer *h)
+{
+ u64 delta_ns;
+ struct kvm_vcpu_timer *t = container_of(h, struct kvm_vcpu_timer, hrt);
+ struct kvm_vcpu *vcpu = container_of(t, struct kvm_vcpu, arch.timer);
+ struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
+
+ if (kvm_riscv_current_cycles(gt) < t->next_cycles) {
+ delta_ns = kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t);
+ hrtimer_forward_now(&t->hrt, ktime_set(0, delta_ns));
+ return HRTIMER_RESTART;
+ }
+
+ t->next_set = false;
+ kvm_vcpu_kick(vcpu);
+
+ return HRTIMER_NORESTART;
+}
+
+bool kvm_riscv_vcpu_timer_pending(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+ struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
+
+ if (!kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t) ||
+ kvm_riscv_vcpu_has_interrupts(vcpu, 1UL << IRQ_VS_TIMER))
+ return true;
+ else
+ return false;
+}
+
+static void kvm_riscv_vcpu_timer_blocking(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+ struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
+ u64 delta_ns;
+
+ if (!t->init_done)
+ return;
+
+ delta_ns = kvm_riscv_delta_cycles2ns(t->next_cycles, gt, t);
+ if (delta_ns) {
+ hrtimer_start(&t->hrt, ktime_set(0, delta_ns), HRTIMER_MODE_REL);
+ t->next_set = true;
+ }
+}
+
+static void kvm_riscv_vcpu_timer_unblocking(struct kvm_vcpu *vcpu)
+{
+ kvm_riscv_vcpu_timer_cancel(&vcpu->arch.timer);
+}
+
int kvm_riscv_vcpu_get_reg_timer(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg)
{
@@ -180,10 +250,20 @@ int kvm_riscv_vcpu_timer_init(struct kvm_vcpu *vcpu)
return -EINVAL;
hrtimer_init(&t->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- t->hrt.function = kvm_riscv_vcpu_hrtimer_expired;
t->init_done = true;
t->next_set = false;
+ /* Enable sstc for every vcpu if available in hardware */
+ if (riscv_isa_extension_available(NULL, SSTC)) {
+ t->sstc_enabled = true;
+ t->hrt.function = kvm_riscv_vcpu_vstimer_expired;
+ t->timer_next_event = kvm_riscv_vcpu_update_vstimecmp;
+ } else {
+ t->sstc_enabled = false;
+ t->hrt.function = kvm_riscv_vcpu_hrtimer_expired;
+ t->timer_next_event = kvm_riscv_vcpu_update_hrtimer;
+ }
+
return 0;
}
@@ -199,21 +279,73 @@ int kvm_riscv_vcpu_timer_deinit(struct kvm_vcpu *vcpu)
int kvm_riscv_vcpu_timer_reset(struct kvm_vcpu *vcpu)
{
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+
+ t->next_cycles = -1ULL;
return kvm_riscv_vcpu_timer_cancel(&vcpu->arch.timer);
}
-void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
+static void kvm_riscv_vcpu_update_timedelta(struct kvm_vcpu *vcpu)
{
struct kvm_guest_timer *gt = &vcpu->kvm->arch.timer;
-#ifdef CONFIG_64BIT
- csr_write(CSR_HTIMEDELTA, gt->time_delta);
-#else
+#if defined(CONFIG_32BIT)
csr_write(CSR_HTIMEDELTA, (u32)(gt->time_delta));
csr_write(CSR_HTIMEDELTAH, (u32)(gt->time_delta >> 32));
+#else
+ csr_write(CSR_HTIMEDELTA, gt->time_delta);
#endif
}
+void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_csr *csr;
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+
+ kvm_riscv_vcpu_update_timedelta(vcpu);
+
+ if (!t->sstc_enabled)
+ return;
+
+ csr = &vcpu->arch.guest_csr;
+#if defined(CONFIG_32BIT)
+ csr_write(CSR_VSTIMECMP, (u32)t->next_cycles);
+ csr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32));
+#else
+ csr_write(CSR_VSTIMECMP, t->next_cycles);
+#endif
+
+ /* timer should be enabled for the remaining operations */
+ if (unlikely(!t->init_done))
+ return;
+
+ kvm_riscv_vcpu_timer_unblocking(vcpu);
+}
+
+void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_csr *csr;
+ struct kvm_vcpu_timer *t = &vcpu->arch.timer;
+
+ if (!t->sstc_enabled)
+ return;
+
+ csr = &vcpu->arch.guest_csr;
+ t = &vcpu->arch.timer;
+#if defined(CONFIG_32BIT)
+ t->next_cycles = csr_read(CSR_VSTIMECMP);
+ t->next_cycles |= (u64)csr_read(CSR_VSTIMECMPH) << 32;
+#else
+ t->next_cycles = csr_read(CSR_VSTIMECMP);
+#endif
+ /* timer should be enabled for the remaining operations */
+ if (unlikely(!t->init_done))
+ return;
+
+ if (kvm_vcpu_is_blocking(vcpu))
+ kvm_riscv_vcpu_timer_blocking(vcpu);
+}
+
void kvm_riscv_guest_timer_init(struct kvm *kvm)
{
struct kvm_guest_timer *gt = &kvm->arch.timer;
--
2.25.1