Hi all,
below is a series to support nommu mode on RISC-V. For now this series
just works under qemu with the qemu-virt platform, but Damien has also
been able to get kernel based on this tree with additional driver hacks
to work on the Kendryte KD210, but that will take a while to cleanup
an upstream.
A git tree is available here:
git://git.infradead.org/users/hch/riscv.git riscv-nommu.3
Gitweb:
http://git.infradead.org/users/hch/riscv.git/shortlog/refs/heads/riscv-nommu.3
I've also pushed out a builtroot branch that can build a RISC-V nommu
root filesystem here:
git://git.infradead.org/users/hch/buildroot.git riscv-nommu.2
Gitweb:
http://git.infradead.org/users/hch/buildroot.git/shortlog/refs/heads/riscv-nommu.2
Changes since v2:
- rebased to 5.3-rc
- remove the EFI image header for nommu builds
- set ARCH_SLAB_MINALIGN to ensure stack alignment in the flat binary
loader
- minor comment improvement
- use #defines for more CSRs
Changes since v1:
- fixes so that a kernel with this series still work on builds with an
IOMMU
- small clint cleanups
- the binfmt_flat base and buildroot now don't put arguments on the stack
Switch to our own constant for the satp register instead of using
the old name from a legacy version of the privileged spec.
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Atish Patra <[email protected]>
---
arch/riscv/mm/context.c | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index 89ceb3cbe218..beeb5d7f92ea 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -57,12 +57,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
cpumask_clear_cpu(cpu, mm_cpumask(prev));
cpumask_set_cpu(cpu, mm_cpumask(next));
- /*
- * Use the old spbtr name instead of using the current satp
- * name to support binutils 2.29 which doesn't know about the
- * privileged ISA 1.10 yet.
- */
- csr_write(sptbr, virt_to_pfn(next->pgd) | SATP_MODE);
+ csr_write(CSR_SATP, virt_to_pfn(next->pgd) | SATP_MODE);
local_flush_tlb_all();
flush_icache_deferred(next);
--
2.20.1
This prepare for adding native non-SBI IPI code.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/riscv/kernel/smp.c | 55 +++++++++++++++++++++++------------------
1 file changed, 31 insertions(+), 24 deletions(-)
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 5a9834503a2f..8cd730239613 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -78,13 +78,38 @@ static void ipi_stop(void)
wait_for_interrupt();
}
+static void send_ipi_mask(const struct cpumask *mask, enum ipi_message_type op)
+{
+ int cpuid, hartid;
+ struct cpumask hartid_mask;
+
+ cpumask_clear(&hartid_mask);
+ mb();
+ for_each_cpu(cpuid, mask) {
+ set_bit(op, &ipi_data[cpuid].bits);
+ hartid = cpuid_to_hartid_map(cpuid);
+ cpumask_set_cpu(hartid, &hartid_mask);
+ }
+ mb();
+ sbi_send_ipi(cpumask_bits(&hartid_mask));
+}
+
+static void send_ipi_single(int cpu, enum ipi_message_type op)
+{
+ send_ipi_mask(cpumask_of(cpu), op);
+}
+
+static inline void clear_ipi(void)
+{
+ csr_clear(CSR_SIP, SIE_SSIE);
+}
+
void riscv_software_interrupt(void)
{
unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits;
unsigned long *stats = ipi_data[smp_processor_id()].stats;
- /* Clear pending IPI */
- csr_clear(CSR_SIP, SIE_SSIE);
+ clear_ipi();
while (true) {
unsigned long ops;
@@ -118,23 +143,6 @@ void riscv_software_interrupt(void)
}
}
-static void
-send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation)
-{
- int cpuid, hartid;
- struct cpumask hartid_mask;
-
- cpumask_clear(&hartid_mask);
- mb();
- for_each_cpu(cpuid, to_whom) {
- set_bit(operation, &ipi_data[cpuid].bits);
- hartid = cpuid_to_hartid_map(cpuid);
- cpumask_set_cpu(hartid, &hartid_mask);
- }
- mb();
- sbi_send_ipi(cpumask_bits(&hartid_mask));
-}
-
static const char * const ipi_names[] = {
[IPI_RESCHEDULE] = "Rescheduling interrupts",
[IPI_CALL_FUNC] = "Function call interrupts",
@@ -156,12 +164,12 @@ void show_ipi_stats(struct seq_file *p, int prec)
void arch_send_call_function_ipi_mask(struct cpumask *mask)
{
- send_ipi_message(mask, IPI_CALL_FUNC);
+ send_ipi_mask(mask, IPI_CALL_FUNC);
}
void arch_send_call_function_single_ipi(int cpu)
{
- send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC);
+ send_ipi_single(cpu, IPI_CALL_FUNC);
}
void smp_send_stop(void)
@@ -176,7 +184,7 @@ void smp_send_stop(void)
if (system_state <= SYSTEM_RUNNING)
pr_crit("SMP: stopping secondary CPUs\n");
- send_ipi_message(&mask, IPI_CPU_STOP);
+ send_ipi_mask(&mask, IPI_CPU_STOP);
}
/* Wait up to one second for other CPUs to stop */
@@ -191,6 +199,5 @@ void smp_send_stop(void)
void smp_send_reschedule(int cpu)
{
- send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
+ send_ipi_single(cpu, IPI_RESCHEDULE);
}
-
--
2.20.1
There is no SBI when we run in M-mode, so fail the compile for any code
trying to use SBI calls.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/riscv/include/asm/sbi.h | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 21134b3ef404..1e17f07eadaf 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -8,6 +8,7 @@
#include <linux/types.h>
+#ifndef CONFIG_M_MODE
#define SBI_SET_TIMER 0
#define SBI_CONSOLE_PUTCHAR 1
#define SBI_CONSOLE_GETCHAR 2
@@ -94,4 +95,5 @@ static inline void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
SBI_CALL_4(SBI_REMOTE_SFENCE_VMA_ASID, hart_mask, start, size, asid);
}
-#endif
+#endif /* CONFIG_M_MODE */
+#endif /* _ASM_RISCV_SBI_H */
--
2.20.1
The kernel runs in M-mode without using page tables, and thus can't run
bare metal without help from additional firmware.
Most of the patch is just stubbing out code not needed without page
tables, but there is an interesting detail in the signals implementation:
- The normal RISC-V syscall ABI only implements rt_sigreturn as VDSO
entry point, but the ELF VDSO is not supported for nommu Linux.
We instead copy the code to call the syscall onto the stack.
In addition to enabling the nommu code a new defconfig for a small
kernel image that can run in nommu mode on qemu is also provided, to run
a kernel in qemu you can use the following command line:
qemu-system-riscv64 -smp 2 -m 64 -machine virt -nographic \
-kernel arch/riscv/boot/loader \
-drive file=rootfs.ext2,format=raw,id=hd0 \
-device virtio-blk-device,drive=hd0
Contains contributions from Damien Le Moal <[email protected]>.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/riscv/Kconfig | 24 +++++---
arch/riscv/configs/nommu_virt_defconfig | 78 +++++++++++++++++++++++++
arch/riscv/include/asm/cache.h | 8 +++
arch/riscv/include/asm/elf.h | 4 +-
arch/riscv/include/asm/futex.h | 6 ++
arch/riscv/include/asm/io.h | 4 ++
arch/riscv/include/asm/mmu.h | 3 +
arch/riscv/include/asm/page.h | 10 +++-
arch/riscv/include/asm/pgalloc.h | 2 +
arch/riscv/include/asm/pgtable.h | 38 ++++++++----
arch/riscv/include/asm/tlbflush.h | 7 ++-
arch/riscv/include/asm/uaccess.h | 4 ++
arch/riscv/kernel/Makefile | 3 +-
arch/riscv/kernel/entry.S | 11 ++++
arch/riscv/kernel/head.S | 6 ++
arch/riscv/kernel/signal.c | 17 +++++-
arch/riscv/lib/Makefile | 11 ++--
arch/riscv/mm/Makefile | 3 +-
arch/riscv/mm/cacheflush.c | 2 +
arch/riscv/mm/context.c | 2 +
arch/riscv/mm/init.c | 13 ++++-
21 files changed, 218 insertions(+), 38 deletions(-)
create mode 100644 arch/riscv/configs/nommu_virt_defconfig
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 7765b4a3b23e..b40c0957310f 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -26,13 +26,13 @@ config RISCV
select GENERIC_IRQ_SHOW
select GENERIC_PCI_IOMAP
select GENERIC_SCHED_CLOCK
- select GENERIC_STRNCPY_FROM_USER
- select GENERIC_STRNLEN_USER
+ select GENERIC_STRNCPY_FROM_USER if MMU
+ select GENERIC_STRNLEN_USER if MMU
select GENERIC_SMP_IDLE_THREAD
select GENERIC_ATOMIC64 if !64BIT
select HAVE_ARCH_AUDITSYSCALL
select HAVE_MEMBLOCK_NODE_MAP
- select HAVE_DMA_CONTIGUOUS
+ select HAVE_DMA_CONTIGUOUS if MMU
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_PERF_EVENTS
select HAVE_SYSCALL_TRACEPOINTS
@@ -47,6 +47,7 @@ config RISCV
select PCI_DOMAINS_GENERIC if PCI
select PCI_MSI if PCI
select RISCV_TIMER
+ select UACCESS_MEMCPY if !MMU
select GENERIC_IRQ_MULTI_HANDLER
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_MMIOWB
@@ -58,9 +59,14 @@ config RISCV
# set if we run in machine mode, cleared if we run in supervisor mode
config M_MODE
bool
+ default y if !MMU
config MMU
- def_bool y
+ bool "MMU-based Paged Memory Management Support"
+ default y
+ help
+ Select if you want MMU-based virtualised addressing space
+ support by paged memory management. If unsure, say 'Y'.
config ZONE_DMA32
bool
@@ -69,6 +75,7 @@ config ZONE_DMA32
config PAGE_OFFSET
hex
default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB
+ default 0x80000000 if 64BIT && !MMU
default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
@@ -102,7 +109,7 @@ config GENERIC_HWEIGHT
def_bool y
config FIX_EARLYCON_MEM
- def_bool y
+ def_bool CONFIG_MMU
config PGTABLE_LEVELS
int
@@ -127,6 +134,7 @@ config ARCH_RV32I
select GENERIC_LIB_ASHRDI3
select GENERIC_LIB_LSHRDI3
select GENERIC_LIB_UCMPDI2
+ select MMU
config ARCH_RV64I
bool "RV64I"
@@ -135,9 +143,9 @@ config ARCH_RV64I
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FTRACE_MCOUNT_RECORD
- select HAVE_DYNAMIC_FTRACE
- select HAVE_DYNAMIC_FTRACE_WITH_REGS
- select SWIOTLB
+ select HAVE_DYNAMIC_FTRACE if MMU
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
+ select SWIOTLB if MMU
endchoice
diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig
new file mode 100644
index 000000000000..cf74e179bf90
--- /dev/null
+++ b/arch/riscv/configs/nommu_virt_defconfig
@@ -0,0 +1,78 @@
+# CONFIG_CPU_ISOLATION is not set
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_XZ is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
+# CONFIG_FHANDLE is not set
+# CONFIG_BASE_FULL is not set
+# CONFIG_EPOLL is not set
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
+# CONFIG_AIO is not set
+# CONFIG_IO_URING is not set
+# CONFIG_ADVISE_SYSCALLS is not set
+# CONFIG_MEMBARRIER is not set
+# CONFIG_KALLSYMS is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLOB=y
+# CONFIG_SLAB_MERGE_DEFAULT is not set
+# CONFIG_MMU is not set
+CONFIG_MAXPHYSMEM_2GB=y
+CONFIG_SMP=y
+CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0"
+CONFIG_CMDLINE_FORCE=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+# CONFIG_MQ_IOSCHED_DEADLINE is not set
+# CONFIG_MQ_IOSCHED_KYBER is not set
+CONFIG_BINFMT_FLAT=y
+# CONFIG_COREDUMP is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+CONFIG_VIRTIO_BLK=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_LDISC_AUTOLOAD is not set
+# CONFIG_DEVMEM is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
+CONFIG_SIFIVE_PLIC=y
+# CONFIG_VALIDATE_FS_PARSER is not set
+CONFIG_EXT2_FS=y
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY_USER is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_LSM="[]"
+CONFIG_PRINTK_TIME=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_RCU_TRACE is not set
+# CONFIG_FTRACE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/riscv/include/asm/cache.h b/arch/riscv/include/asm/cache.h
index bfd523e8f0b2..9b58b104559e 100644
--- a/arch/riscv/include/asm/cache.h
+++ b/arch/riscv/include/asm/cache.h
@@ -11,4 +11,12 @@
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+/*
+ * RISC-V requires the stack pointer to be 16-byte aligned, so ensure that
+ * the flat loader aligns it accordingly.
+ */
+#ifndef CONFIG_MMU
+#define ARCH_SLAB_MINALIGN 16
+#endif
+
#endif /* _ASM_RISCV_CACHE_H */
diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
index ef04084bf0de..d83a4efd052b 100644
--- a/arch/riscv/include/asm/elf.h
+++ b/arch/riscv/include/asm/elf.h
@@ -56,16 +56,16 @@ extern unsigned long elf_hwcap;
*/
#define ELF_PLATFORM (NULL)
+#ifdef CONFIG_MMU
#define ARCH_DLINFO \
do { \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(elf_addr_t)current->mm->context.vdso); \
} while (0)
-
-
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
struct linux_binprm;
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp);
+#endif /* CONFIG_MMU */
#endif /* _ASM_RISCV_ELF_H */
diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
index 4ad6409c4647..418564b96dc4 100644
--- a/arch/riscv/include/asm/futex.h
+++ b/arch/riscv/include/asm/futex.h
@@ -12,6 +12,12 @@
#include <linux/errno.h>
#include <asm/asm.h>
+/* We don't even really need the extable code, but for now keep it simple */
+#ifndef CONFIG_MMU
+#define __enable_user_access() do { } while (0)
+#define __disable_user_access() do { } while (0)
+#endif
+
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
{ \
uintptr_t tmp; \
diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index fc1189ad3777..d39a8f03e85e 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <asm/mmiowb.h>
+#ifdef CONFIG_MMU
extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
/*
@@ -26,6 +27,9 @@ extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
#define ioremap_wt(addr, size) ioremap((addr), (size))
extern void iounmap(volatile void __iomem *addr);
+#else
+#define pgprot_noncached(x) (x)
+#endif /* CONFIG_MMU */
/* Generic IO read/write. These perform native-endian accesses. */
#define __raw_writeb __raw_writeb
diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index 151476fb58cb..967eacb01ab5 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -10,6 +10,9 @@
#ifndef __ASSEMBLY__
typedef struct {
+#ifndef CONFIG_MMU
+ unsigned long end_brk;
+#endif
void *vdso;
#ifdef CONFIG_SMP
/* A local icache flush is needed before user execution can resume. */
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 707e00a8430b..93c4f40e8c95 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -88,8 +88,14 @@ typedef struct page *pgtable_t;
#define PTE_FMT "%08lx"
#endif
+#ifdef CONFIG_MMU
extern unsigned long va_pa_offset;
extern unsigned long pfn_base;
+#define ARCH_PFN_OFFSET (pfn_base)
+#else
+#define va_pa_offset 0
+#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT)
+#endif /* CONFIG_MMU */
extern unsigned long max_low_pfn;
extern unsigned long min_low_pfn;
@@ -111,9 +117,7 @@ extern unsigned long min_low_pfn;
#define phys_to_page(paddr) (pfn_to_page(phys_to_pfn(paddr)))
#define pfn_valid(pfn) \
- (((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))
-
-#define ARCH_PFN_OFFSET (pfn_base)
+ (((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))
#endif /* __ASSEMBLY__ */
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 56a67d66f72f..d810b9e7476f 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -10,6 +10,7 @@
#include <linux/mm.h>
#include <asm/tlb.h>
+#ifdef CONFIG_MMU
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
static inline void pmd_populate_kernel(struct mm_struct *mm,
@@ -81,6 +82,7 @@ do { \
pgtable_page_dtor(pte); \
tlb_remove_page((tlb), pte); \
} while (0)
+#endif /* CONFIG_MMU */
static inline void check_pgt_cache(void)
{
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index a364aba23d55..cb408755d723 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -24,6 +24,7 @@
#include <asm/pgtable-32.h>
#endif /* CONFIG_64BIT */
+#ifdef CONFIG_MMU
/* Number of entries in the page global directory */
#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
/* Number of entries in the page table */
@@ -31,7 +32,6 @@
/* Number of PGD entries that a user-mode program can use */
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
-#define FIRST_USER_ADDRESS 0
/* Page protection bits */
#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER)
@@ -83,13 +83,6 @@ extern pgd_t swapper_pg_dir[];
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC
-/*
- * ZERO_PAGE is a global shared page that is always zero,
- * used for zero-mapped memory areas, etc.
- */
-extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-
static inline int pmd_present(pmd_t pmd)
{
return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE));
@@ -403,6 +396,18 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
+#define VMALLOC_END (PAGE_OFFSET - 1)
+#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
+
+#else /* CONFIG_MMU */
+
+#define PAGE_KERNEL __pgprot(0)
+#define swapper_pg_dir NULL
+#define VMALLOC_START 0
+
+#endif /* CONFIG_MMU */
+
#ifdef CONFIG_FLATMEM
#define kern_addr_valid(addr) (1) /* FIXME */
#endif
@@ -416,10 +421,7 @@ static inline void pgtable_cache_init(void)
/* No page table caches to initialize */
}
-#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END (PAGE_OFFSET - 1)
-#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
-
+#ifdef CONFIG_MMU
/*
* Task size is 0x4000000000 for RV64 or 0xb800000 for RV32.
* Note that PGDIR_SIZE must evenly divide TASK_SIZE.
@@ -429,6 +431,18 @@ static inline void pgtable_cache_init(void)
#else
#define TASK_SIZE VMALLOC_START
#endif
+#else /* CONFIG_MMU */
+#define TASK_SIZE 0xffffffffUL
+#endif /* !CONFIG_MMU */
+
+#define FIRST_USER_ADDRESS 0
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero,
+ * used for zero-mapped memory areas, etc.
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
#include <asm-generic/pgtable.h>
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 687dd19735a7..f127b3d17ec2 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -10,6 +10,7 @@
#include <linux/mm_types.h>
#include <asm/smp.h>
+#ifdef CONFIG_MMU
/*
* Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction
* cache as well, so a 'fence.i' is not necessary.
@@ -24,8 +25,12 @@ static inline void local_flush_tlb_page(unsigned long addr)
{
__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory");
}
+#else /* CONFIG_MMU */
+#define local_flush_tlb_all() do { } while (0)
+#define local_flush_tlb_page(addr) do { } while (0)
+#endif /* CONFIG_MMU */
-#ifndef CONFIG_SMP
+#if !defined(CONFIG_SMP) || !defined(CONFIG_MMU)
#define flush_tlb_all() local_flush_tlb_all()
#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr)
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index e076437cfafe..f462a183a9c2 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -11,6 +11,7 @@
/*
* User space memory access functions
*/
+#ifdef CONFIG_MMU
#include <linux/errno.h>
#include <linux/compiler.h>
#include <linux/thread_info.h>
@@ -475,4 +476,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
__ret; \
})
+#else /* CONFIG_MMU */
+#include <asm-generic/uaccess.h>
+#endif /* CONFIG_MMU */
#endif /* _ASM_RISCV_UACCESS_H */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index f933c04f89db..d720666283d6 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -25,9 +25,8 @@ obj-y += time.o
obj-y += traps.o
obj-y += riscv_ksyms.o
obj-y += stacktrace.o
-obj-y += vdso.o
obj-y += cacheinfo.o
-obj-y += vdso/
+obj-$(CONFIG_MMU) += vdso.o vdso/
obj-$(CONFIG_M_MODE) += clint.o
obj-$(CONFIG_FPU) += fpu.o
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index b722ce20dc20..0e040573d6a8 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -370,6 +370,10 @@ ENTRY(__switch_to)
ret
ENDPROC(__switch_to)
+#ifndef CONFIG_MMU
+#define do_page_fault do_trap_unknown
+#endif
+
.section ".rodata"
/* Exception vector table */
ENTRY(excp_vect_table)
@@ -391,3 +395,10 @@ ENTRY(excp_vect_table)
RISCV_PTR do_page_fault /* store page fault */
excp_vect_table_end:
END(excp_vect_table)
+
+#ifndef CONFIG_MMU
+ENTRY(__user_rt_sigreturn)
+ li a7, __NR_rt_sigreturn
+ scall
+END(__user_rt_sigreturn)
+#endif
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index b603edb29e73..670e5cacb24e 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -104,8 +104,10 @@ clear_bss_done:
la sp, init_thread_union + THREAD_SIZE
mv a0, s1
call setup_vm
+#ifdef CONFIG_MMU
la a0, early_pg_dir
call relocate
+#endif /* CONFIG_MMU */
/* Restore C environment */
la tp, init_task
@@ -116,6 +118,7 @@ clear_bss_done:
call parse_dtb
tail start_kernel
+#ifdef CONFIG_MMU
relocate:
/* Relocate return address */
li a1, PAGE_OFFSET
@@ -166,6 +169,7 @@ relocate:
sfence.vma
ret
+#endif /* CONFIG_MMU */
.Lsecondary_start:
#ifdef CONFIG_SMP
@@ -194,9 +198,11 @@ relocate:
beqz tp, .Lwait_for_cpu_up
fence
+#ifdef CONFIG_MMU
/* Enable virtual memory and relocate to virtual address */
la a0, swapper_pg_dir
call relocate
+#endif
tail smp_callin
#endif
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index e1a2cee340f7..eb8ba201feff 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -17,11 +17,16 @@
#include <asm/switch_to.h>
#include <asm/csr.h>
+extern u32 __user_rt_sigreturn[2];
+
#define DEBUG_SIG 0
struct rt_sigframe {
struct siginfo info;
struct ucontext uc;
+#ifndef CONFIG_MMU
+ u32 sigreturn_code[2];
+#endif
};
#ifdef CONFIG_FPU
@@ -166,7 +171,6 @@ static inline void __user *get_sigframe(struct ksignal *ksig,
return (void __user *)sp;
}
-
static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs)
{
@@ -189,8 +193,19 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
return -EFAULT;
/* Set up to return from userspace. */
+#ifdef CONFIG_MMU
regs->ra = (unsigned long)VDSO_SYMBOL(
current->mm->context.vdso, rt_sigreturn);
+#else
+ /*
+ * For the nommu case we don't have a VDSO. Instead we push two
+ * instructions to call the rt_sigreturn syscall onto the user stack.
+ */
+ if (copy_to_user(&frame->sigreturn_code, __user_rt_sigreturn,
+ sizeof(frame->sigreturn_code)))
+ return -EFAULT;
+ regs->ra = (unsigned long)&frame->sigreturn_code;;
+#endif /* CONFIG_MMU */
/*
* Set up registers for signal handler.
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 267feaa10f6a..47e7a8204460 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
-lib-y += delay.o
-lib-y += memcpy.o
-lib-y += memset.o
-lib-y += uaccess.o
-
-lib-$(CONFIG_64BIT) += tishift.o
+lib-y += delay.o
+lib-y += memcpy.o
+lib-y += memset.o
+lib-$(CONFIG_MMU) += uaccess.o
+lib-$(CONFIG_64BIT) += tishift.o
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 74055e1d6f21..e9ef19725c6c 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -6,9 +6,8 @@ CFLAGS_REMOVE_init.o = -pg
endif
obj-y += init.o
-obj-y += fault.o
obj-y += extable.o
-obj-y += ioremap.o
+obj-$(CONFIG_MMU) += fault.o ioremap.o
obj-y += cacheflush.o
obj-y += context.o
obj-y += sifive_l2_cache.o
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 10875ea1065e..0fc69eedc921 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -90,6 +90,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
#endif /* CONFIG_SMP */
+#ifdef CONFIG_MMU
void flush_icache_pte(pte_t pte)
{
struct page *page = pte_page(pte);
@@ -97,3 +98,4 @@ void flush_icache_pte(pte_t pte)
if (!test_and_set_bit(PG_dcache_clean, &page->flags))
flush_icache_all();
}
+#endif /* CONFIG_MMU */
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index beeb5d7f92ea..073ff12a838a 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -57,8 +57,10 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
cpumask_clear_cpu(cpu, mm_cpumask(prev));
cpumask_set_cpu(cpu, mm_cpumask(next));
+#ifdef CONFIG_MMU
csr_write(CSR_SATP, virt_to_pfn(next->pgd) | SATP_MODE);
local_flush_tlb_all();
+#endif
flush_icache_deferred(next);
}
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 42bf939693d3..cfd4675aed2c 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -23,6 +23,7 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
EXPORT_SYMBOL(empty_zero_page);
extern char _start[];
+void *dtb_early_va;
static void __init zone_sizes_init(void)
{
@@ -132,12 +133,12 @@ void __init setup_bootmem(void)
}
}
+#ifdef CONFIG_MMU
unsigned long va_pa_offset;
EXPORT_SYMBOL(va_pa_offset);
unsigned long pfn_base;
EXPORT_SYMBOL(pfn_base);
-void *dtb_early_va;
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
@@ -438,6 +439,16 @@ static void __init setup_vm_final(void)
csr_write(sptbr, PFN_DOWN(__pa(swapper_pg_dir)) | SATP_MODE);
local_flush_tlb_all();
}
+#else
+asmlinkage void __init setup_vm(uintptr_t dtb_pa)
+{
+ dtb_early_va = (void *)dtb_pa;
+}
+
+static inline void setup_vm_final(void)
+{
+}
+#endif /* CONFIG_MMU */
void __init paging_init(void)
{
--
2.20.1
From: Damien Le Moal <[email protected]>
When in M-Mode, we can use the mhartid CSR to get the ID of the running
HART. Doing so, direct M-Mode boot without firmware is possible.
Signed-off-by: Damien Le Moal <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Atish Patra <[email protected]>
---
arch/riscv/include/asm/csr.h | 1 +
arch/riscv/kernel/head.S | 8 ++++++++
2 files changed, 9 insertions(+)
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 53d7ce74b447..64f8fe84b88f 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -81,6 +81,7 @@
#define SIE_SEIE (_AC(0x1, UL) << IRQ_S_EXT)
/* symbolic CSR names: */
+#define CSR_MHARTID 0xf14
#define CSR_MSTATUS 0x300
#define CSR_MIE 0x304
#define CSR_MTVEC 0x305
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index bb96bb7b95d2..275c2ab1e990 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -50,6 +50,14 @@ _start_kernel:
csrw CSR_XIE, zero
csrw CSR_XIP, zero
+#ifdef CONFIG_M_MODE
+ /*
+ * The hartid in a0 is expected later on, and we have no firmware
+ * to hand it to us.
+ */
+ csrr a0, CSR_MHARTID
+#endif
+
/* Load the global pointer */
.option push
.option norelax
--
2.20.1
When running in M-mode we still the S-mode plic handlers in the DT.
Ignore them by setting the maximum threshold.
Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/irqchip/irq-sifive-plic.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index cf755964f2f8..c72c036aea76 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -244,6 +244,7 @@ static int __init plic_init(struct device_node *node,
struct plic_handler *handler;
irq_hw_number_t hwirq;
int cpu, hartid;
+ u32 threshold = 0;
if (of_irq_parse_one(node, i, &parent)) {
pr_err("failed to parse parent for context %d.\n", i);
@@ -266,10 +267,16 @@ static int __init plic_init(struct device_node *node,
continue;
}
+ /*
+ * When running in M-mode we need to ignore the S-mode handler.
+ * Here we assume it always comes later, but that might be a
+ * little fragile.
+ */
handler = per_cpu_ptr(&plic_handlers, cpu);
if (handler->present) {
pr_warn("handler already present for context %d.\n", i);
- continue;
+ threshold = 0xffffffff;
+ goto done;
}
handler->present = true;
@@ -279,8 +286,9 @@ static int __init plic_init(struct device_node *node,
handler->enable_base =
plic_regs + ENABLE_BASE + i * ENABLE_PER_HART;
+done:
/* priority must be > threshold to trigger an interrupt */
- writel(0, handler->hart_base + CONTEXT_THRESHOLD);
+ writel(threshold, handler->hart_base + CONTEXT_THRESHOLD);
for (hwirq = 1; hwirq <= nr_irqs; hwirq++)
plic_toggle(handler, hwirq, 0);
nr_handlers++;
--
2.20.1
From: Damien Le Moal <[email protected]>
Do not allow selecting SBI related options with MMU option not set.
Signed-off-by: Damien Le Moal <[email protected]>
Signed-off-by: Christoph Hellwig <[email protected]>
---
drivers/tty/hvc/Kconfig | 2 +-
drivers/tty/serial/Kconfig | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig
index 4d22b911111f..5a1ab6b536ff 100644
--- a/drivers/tty/hvc/Kconfig
+++ b/drivers/tty/hvc/Kconfig
@@ -89,7 +89,7 @@ config HVC_DCC
config HVC_RISCV_SBI
bool "RISC-V SBI console support"
- depends on RISCV
+ depends on RISCV && !M_MODE
select HVC_DRIVER
help
This enables support for console output via RISC-V SBI calls, which
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 3083dbae35f7..b2d07e150203 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -88,7 +88,7 @@ config SERIAL_EARLYCON_ARM_SEMIHOST
config SERIAL_EARLYCON_RISCV_SBI
bool "Early console using RISC-V SBI"
- depends on RISCV
+ depends on RISCV && !M_MODE
select SERIAL_CORE
select SERIAL_CORE_CONSOLE
select SERIAL_EARLYCON
--
2.20.1
This allows just loading the kernel at a pre-set address without
qemu going bonkers trying to map the ELF file.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/riscv/Makefile | 13 +++++++++----
arch/riscv/boot/Makefile | 7 ++++++-
arch/riscv/boot/loader.S | 8 ++++++++
arch/riscv/boot/loader.lds | 14 ++++++++++++++
4 files changed, 37 insertions(+), 5 deletions(-)
create mode 100644 arch/riscv/boot/loader.S
create mode 100644 arch/riscv/boot/loader.lds
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 7a117be8297c..aa9e377400e2 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -80,13 +80,18 @@ PHONY += vdso_install
vdso_install:
$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
-all: Image.gz
+ifeq ($(CONFIG_M_MODE),y)
+KBUILD_IMAGE := $(boot)/loader
+else
+KBUILD_IMAGE := $(boot)/Image.gz
+endif
+BOOT_TARGETS := Image Image.gz loader
-Image: vmlinux
- $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+all: $(notdir $(KBUILD_IMAGE))
-Image.%: Image
+$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+ @$(kecho) ' Kernel: $(boot)/$@ is ready'
zinstall install:
$(Q)$(MAKE) $(build)=$(boot) $@
diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile
index 0990a9fdbe5d..32d2addeddba 100644
--- a/arch/riscv/boot/Makefile
+++ b/arch/riscv/boot/Makefile
@@ -16,7 +16,7 @@
OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
-targets := Image
+targets := Image loader
$(obj)/Image: vmlinux FORCE
$(call if_changed,objcopy)
@@ -24,6 +24,11 @@ $(obj)/Image: vmlinux FORCE
$(obj)/Image.gz: $(obj)/Image FORCE
$(call if_changed,gzip)
+loader.o: $(src)/loader.S $(obj)/Image
+
+$(obj)/loader: $(obj)/loader.o $(obj)/Image FORCE
+ $(Q)$(LD) -T $(src)/loader.lds -o $@ $(obj)/loader.o
+
install:
$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
$(obj)/Image System.map "$(INSTALL_PATH)"
diff --git a/arch/riscv/boot/loader.S b/arch/riscv/boot/loader.S
new file mode 100644
index 000000000000..5586e2610dbb
--- /dev/null
+++ b/arch/riscv/boot/loader.S
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+
+ .align 4
+ .section .payload, "ax", %progbits
+ .globl _start
+_start:
+ .incbin "arch/riscv/boot/Image"
+
diff --git a/arch/riscv/boot/loader.lds b/arch/riscv/boot/loader.lds
new file mode 100644
index 000000000000..da9efd57bf44
--- /dev/null
+++ b/arch/riscv/boot/loader.lds
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+OUTPUT_ARCH(riscv)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = 0x80000000;
+
+ .payload : {
+ *(.payload)
+ . = ALIGN(8);
+ }
+}
--
2.20.1
Only call the SBI code if we are not running in M mode, and if we didn't
do the SBI call, or it didn't succeed call wfi in a loop to at least
save some power.
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Atish Patra <[email protected]>
---
arch/riscv/kernel/reset.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c
index d0fe623bfb8f..2f5ca379747e 100644
--- a/arch/riscv/kernel/reset.c
+++ b/arch/riscv/kernel/reset.c
@@ -8,8 +8,11 @@
static void default_power_off(void)
{
+#ifndef CONFIG_M_MODE
sbi_shutdown();
- while (1);
+#endif
+ while (1)
+ wait_for_interrupt();
}
void (*pm_power_off)(void) = default_power_off;
--
2.20.1
Many of the privileged CSRs exist in a supervisor and machine version
that are used very similarly. Provide a new X-naming layer so that
we don't have to ifdef everywhere for M-mode Linux support.
Contains contributions from Damien Le Moal <[email protected]>.
Signed-off-by: Christoph Hellwig <[email protected]>
Reviewed-by: Atish Patra <[email protected]>
---
arch/riscv/Kconfig | 4 ++
arch/riscv/include/asm/asm.h | 6 +++
arch/riscv/include/asm/csr.h | 58 ++++++++++++++++++++++++++--
arch/riscv/include/asm/irqflags.h | 12 +++---
arch/riscv/include/asm/processor.h | 2 +-
arch/riscv/include/asm/ptrace.h | 16 ++++----
arch/riscv/include/asm/switch_to.h | 8 ++--
arch/riscv/kernel/asm-offsets.c | 8 ++--
arch/riscv/kernel/entry.S | 62 ++++++++++++++++--------------
arch/riscv/kernel/fpu.S | 8 ++--
arch/riscv/kernel/head.S | 12 +++---
arch/riscv/kernel/irq.c | 4 +-
arch/riscv/kernel/process.c | 15 ++++----
arch/riscv/kernel/signal.c | 21 +++++-----
arch/riscv/kernel/traps.c | 16 ++++----
arch/riscv/lib/uaccess.S | 12 +++---
arch/riscv/mm/extable.c | 4 +-
arch/riscv/mm/fault.c | 6 +--
drivers/clocksource/timer-riscv.c | 8 ++--
drivers/irqchip/irq-sifive-plic.c | 4 +-
20 files changed, 177 insertions(+), 109 deletions(-)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 59a4727ecd6c..7765b4a3b23e 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -55,6 +55,10 @@ config RISCV
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
+# set if we run in machine mode, cleared if we run in supervisor mode
+config M_MODE
+ bool
+
config MMU
def_bool y
diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index 5a02b7d50940..14604f01e9f8 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -65,4 +65,10 @@
#error "Unexpected __SIZEOF_SHORT__"
#endif
+#ifdef CONFIG_M_MODE
+# define Xret mret
+#else
+# define Xret sret
+#endif
+
#endif /* _ASM_RISCV_ASM_H */
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index a18923fa23c8..53d7ce74b447 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -11,8 +11,11 @@
/* Status register flags */
#define SR_SIE _AC(0x00000002, UL) /* Supervisor Interrupt Enable */
+#define SR_MIE _AC(0x00000008, UL) /* Machine Interrupt Enable */
#define SR_SPIE _AC(0x00000020, UL) /* Previous Supervisor IE */
+#define SR_MPIE _AC(0x00000080, UL) /* Previous Machine IE */
#define SR_SPP _AC(0x00000100, UL) /* Previously Supervisor */
+#define SR_MPP _AC(0x00001800, UL) /* Previously Machine */
#define SR_SUM _AC(0x00040000, UL) /* Supervisor User Memory Access */
#define SR_FS _AC(0x00006000, UL) /* Floating-point Status */
@@ -44,8 +47,8 @@
#define SATP_MODE SATP_MODE_39
#endif
-/* SCAUSE */
-#define SCAUSE_IRQ_FLAG (_AC(1, UL) << (__riscv_xlen - 1))
+/* *CAUSE */
+#define XCAUSE_IRQ_FLAG (_AC(1, UL) << (__riscv_xlen - 1))
#define IRQ_U_SOFT 0
#define IRQ_S_SOFT 1
@@ -67,11 +70,26 @@
#define EXC_LOAD_PAGE_FAULT 13
#define EXC_STORE_PAGE_FAULT 15
-/* SIE (Interrupt Enable) and SIP (Interrupt Pending) flags */
+/* SIE/SIP (Machine Interrupt Enable/Pending) flags */
+#define MIE_MSIE (_AC(0x1, UL) << IRQ_M_SOFT)
+#define MIE_MTIE (_AC(0x1, UL) << IRQ_M_TIMER)
+#define MIE_MEIE (_AC(0x1, UL) << IRQ_M_EXT)
+
+/* SIE/SIP (Supervisor Interrupt Enable/Pending) flags */
#define SIE_SSIE (_AC(0x1, UL) << IRQ_S_SOFT)
#define SIE_STIE (_AC(0x1, UL) << IRQ_S_TIMER)
#define SIE_SEIE (_AC(0x1, UL) << IRQ_S_EXT)
+/* symbolic CSR names: */
+#define CSR_MSTATUS 0x300
+#define CSR_MIE 0x304
+#define CSR_MTVEC 0x305
+#define CSR_MSCRATCH 0x340
+#define CSR_MEPC 0x341
+#define CSR_MCAUSE 0x342
+#define CSR_MTVAL 0x343
+#define CSR_MIP 0x344
+
#define CSR_CYCLE 0xc00
#define CSR_TIME 0xc01
#define CSR_INSTRET 0xc02
@@ -89,6 +107,40 @@
#define CSR_TIMEH 0xc81
#define CSR_INSTRETH 0xc82
+#ifdef CONFIG_M_MODE
+# define CSR_XSTATUS CSR_MSTATUS
+# define CSR_XIE CSR_MIE
+# define CSR_XTVEC CSR_MTVEC
+# define CSR_XSCRATCH CSR_MSCRATCH
+# define CSR_XEPC CSR_MEPC
+# define CSR_XCAUSE CSR_MCAUSE
+# define CSR_XTVAL CSR_MTVAL
+# define CSR_XIP CSR_MIP
+
+# define SR_XIE SR_MIE
+# define SR_XPIE SR_MPIE
+# define SR_XPP SR_MPP
+
+# define XIE_XTIE MIE_MTIE
+# define XIE_XEIE MIE_MEIE
+#else /* CONFIG_M_MODE */
+# define CSR_XSTATUS CSR_SSTATUS
+# define CSR_XIE CSR_SIE
+# define CSR_XTVEC CSR_STVEC
+# define CSR_XSCRATCH CSR_SSCRATCH
+# define CSR_XEPC CSR_SEPC
+# define CSR_XCAUSE CSR_SCAUSE
+# define CSR_XTVAL CSR_STVAL
+# define CSR_XIP CSR_SIP
+
+# define SR_XIE SR_SIE
+# define SR_XPIE SR_SPIE
+# define SR_XPP SR_SPP
+
+# define XIE_XTIE SIE_STIE
+# define XIE_XEIE SIE_SEIE
+#endif /* CONFIG_M_MODE */
+
#ifndef __ASSEMBLY__
#define csr_swap(csr, val) \
diff --git a/arch/riscv/include/asm/irqflags.h b/arch/riscv/include/asm/irqflags.h
index e70f647ce3b7..ba2828a37d72 100644
--- a/arch/riscv/include/asm/irqflags.h
+++ b/arch/riscv/include/asm/irqflags.h
@@ -13,31 +13,31 @@
/* read interrupt enabled status */
static inline unsigned long arch_local_save_flags(void)
{
- return csr_read(CSR_SSTATUS);
+ return csr_read(CSR_XSTATUS);
}
/* unconditionally enable interrupts */
static inline void arch_local_irq_enable(void)
{
- csr_set(CSR_SSTATUS, SR_SIE);
+ csr_set(CSR_XSTATUS, SR_XIE);
}
/* unconditionally disable interrupts */
static inline void arch_local_irq_disable(void)
{
- csr_clear(CSR_SSTATUS, SR_SIE);
+ csr_clear(CSR_XSTATUS, SR_XIE);
}
/* get status and disable interrupts */
static inline unsigned long arch_local_irq_save(void)
{
- return csr_read_clear(CSR_SSTATUS, SR_SIE);
+ return csr_read_clear(CSR_XSTATUS, SR_XIE);
}
/* test flags */
static inline int arch_irqs_disabled_flags(unsigned long flags)
{
- return !(flags & SR_SIE);
+ return !(flags & SR_XIE);
}
/* test hardware interrupt enable bit */
@@ -49,7 +49,7 @@ static inline int arch_irqs_disabled(void)
/* set interrupt enabled status */
static inline void arch_local_irq_restore(unsigned long flags)
{
- csr_set(CSR_SSTATUS, flags & SR_SIE);
+ csr_set(CSR_XSTATUS, flags & SR_XIE);
}
#endif /* _ASM_RISCV_IRQFLAGS_H */
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index f539149d04c2..46f825e34575 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -42,7 +42,7 @@ struct thread_struct {
((struct pt_regs *)(task_stack_page(tsk) + THREAD_SIZE \
- ALIGN(sizeof(struct pt_regs), STACK_ALIGN)))
-#define KSTK_EIP(tsk) (task_pt_regs(tsk)->sepc)
+#define KSTK_EIP(tsk) (task_pt_regs(tsk)->xepc)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp)
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index d48d1e13973c..7684d81d0e84 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -12,7 +12,7 @@
#ifndef __ASSEMBLY__
struct pt_regs {
- unsigned long sepc;
+ unsigned long xepc;
unsigned long ra;
unsigned long sp;
unsigned long gp;
@@ -44,10 +44,10 @@ struct pt_regs {
unsigned long t4;
unsigned long t5;
unsigned long t6;
- /* Supervisor CSRs */
- unsigned long sstatus;
- unsigned long sbadaddr;
- unsigned long scause;
+ /* Supervisor/Machine CSRs */
+ unsigned long xstatus;
+ unsigned long xbadaddr;
+ unsigned long xcause;
/* a0 value before the syscall */
unsigned long orig_a0;
};
@@ -58,18 +58,18 @@ struct pt_regs {
#define REG_FMT "%08lx"
#endif
-#define user_mode(regs) (((regs)->sstatus & SR_SPP) == 0)
+#define user_mode(regs) (((regs)->xstatus & SR_XPP) == 0)
/* Helpers for working with the instruction pointer */
static inline unsigned long instruction_pointer(struct pt_regs *regs)
{
- return regs->sepc;
+ return regs->xepc;
}
static inline void instruction_pointer_set(struct pt_regs *regs,
unsigned long val)
{
- regs->sepc = val;
+ regs->xepc = val;
}
#define profile_pc(regs) instruction_pointer(regs)
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index 853b65ef656d..4cbe33bb5dd7 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -16,13 +16,13 @@ extern void __fstate_restore(struct task_struct *restore_from);
static inline void __fstate_clean(struct pt_regs *regs)
{
- regs->sstatus |= (regs->sstatus & ~(SR_FS)) | SR_FS_CLEAN;
+ regs->xstatus |= (regs->xstatus & ~(SR_FS)) | SR_FS_CLEAN;
}
static inline void fstate_save(struct task_struct *task,
struct pt_regs *regs)
{
- if ((regs->sstatus & SR_FS) == SR_FS_DIRTY) {
+ if ((regs->xstatus & SR_FS) == SR_FS_DIRTY) {
__fstate_save(task);
__fstate_clean(regs);
}
@@ -31,7 +31,7 @@ static inline void fstate_save(struct task_struct *task,
static inline void fstate_restore(struct task_struct *task,
struct pt_regs *regs)
{
- if ((regs->sstatus & SR_FS) != SR_FS_OFF) {
+ if ((regs->xstatus & SR_FS) != SR_FS_OFF) {
__fstate_restore(task);
__fstate_clean(regs);
}
@@ -43,7 +43,7 @@ static inline void __switch_to_aux(struct task_struct *prev,
struct pt_regs *regs;
regs = task_pt_regs(prev);
- if (unlikely(regs->sstatus & SR_SD))
+ if (unlikely(regs->xstatus & SR_SD))
fstate_save(prev, regs);
fstate_restore(next, task_pt_regs(next));
}
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 9f5628c38ac9..d631ad41917b 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -71,7 +71,7 @@ void asm_offsets(void)
OFFSET(TASK_THREAD_FCSR, task_struct, thread.fstate.fcsr);
DEFINE(PT_SIZE, sizeof(struct pt_regs));
- OFFSET(PT_SEPC, pt_regs, sepc);
+ OFFSET(PT_XEPC, pt_regs, xepc);
OFFSET(PT_RA, pt_regs, ra);
OFFSET(PT_FP, pt_regs, s0);
OFFSET(PT_S0, pt_regs, s0);
@@ -105,9 +105,9 @@ void asm_offsets(void)
OFFSET(PT_T6, pt_regs, t6);
OFFSET(PT_GP, pt_regs, gp);
OFFSET(PT_ORIG_A0, pt_regs, orig_a0);
- OFFSET(PT_SSTATUS, pt_regs, sstatus);
- OFFSET(PT_SBADADDR, pt_regs, sbadaddr);
- OFFSET(PT_SCAUSE, pt_regs, scause);
+ OFFSET(PT_XSTATUS, pt_regs, xstatus);
+ OFFSET(PT_XBADADDR, pt_regs, xbadaddr);
+ OFFSET(PT_XCAUSE, pt_regs, xcause);
/*
* THREAD_{F,X}* might be larger than a S-type offset can handle, but
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index bc7a56e1ca6f..b722ce20dc20 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -26,14 +26,14 @@
/*
* If coming from userspace, preserve the user thread pointer and load
- * the kernel thread pointer. If we came from the kernel, sscratch
- * will contain 0, and we should continue on the current TP.
+ * the kernel thread pointer. If we came from the kernel, the scratch
+ * register will contain 0, and we should continue on the current TP.
*/
- csrrw tp, CSR_SSCRATCH, tp
+ csrrw tp, CSR_XSCRATCH, tp
bnez tp, _save_context
_restore_kernel_tpsp:
- csrr tp, CSR_SSCRATCH
+ csrr tp, CSR_XSCRATCH
REG_S sp, TASK_TI_KERNEL_SP(tp)
_save_context:
REG_S sp, TASK_TI_USER_SP(tp)
@@ -79,16 +79,16 @@ _save_context:
li t0, SR_SUM | SR_FS
REG_L s0, TASK_TI_USER_SP(tp)
- csrrc s1, CSR_SSTATUS, t0
- csrr s2, CSR_SEPC
- csrr s3, CSR_STVAL
- csrr s4, CSR_SCAUSE
- csrr s5, CSR_SSCRATCH
+ csrrc s1, CSR_XSTATUS, t0
+ csrr s2, CSR_XEPC
+ csrr s3, CSR_XTVAL
+ csrr s4, CSR_XCAUSE
+ csrr s5, CSR_XSCRATCH
REG_S s0, PT_SP(sp)
- REG_S s1, PT_SSTATUS(sp)
- REG_S s2, PT_SEPC(sp)
- REG_S s3, PT_SBADADDR(sp)
- REG_S s4, PT_SCAUSE(sp)
+ REG_S s1, PT_XSTATUS(sp)
+ REG_S s2, PT_XEPC(sp)
+ REG_S s3, PT_XBADADDR(sp)
+ REG_S s4, PT_XCAUSE(sp)
REG_S s5, PT_TP(sp)
.endm
@@ -97,10 +97,10 @@ _save_context:
* registers from the stack.
*/
.macro RESTORE_ALL
- REG_L a0, PT_SSTATUS(sp)
- REG_L a2, PT_SEPC(sp)
- csrw CSR_SSTATUS, a0
- csrw CSR_SEPC, a2
+ REG_L a0, PT_XSTATUS(sp)
+ REG_L a2, PT_XEPC(sp)
+ csrw CSR_XSTATUS, a0
+ csrw CSR_XEPC, a2
REG_L x1, PT_RA(sp)
REG_L x3, PT_GP(sp)
@@ -144,10 +144,10 @@ ENTRY(handle_exception)
SAVE_ALL
/*
- * Set sscratch register to 0, so that if a recursive exception
+ * Set the scratch register to 0, so that if a recursive exception
* occurs, the exception vector knows it came from the kernel
*/
- csrw CSR_SSCRATCH, x0
+ csrw CSR_XSCRATCH, x0
/* Load the global pointer */
.option push
@@ -167,7 +167,7 @@ ENTRY(handle_exception)
tail do_IRQ
1:
/* Exceptions run with interrupts enabled */
- csrs sstatus, SR_SIE
+ csrs CSR_XSTATUS, SR_XIE
/* Handle syscalls */
li t0, EXC_SYSCALL
@@ -194,7 +194,7 @@ handle_syscall:
* scall instruction on sret
*/
addi s2, s2, 0x4
- REG_S s2, PT_SEPC(sp)
+ REG_S s2, PT_XEPC(sp)
/* Trace syscalls, but only if requested by the user. */
REG_L t0, TASK_TI_FLAGS(tp)
andi t0, t0, _TIF_SYSCALL_WORK
@@ -221,9 +221,15 @@ ret_from_syscall:
bnez t0, handle_syscall_trace_exit
ret_from_exception:
- REG_L s0, PT_SSTATUS(sp)
- csrc sstatus, SR_SIE
+ REG_L s0, PT_XSTATUS(sp)
+ csrc CSR_XSTATUS, SR_XIE
+#ifdef CONFIG_M_MODE
+ /* the MPP value is too large to be used as an immediate arg for addi */
+ li t0, SR_MPP
+ and s0, s0, t0
+#else
andi s0, s0, SR_SPP
+#endif
bnez s0, resume_kernel
resume_userspace:
@@ -237,14 +243,14 @@ resume_userspace:
REG_S s0, TASK_TI_KERNEL_SP(tp)
/*
- * Save TP into sscratch, so we can find the kernel data structures
- * again.
+ * Save TP into the scratch register , so we can find the kernel data
+ * structures again.
*/
- csrw CSR_SSCRATCH, tp
+ csrw CSR_XSCRATCH, tp
restore_all:
RESTORE_ALL
- sret
+ Xret
#if IS_ENABLED(CONFIG_PREEMPT)
resume_kernel:
@@ -265,7 +271,7 @@ work_pending:
bnez s1, work_resched
work_notifysig:
/* Handle pending signals and notify-resume requests */
- csrs sstatus, SR_SIE /* Enable interrupts for do_notify_resume() */
+ csrs CSR_XSTATUS, SR_XIE /* Enable interrupts for do_notify_resume() */
move a0, sp /* pt_regs */
move a1, s0 /* current_thread_info->flags */
tail do_notify_resume
diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S
index 1defb0618aff..1dade31f4564 100644
--- a/arch/riscv/kernel/fpu.S
+++ b/arch/riscv/kernel/fpu.S
@@ -23,7 +23,7 @@ ENTRY(__fstate_save)
li a2, TASK_THREAD_F0
add a0, a0, a2
li t1, SR_FS
- csrs sstatus, t1
+ csrs CSR_XSTATUS, t1
frcsr t0
fsd f0, TASK_THREAD_F0_F0(a0)
fsd f1, TASK_THREAD_F1_F0(a0)
@@ -58,7 +58,7 @@ ENTRY(__fstate_save)
fsd f30, TASK_THREAD_F30_F0(a0)
fsd f31, TASK_THREAD_F31_F0(a0)
sw t0, TASK_THREAD_FCSR_F0(a0)
- csrc sstatus, t1
+ csrc CSR_XSTATUS, t1
ret
ENDPROC(__fstate_save)
@@ -67,7 +67,7 @@ ENTRY(__fstate_restore)
add a0, a0, a2
li t1, SR_FS
lw t0, TASK_THREAD_FCSR_F0(a0)
- csrs sstatus, t1
+ csrs CSR_XSTATUS, t1
fld f0, TASK_THREAD_F0_F0(a0)
fld f1, TASK_THREAD_F1_F0(a0)
fld f2, TASK_THREAD_F2_F0(a0)
@@ -101,6 +101,6 @@ ENTRY(__fstate_restore)
fld f30, TASK_THREAD_F30_F0(a0)
fld f31, TASK_THREAD_F31_F0(a0)
fscsr t0
- csrc sstatus, t1
+ csrc CSR_XSTATUS, t1
ret
ENDPROC(__fstate_restore)
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 0f1ba17e476f..bb96bb7b95d2 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -47,8 +47,8 @@ ENTRY(_start)
.global _start_kernel
_start_kernel:
/* Mask all interrupts */
- csrw CSR_SIE, zero
- csrw CSR_SIP, zero
+ csrw CSR_XIE, zero
+ csrw CSR_XIP, zero
/* Load the global pointer */
.option push
@@ -61,7 +61,7 @@ _start_kernel:
* floating point in kernel space
*/
li t0, SR_FS
- csrc sstatus, t0
+ csrc CSR_XSTATUS, t0
/* Pick one hart to run the main boot sequence */
la a3, hart_lottery
@@ -111,7 +111,7 @@ relocate:
/* Point stvec to virtual address of intruction after satp write */
la a2, 1f
add a2, a2, a1
- csrw CSR_STVEC, a2
+ csrw CSR_XTVEC, a2
/* Compute satp for kernel page tables, but don't load it yet */
srl a2, a0, PAGE_SHIFT
@@ -133,7 +133,7 @@ relocate:
1:
/* Set trap vector to spin forever to help debug */
la a0, .Lsecondary_park
- csrw CSR_STVEC, a0
+ csrw CSR_XTVEC, a0
/* Reload the global pointer */
.option push
@@ -159,7 +159,7 @@ relocate:
/* Set trap vector to spin forever to help debug */
la a3, .Lsecondary_park
- csrw CSR_STVEC, a3
+ csrw CSR_XTVEC, a3
slli a3, a0, LGREG
la a1, __cpu_up_stack_pointer
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index 6d8659388c49..804ff70bb853 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -29,7 +29,7 @@ asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
irq_enter();
- switch (regs->scause & ~SCAUSE_IRQ_FLAG) {
+ switch (regs->xcause & ~XCAUSE_IRQ_FLAG) {
case INTERRUPT_CAUSE_TIMER:
riscv_timer_interrupt();
break;
@@ -46,7 +46,7 @@ asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs)
handle_arch_irq(regs);
break;
default:
- pr_alert("unexpected interrupt cause 0x%lx", regs->scause);
+ pr_alert("unexpected interrupt cause 0x%lx", regs->xcause);
BUG();
}
irq_exit();
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index f23794bd1e90..1f03920d07d3 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -34,7 +34,7 @@ void show_regs(struct pt_regs *regs)
show_regs_print_info(KERN_DEFAULT);
pr_cont("sepc: " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n",
- regs->sepc, regs->ra, regs->sp);
+ regs->xepc, regs->ra, regs->sp);
pr_cont(" gp : " REG_FMT " tp : " REG_FMT " t0 : " REG_FMT "\n",
regs->gp, regs->tp, regs->t0);
pr_cont(" t1 : " REG_FMT " t2 : " REG_FMT " s0 : " REG_FMT "\n",
@@ -56,17 +56,17 @@ void show_regs(struct pt_regs *regs)
pr_cont(" t5 : " REG_FMT " t6 : " REG_FMT "\n",
regs->t5, regs->t6);
- pr_cont("sstatus: " REG_FMT " sbadaddr: " REG_FMT " scause: " REG_FMT "\n",
- regs->sstatus, regs->sbadaddr, regs->scause);
+ pr_cont("status: " REG_FMT " badaddr: " REG_FMT " cause: " REG_FMT "\n",
+ regs->xstatus, regs->xbadaddr, regs->xcause);
}
void start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
- regs->sstatus = SR_SPIE;
+ regs->xstatus = SR_XPIE;
if (has_fpu)
- regs->sstatus |= SR_FS_INITIAL;
- regs->sepc = pc;
+ regs->xstatus |= SR_FS_INITIAL;
+ regs->xepc = pc;
regs->sp = sp;
set_fs(USER_DS);
}
@@ -101,7 +101,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
const register unsigned long gp __asm__ ("gp");
memset(childregs, 0, sizeof(struct pt_regs));
childregs->gp = gp;
- childregs->sstatus = SR_SPP | SR_SPIE; /* Supervisor, irqs on */
+ /* Supervisor/Machine, irqs on: */
+ childregs->xstatus = SR_XPP | SR_XPIE;
p->thread.ra = (unsigned long)ret_from_kernel_thread;
p->thread.s[0] = usp; /* fn */
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index b14d7647d800..e1a2cee340f7 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -124,7 +124,7 @@ SYSCALL_DEFINE0(rt_sigreturn)
pr_info_ratelimited(
"%s[%d]: bad frame in %s: frame=%p pc=%p sp=%p\n",
task->comm, task_pid_nr(task), __func__,
- frame, (void *)regs->sepc, (void *)regs->sp);
+ frame, (void *)regs->xepc, (void *)regs->sp);
}
force_sig(SIGSEGV);
return 0;
@@ -199,7 +199,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
* We always pass siginfo and mcontext, regardless of SA_SIGINFO,
* since some things rely on this (e.g. glibc's debug/segfault.c).
*/
- regs->sepc = (unsigned long)ksig->ka.sa.sa_handler;
+ regs->xepc = (unsigned long)ksig->ka.sa.sa_handler;
regs->sp = (unsigned long)frame;
regs->a0 = ksig->sig; /* a0: signal number */
regs->a1 = (unsigned long)(&frame->info); /* a1: siginfo pointer */
@@ -208,7 +208,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
#if DEBUG_SIG
pr_info("SIG deliver (%s:%d): sig=%d pc=%p ra=%p sp=%p\n",
current->comm, task_pid_nr(current), ksig->sig,
- (void *)regs->sepc, (void *)regs->ra, frame);
+ (void *)regs->xepc, (void *)regs->ra, frame);
#endif
return 0;
@@ -220,10 +220,9 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
int ret;
/* Are we from a system call? */
- if (regs->scause == EXC_SYSCALL) {
+ if (regs->xcause == EXC_SYSCALL) {
/* Avoid additional syscall restarting via ret_from_exception */
- regs->scause = -1UL;
-
+ regs->xcause = -1UL;
/* If so, check system call restarting.. */
switch (regs->a0) {
case -ERESTART_RESTARTBLOCK:
@@ -239,7 +238,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
/* fallthrough */
case -ERESTARTNOINTR:
regs->a0 = regs->orig_a0;
- regs->sepc -= 0x4;
+ regs->xepc -= 0x4;
break;
}
}
@@ -261,9 +260,9 @@ static void do_signal(struct pt_regs *regs)
}
/* Did we come from a system call? */
- if (regs->scause == EXC_SYSCALL) {
+ if (regs->xcause == EXC_SYSCALL) {
/* Avoid additional syscall restarting via ret_from_exception */
- regs->scause = -1UL;
+ regs->xcause = -1UL;
/* Restart the system call - no handlers present */
switch (regs->a0) {
@@ -271,12 +270,12 @@ static void do_signal(struct pt_regs *regs)
case -ERESTARTSYS:
case -ERESTARTNOINTR:
regs->a0 = regs->orig_a0;
- regs->sepc -= 0x4;
+ regs->xepc -= 0x4;
break;
case -ERESTART_RESTARTBLOCK:
regs->a0 = regs->orig_a0;
regs->a7 = __NR_restart_syscall;
- regs->sepc -= 0x4;
+ regs->xepc -= 0x4;
break;
}
}
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 424eb72d56b1..86b05780c1d9 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -40,7 +40,7 @@ void die(struct pt_regs *regs, const char *str)
print_modules();
show_regs(regs);
- ret = notify_die(DIE_OOPS, str, regs, 0, regs->scause, SIGSEGV);
+ ret = notify_die(DIE_OOPS, str, regs, 0, regs->xcause, SIGSEGV);
bust_spinlocks(0);
add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
@@ -85,7 +85,7 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code,
#define DO_ERROR_INFO(name, signo, code, str) \
asmlinkage void name(struct pt_regs *regs) \
{ \
- do_trap_error(regs, signo, code, regs->sepc, "Oops - " str); \
+ do_trap_error(regs, signo, code, regs->xepc, "Oops - " str); \
}
DO_ERROR_INFO(do_trap_unknown,
@@ -128,12 +128,12 @@ asmlinkage void do_trap_break(struct pt_regs *regs)
if (!user_mode(regs)) {
enum bug_trap_type type;
- type = report_bug(regs->sepc, regs);
+ type = report_bug(regs->xepc, regs);
switch (type) {
case BUG_TRAP_TYPE_NONE:
break;
case BUG_TRAP_TYPE_WARN:
- regs->sepc += get_break_insn_length(regs->sepc);
+ regs->xepc += get_break_insn_length(regs->xepc);
break;
case BUG_TRAP_TYPE_BUG:
die(regs, "Kernel BUG");
@@ -141,7 +141,7 @@ asmlinkage void do_trap_break(struct pt_regs *regs)
}
#endif /* CONFIG_GENERIC_BUG */
- force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc));
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->xepc));
}
#ifdef CONFIG_GENERIC_BUG
@@ -166,9 +166,9 @@ void __init trap_init(void)
* Set sup0 scratch register to 0, indicating to exception vector
* that we are presently executing in the kernel
*/
- csr_write(CSR_SSCRATCH, 0);
+ csr_write(CSR_XSCRATCH, 0);
/* Set the exception vector address */
- csr_write(CSR_STVEC, &handle_exception);
+ csr_write(CSR_XTVEC, &handle_exception);
/* Enable all interrupts */
- csr_write(CSR_SIE, -1);
+ csr_write(CSR_XIE, -1);
}
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 399e6f0c2d98..f47a2ea4dc89 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -18,7 +18,7 @@ ENTRY(__asm_copy_from_user)
/* Enable access to user memory */
li t6, SR_SUM
- csrs sstatus, t6
+ csrs CSR_XSTATUS, t6
add a3, a1, a2
/* Use word-oriented copy only if low-order bits match */
@@ -47,7 +47,7 @@ ENTRY(__asm_copy_from_user)
3:
/* Disable access to user memory */
- csrc sstatus, t6
+ csrc CSR_XSTATUS, t6
li a0, 0
ret
4: /* Edge case: unalignment */
@@ -72,7 +72,7 @@ ENTRY(__clear_user)
/* Enable access to user memory */
li t6, SR_SUM
- csrs sstatus, t6
+ csrs CSR_XSTATUS, t6
add a3, a0, a1
addi t0, a0, SZREG-1
@@ -94,7 +94,7 @@ ENTRY(__clear_user)
3:
/* Disable access to user memory */
- csrc sstatus, t6
+ csrc CSR_XSTATUS, t6
li a0, 0
ret
4: /* Edge case: unalignment */
@@ -114,11 +114,11 @@ ENDPROC(__clear_user)
/* Fixup code for __copy_user(10) and __clear_user(11) */
10:
/* Disable access to user memory */
- csrs sstatus, t6
+ csrs CSR_XSTATUS, t6
mv a0, a2
ret
11:
- csrs sstatus, t6
+ csrs CSR_XSTATUS, t6
mv a0, a1
ret
.previous
diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c
index 7aed9178d365..e0659deeb16b 100644
--- a/arch/riscv/mm/extable.c
+++ b/arch/riscv/mm/extable.c
@@ -15,9 +15,9 @@ int fixup_exception(struct pt_regs *regs)
{
const struct exception_table_entry *fixup;
- fixup = search_exception_tables(regs->sepc);
+ fixup = search_exception_tables(regs->xepc);
if (fixup) {
- regs->sepc = fixup->fixup;
+ regs->xepc = fixup->fixup;
return 1;
}
return 0;
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 96add1427a75..10a8ce38ac7a 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -32,8 +32,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
int code = SEGV_MAPERR;
vm_fault_t fault;
- cause = regs->scause;
- addr = regs->sbadaddr;
+ cause = regs->xcause;
+ addr = regs->xbadaddr;
tsk = current;
mm = tsk->mm;
@@ -51,7 +51,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
goto vmalloc_fault;
/* Enable interrupts if they were enabled in the parent context. */
- if (likely(regs->sstatus & SR_SPIE))
+ if (likely(regs->xstatus & SR_XPIE))
local_irq_enable();
/*
diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c
index 09e031176bc6..3ad2fa52bac9 100644
--- a/drivers/clocksource/timer-riscv.c
+++ b/drivers/clocksource/timer-riscv.c
@@ -28,7 +28,7 @@
static int riscv_clock_next_event(unsigned long delta,
struct clock_event_device *ce)
{
- csr_set(sie, SIE_STIE);
+ csr_set(CSR_XIE, XIE_XTIE);
sbi_set_timer(get_cycles64() + delta);
return 0;
}
@@ -70,13 +70,13 @@ static int riscv_timer_starting_cpu(unsigned int cpu)
ce->cpumask = cpumask_of(cpu);
clockevents_config_and_register(ce, riscv_timebase, 100, 0x7fffffff);
- csr_set(sie, SIE_STIE);
+ csr_set(CSR_XIE, XIE_XTIE);
return 0;
}
static int riscv_timer_dying_cpu(unsigned int cpu)
{
- csr_clear(sie, SIE_STIE);
+ csr_clear(CSR_XIE, XIE_XTIE);
return 0;
}
@@ -85,7 +85,7 @@ void riscv_timer_interrupt(void)
{
struct clock_event_device *evdev = this_cpu_ptr(&riscv_clock_event);
- csr_clear(sie, SIE_STIE);
+ csr_clear(CSR_XIE, XIE_XTIE);
evdev->event_handler(evdev);
}
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index c72c036aea76..4ee96ac90ea4 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -179,7 +179,7 @@ static void plic_handle_irq(struct pt_regs *regs)
WARN_ON_ONCE(!handler->present);
- csr_clear(sie, SIE_SEIE);
+ csr_clear(CSR_XIE, XIE_XEIE);
while ((hwirq = readl(claim))) {
int irq = irq_find_mapping(plic_irqdomain, hwirq);
@@ -190,7 +190,7 @@ static void plic_handle_irq(struct pt_regs *regs)
generic_handle_irq(irq);
writel(hwirq, claim);
}
- csr_set(sie, SIE_SEIE);
+ csr_set(CSR_XIE, XIE_XEIE);
}
/*
--
2.20.1
RISC-V has the concept of a cpu level interrupt controller. Part of it
is expose as bits in the status registers, and 2 new CSRs per privilege
level in the instruction set, but the machanisms to trigger IPIs and
timer events, as well as reading the actual timer value are not
specified in the RISC-V spec but usually delegated to a block of MMIO
registers. This patch adds support for those MMIO registers in the
timer and IPI code. For now only the SiFive layout also supported by
a few other implementations is supported, but the code should be
easily extensible to others in the future.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/riscv/include/asm/clint.h | 40 +++++++++++++++++++++++++++
arch/riscv/include/asm/timex.h | 17 ++++++++++++
arch/riscv/kernel/Makefile | 1 +
arch/riscv/kernel/clint.c | 45 +++++++++++++++++++++++++++++++
arch/riscv/kernel/setup.c | 2 ++
arch/riscv/kernel/smp.c | 24 +++++++++++++++++
arch/riscv/kernel/smpboot.c | 3 +++
drivers/clocksource/timer-riscv.c | 16 ++++++++---
8 files changed, 144 insertions(+), 4 deletions(-)
create mode 100644 arch/riscv/include/asm/clint.h
create mode 100644 arch/riscv/kernel/clint.c
diff --git a/arch/riscv/include/asm/clint.h b/arch/riscv/include/asm/clint.h
new file mode 100644
index 000000000000..46d182d9a4db
--- /dev/null
+++ b/arch/riscv/include/asm/clint.h
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _ASM_CLINT_H
+#define _ASM_CLINT_H 1
+
+#include <linux/smp.h>
+
+#ifdef CONFIG_M_MODE
+extern u32 __iomem *clint_ipi_base;
+extern u64 __iomem *clint_time_val;
+extern u64 __iomem *clint_time_cmp;
+
+void clint_init_boot_cpu(void);
+
+static inline void clint_send_ipi(unsigned long hartid)
+{
+ writel(1, clint_ipi_base + hartid);
+}
+
+static inline void clint_clear_ipi(unsigned long hartid)
+{
+ writel(0, clint_ipi_base + hartid);
+}
+
+static inline u64 clint_read_timer(void)
+{
+ return readq_relaxed(clint_time_val);
+}
+
+static inline void clint_set_timer(unsigned long delta)
+{
+ writeq_relaxed(clint_read_timer() + delta,
+ clint_time_cmp + cpuid_to_hartid_map(smp_processor_id()));
+}
+
+#else
+#define clint_init_boot_cpu() do { } while (0)
+#define clint_clear_ipi(hartid) do { } while (0)
+#endif /* CONFIG_M_MODE */
+
+#endif /* _ASM_CLINT_H */
diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
index 6a703ec9d796..bf907997f107 100644
--- a/arch/riscv/include/asm/timex.h
+++ b/arch/riscv/include/asm/timex.h
@@ -10,6 +10,22 @@
typedef unsigned long cycles_t;
+#ifdef CONFIG_M_MODE
+
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <asm/clint.h>
+
+static inline cycles_t get_cycles(void)
+{
+#ifdef CONFIG_64BIT
+ return readq_relaxed(clint_time_val);
+#else
+ return readl_relaxed(clint_time_val);
+#endif
+}
+#define get_cycles get_cycles
+
+#else /* CONFIG_M_MODE */
static inline cycles_t get_cycles_inline(void)
{
cycles_t n;
@@ -40,6 +56,7 @@ static inline uint64_t get_cycles64(void)
return ((u64)hi << 32) | lo;
}
#endif
+#endif /* CONFIG_M_MODE */
#define ARCH_HAS_READ_CURRENT_TIMER
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 2420d37d96de..f933c04f89db 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -29,6 +29,7 @@ obj-y += vdso.o
obj-y += cacheinfo.o
obj-y += vdso/
+obj-$(CONFIG_M_MODE) += clint.o
obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/riscv/kernel/clint.c b/arch/riscv/kernel/clint.c
new file mode 100644
index 000000000000..15b9e7fa5416
--- /dev/null
+++ b/arch/riscv/kernel/clint.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Christoph Hellwig.
+ */
+
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/types.h>
+#include <asm/csr.h>
+#include <asm/irq.h>
+#include <asm/timex.h>
+
+/*
+ * This is the layout used by the SiFive clint, which is also shared by the qemu
+ * virt platform, and the Kendryte KD210 at least.
+ */
+#define CLINT_IPI_OFF 0
+#define CLINT_TIME_VAL_OFF 0xbff8
+#define CLINT_TIME_CMP_OFF 0x4000;
+
+u32 __iomem *clint_ipi_base;
+u64 __iomem *clint_time_val;
+u64 __iomem *clint_time_cmp;
+
+void clint_init_boot_cpu(void)
+{
+ struct device_node *np;
+ void __iomem *base;
+
+ np = of_find_compatible_node(NULL, NULL, "riscv,clint0");
+ if (!np) {
+ panic("clint not found");
+ return;
+ }
+
+ base = of_iomap(np, 0);
+ if (!base)
+ panic("could not map CLINT");
+
+ clint_ipi_base = base + CLINT_IPI_OFF;
+ clint_time_val = base + CLINT_TIME_VAL_OFF;
+ clint_time_cmp = base + CLINT_TIME_CMP_OFF;
+
+ clint_clear_ipi(boot_cpu_hartid);
+}
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index a990a6cb184f..f4ba71b66c73 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -17,6 +17,7 @@
#include <linux/sched/task.h>
#include <linux/swiotlb.h>
+#include <asm/clint.h>
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
@@ -65,6 +66,7 @@ void __init setup_arch(char **cmdline_p)
setup_bootmem();
paging_init();
unflatten_device_tree();
+ clint_init_boot_cpu();
#ifdef CONFIG_SWIOTLB
swiotlb_init(1);
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 8cd730239613..ee8599a7ca48 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -13,7 +13,9 @@
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/delay.h>
+#include <linux/io.h>
+#include <asm/clint.h>
#include <asm/sbi.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
@@ -78,6 +80,27 @@ static void ipi_stop(void)
wait_for_interrupt();
}
+#ifdef CONFIG_M_MODE
+static inline void send_ipi_single(int cpu, enum ipi_message_type op)
+{
+ set_bit(op, &ipi_data[cpu].bits);
+ clint_send_ipi(cpuid_to_hartid_map(cpu));
+}
+
+static inline void send_ipi_mask(const struct cpumask *mask,
+ enum ipi_message_type op)
+{
+ int cpu;
+
+ for_each_cpu(cpu, mask)
+ send_ipi_single(cpu, op);
+}
+
+static inline void clear_ipi(void)
+{
+ clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id()));
+}
+#else /* CONFIG_M_MODE */
static void send_ipi_mask(const struct cpumask *mask, enum ipi_message_type op)
{
int cpuid, hartid;
@@ -103,6 +126,7 @@ static inline void clear_ipi(void)
{
csr_clear(CSR_SIP, SIE_SSIE);
}
+#endif /* CONFIG_M_MODE */
void riscv_software_interrupt(void)
{
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 7462a44304fe..1b7678d86ec8 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -23,6 +23,7 @@
#include <linux/of.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/mm.h>
+#include <asm/clint.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
@@ -132,6 +133,8 @@ asmlinkage void __init smp_callin(void)
{
struct mm_struct *mm = &init_mm;
+ clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id()));
+
/* All kernel threads share the same mm context. */
mmgrab(mm);
current->active_mm = mm;
diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c
index 3ad2fa52bac9..09f2165bd0a5 100644
--- a/drivers/clocksource/timer-riscv.c
+++ b/drivers/clocksource/timer-riscv.c
@@ -24,12 +24,16 @@
* operations on the current hart. There is guaranteed to be exactly one timer
* per hart on all RISC-V systems.
*/
-
static int riscv_clock_next_event(unsigned long delta,
struct clock_event_device *ce)
{
csr_set(CSR_XIE, XIE_XTIE);
+
+#ifdef CONFIG_M_MODE
+ clint_set_timer(delta);
+#else
sbi_set_timer(get_cycles64() + delta);
+#endif
return 0;
}
@@ -45,14 +49,18 @@ static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = {
* within one tick of each other, so while this could technically go
* backwards when hopping between CPUs, practically it won't happen.
*/
-static unsigned long long riscv_clocksource_rdtime(struct clocksource *cs)
+static u64 riscv_sched_clock(void)
{
+#ifdef CONFIG_M_MODE
+ return clint_read_timer();
+#else
return get_cycles64();
+#endif
}
-static u64 riscv_sched_clock(void)
+static unsigned long long riscv_clocksource_rdtime(struct clocksource *cs)
{
- return get_cycles64();
+ return riscv_sched_clock();
}
static struct clocksource riscv_clocksource = {
--
2.20.1
The RISC-V ISA only supports flushing the instruction cache for the local
CPU core. For normal S-mode Linux remote flushing is offloaded to
machine mode using ecalls, but for M-mode Linux we'll have to do it
ourselves. Use the same implementation as all the existing open source
SBI implementations by just doing an IPI to all remote cores to execute
th sfence.i instruction on every live core.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/riscv/mm/cacheflush.c | 31 +++++++++++++++++++++++++++----
1 file changed, 27 insertions(+), 4 deletions(-)
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 9ebcff8ba263..10875ea1065e 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -10,10 +10,35 @@
#include <asm/sbi.h>
+#ifdef CONFIG_M_MODE
+static void ipi_remote_fence_i(void *info)
+{
+ return local_flush_icache_all();
+}
+
+void flush_icache_all(void)
+{
+ on_each_cpu(ipi_remote_fence_i, NULL, 1);
+}
+
+static void flush_icache_cpumask(const cpumask_t *mask)
+{
+ on_each_cpu_mask(mask, ipi_remote_fence_i, NULL, 1);
+}
+#else /* CONFIG_M_MODE */
void flush_icache_all(void)
{
sbi_remote_fence_i(NULL);
}
+static void flush_icache_cpumask(const cpumask_t *mask)
+{
+ cpumask_t hmask;
+
+ cpumask_clear(&hmask);
+ riscv_cpuid_to_hartid_mask(mask, &hmask);
+ sbi_remote_fence_i(hmask.bits);
+}
+#endif /* CONFIG_M_MODE */
/*
* Performs an icache flush for the given MM context. RISC-V has no direct
@@ -28,7 +53,7 @@ void flush_icache_all(void)
void flush_icache_mm(struct mm_struct *mm, bool local)
{
unsigned int cpu;
- cpumask_t others, hmask, *mask;
+ cpumask_t others, *mask;
preempt_disable();
@@ -47,9 +72,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
local |= cpumask_empty(&others);
if (mm != current->active_mm || !local) {
- cpumask_clear(&hmask);
- riscv_cpuid_to_hartid_mask(&others, &hmask);
- sbi_remote_fence_i(hmask.bits);
+ flush_icache_cpumask(&others);
} else {
/*
* It's assumed that at least one strongly ordered operation is
--
2.20.1
The numerical levels for External/Timer/Software interrupts differ
between S-mode and M-mode.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/riscv/kernel/irq.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index 804ff70bb853..9566aabbe50b 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -14,9 +14,15 @@
/*
* Possible interrupt causes:
*/
-#define INTERRUPT_CAUSE_SOFTWARE IRQ_S_SOFT
-#define INTERRUPT_CAUSE_TIMER IRQ_S_TIMER
-#define INTERRUPT_CAUSE_EXTERNAL IRQ_S_EXT
+#ifdef CONFIG_M_MODE
+# define INTERRUPT_CAUSE_SOFTWARE IRQ_M_SOFT
+# define INTERRUPT_CAUSE_TIMER IRQ_M_TIMER
+# define INTERRUPT_CAUSE_EXTERNAL IRQ_M_EXT
+#else
+# define INTERRUPT_CAUSE_SOFTWARE IRQ_S_SOFT
+# define INTERRUPT_CAUSE_TIMER IRQ_S_TIMER
+# define INTERRUPT_CAUSE_EXTERNAL IRQ_S_EXT
+#endif /* CONFIG_M_MODE */
int arch_show_interrupts(struct seq_file *p, int prec)
{
--
2.20.1
When we get booted we want a clear slate without any leaks from previous
supervisors or the firmware. Flush the instruction cache and then clear
all registers to known good values. This is really important for the
upcoming nommu support that runs on M-mode, but can't really harm when
running in S-mode either. Vaguely based on the concepts from opensbi.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/riscv/include/asm/csr.h | 1 +
arch/riscv/kernel/head.S | 86 ++++++++++++++++++++++++++++++++++++
2 files changed, 87 insertions(+)
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 64f8fe84b88f..a804272a5f19 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -83,6 +83,7 @@
/* symbolic CSR names: */
#define CSR_MHARTID 0xf14
#define CSR_MSTATUS 0x300
+#define CSR_MISA 0x301
#define CSR_MIE 0x304
#define CSR_MTVEC 0x305
#define CSR_MSCRATCH 0x340
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 275c2ab1e990..b603edb29e73 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -11,6 +11,7 @@
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/csr.h>
+#include <asm/hwcap.h>
#include <asm/image.h>
__INIT
@@ -51,6 +52,12 @@ _start_kernel:
csrw CSR_XIP, zero
#ifdef CONFIG_M_MODE
+ /* flush the instruction cache */
+ fence.i
+
+ /* Reset all registers except ra, a0, a1 */
+ call reset_regs
+
/*
* The hartid in a0 is expected later on, and we have no firmware
* to hand it to us.
@@ -201,6 +208,85 @@ relocate:
j .Lsecondary_park
END(_start)
+#ifdef CONFIG_M_MODE
+ENTRY(reset_regs)
+ li sp, 0
+ li gp, 0
+ li tp, 0
+ li t0, 0
+ li t1, 0
+ li t2, 0
+ li s0, 0
+ li s1, 0
+ li a2, 0
+ li a3, 0
+ li a4, 0
+ li a5, 0
+ li a6, 0
+ li a7, 0
+ li s2, 0
+ li s3, 0
+ li s4, 0
+ li s5, 0
+ li s6, 0
+ li s7, 0
+ li s8, 0
+ li s9, 0
+ li s10, 0
+ li s11, 0
+ li t3, 0
+ li t4, 0
+ li t5, 0
+ li t6, 0
+ csrw sscratch, 0
+
+#ifdef CONFIG_FPU
+ csrr t0, CSR_MISA
+ andi t0, t0, (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D)
+ bnez t0, .Lreset_regs_done
+
+ li t1, SR_FS
+ csrs CSR_XSTATUS, t1
+ fmv.s.x f0, zero
+ fmv.s.x f1, zero
+ fmv.s.x f2, zero
+ fmv.s.x f3, zero
+ fmv.s.x f4, zero
+ fmv.s.x f5, zero
+ fmv.s.x f6, zero
+ fmv.s.x f7, zero
+ fmv.s.x f8, zero
+ fmv.s.x f9, zero
+ fmv.s.x f10, zero
+ fmv.s.x f11, zero
+ fmv.s.x f12, zero
+ fmv.s.x f13, zero
+ fmv.s.x f14, zero
+ fmv.s.x f15, zero
+ fmv.s.x f16, zero
+ fmv.s.x f17, zero
+ fmv.s.x f18, zero
+ fmv.s.x f19, zero
+ fmv.s.x f20, zero
+ fmv.s.x f21, zero
+ fmv.s.x f22, zero
+ fmv.s.x f23, zero
+ fmv.s.x f24, zero
+ fmv.s.x f25, zero
+ fmv.s.x f26, zero
+ fmv.s.x f27, zero
+ fmv.s.x f28, zero
+ fmv.s.x f29, zero
+ fmv.s.x f30, zero
+ fmv.s.x f31, zero
+ csrw fcsr, 0
+ /* note that the caller must clear SR_FS */
+#endif /* CONFIG_FPU */
+.Lreset_regs_done:
+ ret
+END(reset_regs)
+#endif /* CONFIG_M_MODE */
+
__PAGE_ALIGNED_BSS
/* Empty zero page */
.balign PAGE_SIZE
--
2.20.1
No point in bloating the kernel image with a bootloader header if
we run bare metal.
Signed-off-by: Christoph Hellwig <[email protected]>
---
arch/riscv/kernel/head.S | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 670e5cacb24e..09fcf3d000c0 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -16,6 +16,7 @@
__INIT
ENTRY(_start)
+#ifndef CONFIG_M_MODE
/*
* Image header expected by Linux boot-loaders. The image header data
* structure is described in asm/image.h.
@@ -47,6 +48,7 @@ ENTRY(_start)
.global _start_kernel
_start_kernel:
+#endif /* CONFIG_M_MODE */
/* Mask all interrupts */
csrw CSR_XIE, zero
csrw CSR_XIP, zero
--
2.20.1
On Tue, Aug 13, 2019 at 05:47:40PM +0200, Christoph Hellwig wrote:
> RISC-V has the concept of a cpu level interrupt controller. Part of it
> is expose as bits in the status registers, and 2 new CSRs per privilege
> level in the instruction set, but the machanisms to trigger IPIs and
> timer events, as well as reading the actual timer value are not
> specified in the RISC-V spec but usually delegated to a block of MMIO
> registers. This patch adds support for those MMIO registers in the
> timer and IPI code. For now only the SiFive layout also supported by
> a few other implementations is supported, but the code should be
> easily extensible to others in the future.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> +/*
> + * This is the layout used by the SiFive clint, which is also shared by the qemu
> + * virt platform, and the Kendryte KD210 at least.
> + */
> +#define CLINT_IPI_OFF 0
> +#define CLINT_TIME_VAL_OFF 0xbff8
> +#define CLINT_TIME_CMP_OFF 0x4000;
> +
> +u32 __iomem *clint_ipi_base;
> +u64 __iomem *clint_time_val;
> +u64 __iomem *clint_time_cmp;
> +
> +void clint_init_boot_cpu(void)
> +{
> + struct device_node *np;
> + void __iomem *base;
> +
> + np = of_find_compatible_node(NULL, NULL, "riscv,clint0");
Since the MMIO layout is that of the SiFive clint, the compatible string
should be specific to that. e.g. "sifive,clint". That way it will be
possible to distinguish it from other implementations.
What exactly is the "0" suffix for? Is that a version number?
If that's a CPU index, then I don't think that's the right way to encode
this unless the programming interface actually differs across CPUs. It
would be better to use an explicit phandle to express the affinity.
Thanks,
Mark.
On Tue, 13 Aug 2019, Christoph Hellwig wrote:
> Switch to our own constant for the satp register instead of using
> the old name from a legacy version of the privileged spec.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> Reviewed-by: Atish Patra <[email protected]>
Didn't you want us to replace this with Bin Meng's patch?
https://lore.kernel.org/linux-riscv/[email protected]/
If so, probably best just to drop this one and state a dependency.
- Paul
On Tue, Aug 13, 2019 at 09:36:23AM -0700, Paul Walmsley wrote:
> On Tue, 13 Aug 2019, Christoph Hellwig wrote:
>
> > Switch to our own constant for the satp register instead of using
> > the old name from a legacy version of the privileged spec.
> >
> > Signed-off-by: Christoph Hellwig <[email protected]>
> > Reviewed-by: Atish Patra <[email protected]>
>
> Didn't you want us to replace this with Bin Meng's patch?
>
> https://lore.kernel.org/linux-riscv/[email protected]/
>
> If so, probably best just to drop this one and state a dependency.
Either way is fine with me. But until you have a branch with
either one applied I'm going to keep resending my patch, as random
dependencies on uncommitted patches don't work.
On Tue, 13 Aug 2019, Christoph Hellwig wrote:
> On Tue, Aug 13, 2019 at 09:36:23AM -0700, Paul Walmsley wrote:
> > On Tue, 13 Aug 2019, Christoph Hellwig wrote:
> >
> > > Switch to our own constant for the satp register instead of using
> > > the old name from a legacy version of the privileged spec.
> > >
> > > Signed-off-by: Christoph Hellwig <[email protected]>
> > > Reviewed-by: Atish Patra <[email protected]>
> >
> > Didn't you want us to replace this with Bin Meng's patch?
> >
> > https://lore.kernel.org/linux-riscv/[email protected]/
> >
> > If so, probably best just to drop this one and state a dependency.
>
> Either way is fine with me. But until you have a branch with
> either one applied I'm going to keep resending my patch, as random
> dependencies on uncommitted patches don't work.
If you're going to resend a patch, it's better to resend the other one
that you've explicitly endorsed in favor of your own.
- Paul
Thomas, Jason, Marc,
On Tue, 13 Aug 2019, Christoph Hellwig wrote:
> When running in M-mode we still the S-mode plic handlers in the DT.
> Ignore them by setting the maximum threshold.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
If you're happy with this, could one of you ack it so we can merge it
with the rest of this series through the RISC-V tree?
thanks
- Paul
> ---
> drivers/irqchip/irq-sifive-plic.c | 12 ++++++++++--
> 1 file changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
> index cf755964f2f8..c72c036aea76 100644
> --- a/drivers/irqchip/irq-sifive-plic.c
> +++ b/drivers/irqchip/irq-sifive-plic.c
> @@ -244,6 +244,7 @@ static int __init plic_init(struct device_node *node,
> struct plic_handler *handler;
> irq_hw_number_t hwirq;
> int cpu, hartid;
> + u32 threshold = 0;
>
> if (of_irq_parse_one(node, i, &parent)) {
> pr_err("failed to parse parent for context %d.\n", i);
> @@ -266,10 +267,16 @@ static int __init plic_init(struct device_node *node,
> continue;
> }
>
> + /*
> + * When running in M-mode we need to ignore the S-mode handler.
> + * Here we assume it always comes later, but that might be a
> + * little fragile.
> + */
> handler = per_cpu_ptr(&plic_handlers, cpu);
> if (handler->present) {
> pr_warn("handler already present for context %d.\n", i);
> - continue;
> + threshold = 0xffffffff;
> + goto done;
> }
>
> handler->present = true;
> @@ -279,8 +286,9 @@ static int __init plic_init(struct device_node *node,
> handler->enable_base =
> plic_regs + ENABLE_BASE + i * ENABLE_PER_HART;
>
> +done:
> /* priority must be > threshold to trigger an interrupt */
> - writel(0, handler->hart_base + CONTEXT_THRESHOLD);
> + writel(threshold, handler->hart_base + CONTEXT_THRESHOLD);
> for (hwirq = 1; hwirq <= nr_irqs; hwirq++)
> plic_toggle(handler, hwirq, 0);
> nr_handlers++;
> --
> 2.20.1
>
>
On Tue, 13 Aug 2019, Christoph Hellwig wrote:
> Switch to our own constant for the satp register instead of using
> the old name from a legacy version of the privileged spec.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> Reviewed-by: Atish Patra <[email protected]>
Dropping this one in favor of Bin Meng's patch per
https://lore.kernel.org/linux-riscv/[email protected]/
- Paul
Please ignore the previous mail, I must have missed this part of the patch,
>
> > + csrr t0, CSR_MISA
> > + andi t0, t0, (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D)
> > + bnez t0, .Lreset_regs_done
> > +
In S-mode we were not able to obtain the ISA information in misa, but now
the nommu port is in M-mode so this is rather straightforward.
Hi Christoph,
On Tue, Aug 13, 2019 at 05:47:45PM +0200, Christoph Hellwig wrote:
> When we get booted we want a clear slate without any leaks from previous
> supervisors or the firmware. Flush the instruction cache and then clear
> all registers to known good values. This is really important for the
> upcoming nommu support that runs on M-mode, but can't really harm when
> running in S-mode either.
Sure.
> +#ifdef CONFIG_FPU
But it doesn't really mean that the running system has an FPU given CONFIG_FPU
enabled. Normally the existence of an FPU is checked in riscv_fill_hwcap by
searching device tree, where the code looks like
bool has_fpu = false; // this one is global
...
#ifdef CONFIG_FPU
if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D))
has_fpu = true;
#endif
Or does CONFIG_FPU have a more intuitive meaning when CONFIG_M_MODE is enabled?
> + csrr t0, CSR_MISA
> + andi t0, t0, (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D)
> + bnez t0, .Lreset_regs_done
> +
> + li t1, SR_FS
> + csrs CSR_XSTATUS, t1
> + fmv.s.x f0, zero
> + fmv.s.x f1, zero
> + fmv.s.x f2, zero
> + fmv.s.x f3, zero
> + fmv.s.x f4, zero
> + fmv.s.x f5, zero
> + fmv.s.x f6, zero
> + fmv.s.x f7, zero
> + fmv.s.x f8, zero
> + fmv.s.x f9, zero
> + fmv.s.x f10, zero
> + fmv.s.x f11, zero
> + fmv.s.x f12, zero
> + fmv.s.x f13, zero
> + fmv.s.x f14, zero
> + fmv.s.x f15, zero
> + fmv.s.x f16, zero
> + fmv.s.x f17, zero
> + fmv.s.x f18, zero
> + fmv.s.x f19, zero
> + fmv.s.x f20, zero
> + fmv.s.x f21, zero
> + fmv.s.x f22, zero
> + fmv.s.x f23, zero
> + fmv.s.x f24, zero
> + fmv.s.x f25, zero
> + fmv.s.x f26, zero
> + fmv.s.x f27, zero
> + fmv.s.x f28, zero
> + fmv.s.x f29, zero
> + fmv.s.x f30, zero
> + fmv.s.x f31, zero
> + csrw fcsr, 0
> + /* note that the caller must clear SR_FS */
> +#endif /* CONFIG_FPU */
On Wed, Aug 14, 2019 at 09:00:14AM +0800, Alan Kao wrote:
> But it doesn't really mean that the running system has an FPU given CONFIG_FPU
> enabled. Normally the existence of an FPU is checked in riscv_fill_hwcap by
> searching device tree, where the code looks like
>
>
> bool has_fpu = false; // this one is global
> ...
> #ifdef CONFIG_FPU
> if (elf_hwcap & (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D))
> has_fpu = true;
> #endif
>
>
> Or does CONFIG_FPU have a more intuitive meaning when CONFIG_M_MODE is enabled?
No, it doesn't..
>
> > + csrr t0, CSR_MISA
> > + andi t0, t0, (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D)
> > + bnez t0, .Lreset_regs_done
... which is why we have these few lines of code that check the
caps returns from the misa CSR, similar to the elf_caps check quoted
above.
On Tue, 13 Aug 2019, Christoph Hellwig wrote:
> This prepare for adding native non-SBI IPI code.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
Thanks, queued for v5.4-rc1.
- Paul
On Tue, 13 Aug 2019 18:44:02 +0100,
Paul Walmsley <[email protected]> wrote:
Hi Paul,
>
> Thomas, Jason, Marc,
>
> On Tue, 13 Aug 2019, Christoph Hellwig wrote:
>
> > When running in M-mode we still the S-mode plic handlers in the DT.
^^^^ missing word?
> > Ignore them by setting the maximum threshold.
> >
> > Signed-off-by: Christoph Hellwig <[email protected]>
>
> If you're happy with this, could one of you ack it so we can merge it
> with the rest of this series through the RISC-V tree?
Sure, no problem.
Acked-by: Marc Zyngier <[email protected]>
Thanks,
M.
--
Jazz is not dead, it just smells funny.
On Tue, Aug 13, 2019 at 05:29:58PM +0100, Mark Rutland wrote:
> > + np = of_find_compatible_node(NULL, NULL, "riscv,clint0");
>
> Since the MMIO layout is that of the SiFive clint, the compatible string
> should be specific to that. e.g. "sifive,clint". That way it will be
> possible to distinguish it from other implementations.
>
> What exactly is the "0" suffix for? Is that a version number?
>
> If that's a CPU index, then I don't think that's the right way to encode
> this unless the programming interface actually differs across CPUs. It
> would be better to use an explicit phandle to express the affinity.
It isn't a cpu index, I suspect a version number. These show up
in a lot of the early RISC-V DTs coming from the UCB/SiFive sphere.
They've now spread everywhere unfortunately.
On Tue, Aug 13, 2019 at 09:41:58PM -0700, Paul Walmsley wrote:
> On Tue, 13 Aug 2019, Christoph Hellwig wrote:
>
> > This prepare for adding native non-SBI IPI code.
> >
> > Signed-off-by: Christoph Hellwig <[email protected]>
>
> Thanks, queued for v5.4-rc1.
Where did you queue it up? I can't find it anywhere in your tree,
and I really need a baseline for the next iteration.
On Tue, 2019-08-13 at 17:47 +0200, Christoph Hellwig wrote:
> The RISC-V ISA only supports flushing the instruction cache for the
> local
> CPU core. For normal S-mode Linux remote flushing is offloaded to
> machine mode using ecalls, but for M-mode Linux we'll have to do it
> ourselves. Use the same implementation as all the existing open
> source
> SBI implementations by just doing an IPI to all remote cores to
> execute
> th sfence.i instruction on every live core.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> arch/riscv/mm/cacheflush.c | 31 +++++++++++++++++++++++++++----
> 1 file changed, 27 insertions(+), 4 deletions(-)
>
> diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
> index 9ebcff8ba263..10875ea1065e 100644
> --- a/arch/riscv/mm/cacheflush.c
> +++ b/arch/riscv/mm/cacheflush.c
> @@ -10,10 +10,35 @@
>
> #include <asm/sbi.h>
>
> +#ifdef CONFIG_M_MODE
> +static void ipi_remote_fence_i(void *info)
> +{
> + return local_flush_icache_all();
> +}
> +
> +void flush_icache_all(void)
> +{
> + on_each_cpu(ipi_remote_fence_i, NULL, 1);
> +}
> +
> +static void flush_icache_cpumask(const cpumask_t *mask)
> +{
> + on_each_cpu_mask(mask, ipi_remote_fence_i, NULL, 1);
> +}
> +#else /* CONFIG_M_MODE */
> void flush_icache_all(void)
> {
> sbi_remote_fence_i(NULL);
> }
> +static void flush_icache_cpumask(const cpumask_t *mask)
> +{
> + cpumask_t hmask;
> +
> + cpumask_clear(&hmask);
> + riscv_cpuid_to_hartid_mask(mask, &hmask);
> + sbi_remote_fence_i(hmask.bits);
> +}
> +#endif /* CONFIG_M_MODE */
>
> /*
> * Performs an icache flush for the given MM context. RISC-V has no
> direct
> @@ -28,7 +53,7 @@ void flush_icache_all(void)
> void flush_icache_mm(struct mm_struct *mm, bool local)
> {
> unsigned int cpu;
> - cpumask_t others, hmask, *mask;
> + cpumask_t others, *mask;
>
> preempt_disable();
>
> @@ -47,9 +72,7 @@ void flush_icache_mm(struct mm_struct *mm, bool
> local)
> cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
> local |= cpumask_empty(&others);
> if (mm != current->active_mm || !local) {
> - cpumask_clear(&hmask);
> - riscv_cpuid_to_hartid_mask(&others, &hmask);
> - sbi_remote_fence_i(hmask.bits);
> + flush_icache_cpumask(&others);
> } else {
> /*
> * It's assumed that at least one strongly ordered
> operation is
Reviewed-by: Atish Patra <[email protected]>
--
Regards,
Atish
On Tue, 2019-08-13 at 17:47 +0200, Christoph Hellwig wrote:
> No point in bloating the kernel image with a bootloader header if
> we run bare metal.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> arch/riscv/kernel/head.S | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
> index 670e5cacb24e..09fcf3d000c0 100644
> --- a/arch/riscv/kernel/head.S
> +++ b/arch/riscv/kernel/head.S
> @@ -16,6 +16,7 @@
>
> __INIT
> ENTRY(_start)
> +#ifndef CONFIG_M_MODE
> /*
> * Image header expected by Linux boot-loaders. The image
> header data
> * structure is described in asm/image.h.
> @@ -47,6 +48,7 @@ ENTRY(_start)
>
> .global _start_kernel
> _start_kernel:
> +#endif /* CONFIG_M_MODE */
> /* Mask all interrupts */
> csrw CSR_XIE, zero
> csrw CSR_XIP, zero
Reviewed-by: Atish Patra <[email protected]>
--
Regards,
Atish
On Tue, 2019-08-13 at 17:47 +0200, Christoph Hellwig wrote:
> There is no SBI when we run in M-mode, so fail the compile for any
> code
> trying to use SBI calls.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> arch/riscv/include/asm/sbi.h | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/arch/riscv/include/asm/sbi.h
> b/arch/riscv/include/asm/sbi.h
> index 21134b3ef404..1e17f07eadaf 100644
> --- a/arch/riscv/include/asm/sbi.h
> +++ b/arch/riscv/include/asm/sbi.h
> @@ -8,6 +8,7 @@
>
> #include <linux/types.h>
>
> +#ifndef CONFIG_M_MODE
> #define SBI_SET_TIMER 0
> #define SBI_CONSOLE_PUTCHAR 1
> #define SBI_CONSOLE_GETCHAR 2
> @@ -94,4 +95,5 @@ static inline void sbi_remote_sfence_vma_asid(const
> unsigned long *hart_mask,
> SBI_CALL_4(SBI_REMOTE_SFENCE_VMA_ASID, hart_mask, start, size,
> asid);
> }
>
> -#endif
> +#endif /* CONFIG_M_MODE */
> +#endif /* _ASM_RISCV_SBI_H */
Reviewed-by: Atish Patra <[email protected]>
--
Regards,
Atish
On Tue, 2019-08-13 at 17:47 +0200, Christoph Hellwig wrote:
> RISC-V has the concept of a cpu level interrupt controller. Part of
> it
> is expose as bits in the status registers, and 2 new CSRs per
//is expose/is to expose/
> privilege
> level in the instruction set, but the machanisms to trigger IPIs and
> timer events, as well as reading the actual timer value are not
> specified in the RISC-V spec but usually delegated to a block of MMIO
> registers. This patch adds support for those MMIO registers in the
> timer and IPI code. For now only the SiFive layout also supported by
> a few other implementations is supported, but the code should be
> easily extensible to others in the future.
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> arch/riscv/include/asm/clint.h | 40 +++++++++++++++++++++++++++
> arch/riscv/include/asm/timex.h | 17 ++++++++++++
> arch/riscv/kernel/Makefile | 1 +
> arch/riscv/kernel/clint.c | 45
> +++++++++++++++++++++++++++++++
> arch/riscv/kernel/setup.c | 2 ++
> arch/riscv/kernel/smp.c | 24 +++++++++++++++++
> arch/riscv/kernel/smpboot.c | 3 +++
> drivers/clocksource/timer-riscv.c | 16 ++++++++---
> 8 files changed, 144 insertions(+), 4 deletions(-)
> create mode 100644 arch/riscv/include/asm/clint.h
> create mode 100644 arch/riscv/kernel/clint.c
>
> diff --git a/arch/riscv/include/asm/clint.h
> b/arch/riscv/include/asm/clint.h
> new file mode 100644
> index 000000000000..46d182d9a4db
> --- /dev/null
> +++ b/arch/riscv/include/asm/clint.h
> @@ -0,0 +1,40 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#ifndef _ASM_CLINT_H
> +#define _ASM_CLINT_H 1
> +
> +#include <linux/smp.h>
> +
> +#ifdef CONFIG_M_MODE
> +extern u32 __iomem *clint_ipi_base;
> +extern u64 __iomem *clint_time_val;
> +extern u64 __iomem *clint_time_cmp;
> +
> +void clint_init_boot_cpu(void);
> +
> +static inline void clint_send_ipi(unsigned long hartid)
> +{
> + writel(1, clint_ipi_base + hartid);
> +}
> +
> +static inline void clint_clear_ipi(unsigned long hartid)
> +{
> + writel(0, clint_ipi_base + hartid);
> +}
> +
> +static inline u64 clint_read_timer(void)
> +{
> + return readq_relaxed(clint_time_val);
> +}
> +
> +static inline void clint_set_timer(unsigned long delta)
> +{
> + writeq_relaxed(clint_read_timer() + delta,
> + clint_time_cmp +
> cpuid_to_hartid_map(smp_processor_id()));'
This is not compatible with 32 bit mode. IIRC, timecmp is a 64 bit on
RV32 as well. Here is the implementation in OpenSBI.
https://github.com/riscv/opensbi/blob/master/lib/utils/sys/clint.c#L104
> +}
> +
> +#else
> +#define clint_init_boot_cpu() do { } while (0)
> +#define clint_clear_ipi(hartid) do { } while (0)
> +#endif /* CONFIG_M_MODE */
> +
> +#endif /* _ASM_CLINT_H */
> diff --git a/arch/riscv/include/asm/timex.h
> b/arch/riscv/include/asm/timex.h
> index 6a703ec9d796..bf907997f107 100644
> --- a/arch/riscv/include/asm/timex.h
> +++ b/arch/riscv/include/asm/timex.h
> @@ -10,6 +10,22 @@
>
> typedef unsigned long cycles_t;
>
> +#ifdef CONFIG_M_MODE
> +
> +#include <linux/io-64-nonatomic-lo-hi.h>
> +#include <asm/clint.h>
> +
> +static inline cycles_t get_cycles(void)
> +{
> +#ifdef CONFIG_64BIT
> + return readq_relaxed(clint_time_val);
> +#else
> + return readl_relaxed(clint_time_val);
> +#endif
Same comment as above. Both RV32 & RV64 bit have 64 bit have 64 bit
precission for timer val. You have to read 32 bits at a time and "or"
them to get 64 bit value. Here is the implementation from OpenSBI
https://github.com/riscv/opensbi/blob/master/lib/utils/sys/clint.c#L69
> +}
> +#define get_cycles get_cycles
> +
> +#else /* CONFIG_M_MODE */
> static inline cycles_t get_cycles_inline(void)
> {
> cycles_t n;
> @@ -40,6 +56,7 @@ static inline uint64_t get_cycles64(void)
> return ((u64)hi << 32) | lo;
> }
> #endif
> +#endif /* CONFIG_M_MODE */
>
> #define ARCH_HAS_READ_CURRENT_TIMER
>
> diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> index 2420d37d96de..f933c04f89db 100644
> --- a/arch/riscv/kernel/Makefile
> +++ b/arch/riscv/kernel/Makefile
> @@ -29,6 +29,7 @@ obj-y += vdso.o
> obj-y += cacheinfo.o
> obj-y += vdso/
>
> +obj-$(CONFIG_M_MODE) += clint.o
> obj-$(CONFIG_FPU) += fpu.o
> obj-$(CONFIG_SMP) += smpboot.o
> obj-$(CONFIG_SMP) += smp.o
> diff --git a/arch/riscv/kernel/clint.c b/arch/riscv/kernel/clint.c
> new file mode 100644
> index 000000000000..15b9e7fa5416
> --- /dev/null
> +++ b/arch/riscv/kernel/clint.c
> @@ -0,0 +1,45 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2019 Christoph Hellwig.
> + */
> +
> +#include <linux/io.h>
> +#include <linux/of_address.h>
> +#include <linux/types.h>
> +#include <asm/csr.h>
> +#include <asm/irq.h>
> +#include <asm/timex.h>
> +
> +/*
> + * This is the layout used by the SiFive clint, which is also shared
> by the qemu
> + * virt platform, and the Kendryte KD210 at least.
> + */
> +#define CLINT_IPI_OFF 0
> +#define CLINT_TIME_VAL_OFF 0xbff8
> +#define CLINT_TIME_CMP_OFF 0x4000;
> +
> +u32 __iomem *clint_ipi_base;
> +u64 __iomem *clint_time_val;
> +u64 __iomem *clint_time_cmp;
> +
> +void clint_init_boot_cpu(void)
> +{
> + struct device_node *np;
> + void __iomem *base;
> +
> + np = of_find_compatible_node(NULL, NULL, "riscv,clint0");
> + if (!np) {
> + panic("clint not found");
> + return;
> + }
> +
> + base = of_iomap(np, 0);
> + if (!base)
> + panic("could not map CLINT");
> +
> + clint_ipi_base = base + CLINT_IPI_OFF;
> + clint_time_val = base + CLINT_TIME_VAL_OFF;
> + clint_time_cmp = base + CLINT_TIME_CMP_OFF;
> +
> + clint_clear_ipi(boot_cpu_hartid);
> +}
> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> index a990a6cb184f..f4ba71b66c73 100644
> --- a/arch/riscv/kernel/setup.c
> +++ b/arch/riscv/kernel/setup.c
> @@ -17,6 +17,7 @@
> #include <linux/sched/task.h>
> #include <linux/swiotlb.h>
>
> +#include <asm/clint.h>
> #include <asm/setup.h>
> #include <asm/sections.h>
> #include <asm/pgtable.h>
> @@ -65,6 +66,7 @@ void __init setup_arch(char **cmdline_p)
> setup_bootmem();
> paging_init();
> unflatten_device_tree();
> + clint_init_boot_cpu();
>
> #ifdef CONFIG_SWIOTLB
> swiotlb_init(1);
> diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
> index 8cd730239613..ee8599a7ca48 100644
> --- a/arch/riscv/kernel/smp.c
> +++ b/arch/riscv/kernel/smp.c
> @@ -13,7 +13,9 @@
> #include <linux/sched.h>
> #include <linux/seq_file.h>
> #include <linux/delay.h>
> +#include <linux/io.h>
>
> +#include <asm/clint.h>
> #include <asm/sbi.h>
> #include <asm/tlbflush.h>
> #include <asm/cacheflush.h>
> @@ -78,6 +80,27 @@ static void ipi_stop(void)
> wait_for_interrupt();
> }
>
> +#ifdef CONFIG_M_MODE
> +static inline void send_ipi_single(int cpu, enum ipi_message_type
> op)
> +{
> + set_bit(op, &ipi_data[cpu].bits);
> + clint_send_ipi(cpuid_to_hartid_map(cpu));
> +}
> +
> +static inline void send_ipi_mask(const struct cpumask *mask,
> + enum ipi_message_type op)
> +{
> + int cpu;
> +
> + for_each_cpu(cpu, mask)
> + send_ipi_single(cpu, op);
> +}
> +
> +static inline void clear_ipi(void)
> +{
> + clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id()));
> +}
> +#else /* CONFIG_M_MODE */
> static void send_ipi_mask(const struct cpumask *mask, enum
> ipi_message_type op)
> {
> int cpuid, hartid;
> @@ -103,6 +126,7 @@ static inline void clear_ipi(void)
> {
> csr_clear(CSR_SIP, SIE_SSIE);
> }
> +#endif /* CONFIG_M_MODE */
>
> void riscv_software_interrupt(void)
> {
> diff --git a/arch/riscv/kernel/smpboot.c
> b/arch/riscv/kernel/smpboot.c
> index 7462a44304fe..1b7678d86ec8 100644
> --- a/arch/riscv/kernel/smpboot.c
> +++ b/arch/riscv/kernel/smpboot.c
> @@ -23,6 +23,7 @@
> #include <linux/of.h>
> #include <linux/sched/task_stack.h>
> #include <linux/sched/mm.h>
> +#include <asm/clint.h>
> #include <asm/irq.h>
> #include <asm/mmu_context.h>
> #include <asm/tlbflush.h>
> @@ -132,6 +133,8 @@ asmlinkage void __init smp_callin(void)
> {
> struct mm_struct *mm = &init_mm;
>
> + clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id()));
> +
> /* All kernel threads share the same mm context. */
> mmgrab(mm);
> current->active_mm = mm;
> diff --git a/drivers/clocksource/timer-riscv.c
> b/drivers/clocksource/timer-riscv.c
> index 3ad2fa52bac9..09f2165bd0a5 100644
> --- a/drivers/clocksource/timer-riscv.c
> +++ b/drivers/clocksource/timer-riscv.c
> @@ -24,12 +24,16 @@
> * operations on the current hart. There is guaranteed to be
> exactly one timer
> * per hart on all RISC-V systems.
> */
> -
> static int riscv_clock_next_event(unsigned long delta,
> struct clock_event_device *ce)
> {
> csr_set(CSR_XIE, XIE_XTIE);
> +
> +#ifdef CONFIG_M_MODE
> + clint_set_timer(delta);
> +#else
> sbi_set_timer(get_cycles64() + delta);
> +#endif
> return 0;
> }
>
> @@ -45,14 +49,18 @@ static DEFINE_PER_CPU(struct clock_event_device,
> riscv_clock_event) = {
> * within one tick of each other, so while this could technically go
> * backwards when hopping between CPUs, practically it won't happen.
> */
> -static unsigned long long riscv_clocksource_rdtime(struct
> clocksource *cs)
> +static u64 riscv_sched_clock(void)
> {
> +#ifdef CONFIG_M_MODE
> + return clint_read_timer();
> +#else
> return get_cycles64();
> +#endif
> }
>
> -static u64 riscv_sched_clock(void)
> +static unsigned long long riscv_clocksource_rdtime(struct
> clocksource *cs)
> {
> - return get_cycles64();
> + return riscv_sched_clock();
> }
>
> static struct clocksource riscv_clocksource = {
--
Regards,
Atish
On Wed, Aug 21, 2019 at 12:24:31AM +0000, Atish Patra wrote:
> > +static inline void clint_set_timer(unsigned long delta)
> > +{
> > + writeq_relaxed(clint_read_timer() + delta,
> > + clint_time_cmp +
> > cpuid_to_hartid_map(smp_processor_id()));'
>
> This is not compatible with 32 bit mode. IIRC, timecmp is a 64 bit on
> RV32 as well. Here is the implementation in OpenSBI.
writeq alwasy writes 64-bit anyway, but the deltas is just 32-bit
per the Linux clocksource API.
> > +static inline cycles_t get_cycles(void)
> > +{
> > +#ifdef CONFIG_64BIT
> > + return readq_relaxed(clint_time_val);
> > +#else
> > + return readl_relaxed(clint_time_val);
> > +#endif
>
> Same comment as above. Both RV32 & RV64 bit have 64 bit have 64 bit
> precission for timer val. You have to read 32 bits at a time and "or"
> them to get 64 bit value. Here is the implementation from OpenSBI
But the Linux API is only going to read 32-bits of that, same as
for the rdtime pseudo-instruction used by the current SBI-based code.
Note that I've reworked this area a bit for v4, which I'm going to
send out soon, including cleanups to the existing code to make a few
of these things more obvious:
http://git.infradead.org/users/hch/riscv.git/shortlog/refs/heads/riscv-nommu.4
> On Aug 13, 2019, at 8:47 AM, Christoph Hellwig <[email protected]> wrote:
>
> No point in bloating the kernel image with a bootloader header if
> we run bare metal.
I would say the same for S-mode. EFI booting should be an option, not
a requirement. I have M-mode U-boot working with bootelf to start BBL,
and at some point, I’m hoping we can have a M-mode linux kernel be
the SBI provider for S-mode kernels, which seem most logical to me
to start using the vmlinux elf binaries using something like kexec()
>
> Signed-off-by: Christoph Hellwig <[email protected]>
> ---
> arch/riscv/kernel/head.S | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
> index 670e5cacb24e..09fcf3d000c0 100644
> --- a/arch/riscv/kernel/head.S
> +++ b/arch/riscv/kernel/head.S
> @@ -16,6 +16,7 @@
>
> __INIT
> ENTRY(_start)
> +#ifndef CONFIG_M_MODE
> /*
> * Image header expected by Linux boot-loaders. The image header data
> * structure is described in asm/image.h.
> @@ -47,6 +48,7 @@ ENTRY(_start)
>
> .global _start_kernel
> _start_kernel:
> +#endif /* CONFIG_M_MODE */
> /* Mask all interrupts */
> csrw CSR_XIE, zero
> csrw CSR_XIP, zero
> --
> 2.20.1
>
>
> _______________________________________________
> linux-riscv mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-riscv
On Tue, Aug 20, 2019 at 09:14:41PM -0700, Troy Benjegerdes wrote:
>
>
> > On Aug 13, 2019, at 8:47 AM, Christoph Hellwig <[email protected]> wrote:
> >
> > No point in bloating the kernel image with a bootloader header if
> > we run bare metal.
>
> I would say the same for S-mode. EFI booting should be an option, not
> a requirement. I have M-mode U-boot working with bootelf to start BBL,
> and at some point, I’m hoping we can have a M-mode linux kernel be
> the SBI provider for S-mode kernels, which seem most logical to me
> to start using the vmlinux elf binaries using something like kexec()
Strictly speaking we could just add another option for the header so
that you could also disable it for S-mode. But then again the header
is not very harmful, as you don't have to use it. It just eats up
a little more kernel space. And as that aspace is very tight for my
M-mode target (the Kendryte KD210) and it is totally pointless for
M-mode I just remove it there.
The idea of using M-mode Linux as the SBI sounds cool.
On Tue, 2019-08-20 at 21:14 -0700, Troy Benjegerdes wrote:
> > On Aug 13, 2019, at 8:47 AM, Christoph Hellwig <[email protected]> wrote:
> >
> > No point in bloating the kernel image with a bootloader header if
> > we run bare metal.
>
> I would say the same for S-mode. EFI booting should be an option, not
> a requirement.
EFI booting is never a requirement on any board. When EFI stub will be
added for kernel, it will be enabled with CONFIG_EFI_STUB only.
The current additional header is only 64 bytes and also required for
booti in U-boot. So it shouldn't disabled for S-mode.
Disabling it for M-Mode Linux is okay because of memory constraint and
M-Mode linux won't use U-boot anyways.
> I have M-mode U-boot working with bootelf to start BBL,
> and at some point, I’m hoping we can have a M-mode linux kernel be
> the SBI provider for S-mode kernels,
Why do you want bloat a M-Mode software with Linux just for SBI
implementation?
Using Linux as a last stage boot loader i.e. LinuxBoot may make sense
though.
> which seem most logical to me
> to start using the vmlinux elf binaries using something like kexec()
>
> > Signed-off-by: Christoph Hellwig <[email protected]>
> > ---
> > arch/riscv/kernel/head.S | 2 ++
> > 1 file changed, 2 insertions(+)
> >
> > diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
> > index 670e5cacb24e..09fcf3d000c0 100644
> > --- a/arch/riscv/kernel/head.S
> > +++ b/arch/riscv/kernel/head.S
> > @@ -16,6 +16,7 @@
> >
> > __INIT
> > ENTRY(_start)
> > +#ifndef CONFIG_M_MODE
> > /*
> > * Image header expected by Linux boot-loaders. The image
> > header data
> > * structure is described in asm/image.h.
> > @@ -47,6 +48,7 @@ ENTRY(_start)
> >
> > .global _start_kernel
> > _start_kernel:
> > +#endif /* CONFIG_M_MODE */
> > /* Mask all interrupts */
> > csrw CSR_XIE, zero
> > csrw CSR_XIP, zero
> > --
> > 2.20.1
> >
> >
> > _______________________________________________
> > linux-riscv mailing list
> > [email protected]
> > http://lists.infradead.org/mailman/listinfo/linux-riscv
>
> _______________________________________________
> linux-riscv mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-riscv
--
Regards,
Atish
> On Aug 21, 2019, at 10:31 AM, Atish Patra <[email protected]> wrote:
>
> On Tue, 2019-08-20 at 21:14 -0700, Troy Benjegerdes wrote:
>>> On Aug 13, 2019, at 8:47 AM, Christoph Hellwig <[email protected]> wrote:
>>>
>>> No point in bloating the kernel image with a bootloader header if
>>> we run bare metal.
>>
>> I would say the same for S-mode. EFI booting should be an option, not
>> a requirement.
>
> EFI booting is never a requirement on any board. When EFI stub will be
> added for kernel, it will be enabled with CONFIG_EFI_STUB only.
>
> The current additional header is only 64 bytes and also required for
> booti in U-boot. So it shouldn't disabled for S-mode.
>
> Disabling it for M-Mode Linux is okay because of memory constraint and
> M-Mode linux won't use U-boot anyways.
>
>> I have M-mode U-boot working with bootelf to start BBL,
>> and at some point, I’m hoping we can have a M-mode linux kernel be
>> the SBI provider for S-mode kernels,
>
> Why do you want bloat a M-Mode software with Linux just for SBI
> implementation?
>
> Using Linux as a last stage boot loader i.e. LinuxBoot may make sense
> though.
>
Boot time, and ease of development, and simplified system management.
Having M-mode linux as a supervisor/boot kernel can get us to responding
to HTTPS/SSH/etc requests within seconds of power-on, while the ‘boot’
kernel can be loading guest S-mode kernels from things like NVME flash
drives that are going to be a lot more code and development to support in
U-boot or any other non-linux dedicated boot loader.
There’s also a very strong security argument, as Linux is going to get the
largest and broadest security review, and will likely get software updates
a lot faster than dedicated boot firmwares will.
Another reason would be sharing the same kernel binary (elf file) for both
M-mode, and S-mode, and using the device tree passed to each to specify
which mode it should be running it. There are probably a bunch of gotchas
with this idea, and even so I suspect someone will decide to go ahead and
just do it eventually because it could make testing, validation, and security
updates a lot easier from an operational/deployment point of view.
Linuxbios convinced me that if you want to do a really large cluster,
you can build, manage, and run such a thing with fewer people and
engineering cost than if you have all these extra layers of boot firmware
that require some company to have firmware engineers and lots of extra
system testing on the firmware.
> -----Original Message-----
> From: [email protected] <linux-kernel-
> [email protected]> On Behalf Of Troy Benjegerdes
> Sent: Wednesday, August 21, 2019 11:25 PM
> To: Atish Patra <[email protected]>
> Cc: [email protected]; [email protected]; [email protected];
> Damien Le Moal <[email protected]>; linux-
> [email protected]; [email protected]
> Subject: Re: [PATCH 15/15] riscv: disable the EFI PECOFF header for M-mode
>
>
>
> > On Aug 21, 2019, at 10:31 AM, Atish Patra <[email protected]> wrote:
> >
> > On Tue, 2019-08-20 at 21:14 -0700, Troy Benjegerdes wrote:
> >>> On Aug 13, 2019, at 8:47 AM, Christoph Hellwig <[email protected]> wrote:
> >>>
> >>> No point in bloating the kernel image with a bootloader header if we
> >>> run bare metal.
> >>
> >> I would say the same for S-mode. EFI booting should be an option, not
> >> a requirement.
> >
> > EFI booting is never a requirement on any board. When EFI stub will be
> > added for kernel, it will be enabled with CONFIG_EFI_STUB only.
> >
> > The current additional header is only 64 bytes and also required for
> > booti in U-boot. So it shouldn't disabled for S-mode.
> >
> > Disabling it for M-Mode Linux is okay because of memory constraint and
> > M-Mode linux won't use U-boot anyways.
> >
> >> I have M-mode U-boot working with bootelf to start BBL, and at some
> >> point, I’m hoping we can have a M-mode linux kernel be the SBI
> >> provider for S-mode kernels,
> >
> > Why do you want bloat a M-Mode software with Linux just for SBI
> > implementation?
> >
> > Using Linux as a last stage boot loader i.e. LinuxBoot may make sense
> > though.
> >
>
> Boot time, and ease of development, and simplified system management.
>
> Having M-mode linux as a supervisor/boot kernel can get us to responding to
> HTTPS/SSH/etc requests within seconds of power-on, while the ‘boot’
> kernel can be loading guest S-mode kernels from things like NVME flash
> drives that are going to be a lot more code and development to support in U-
> boot or any other non-linux dedicated boot loader.
I don't see why these things cannot be achieved in existing open-source
bootloaders. In fact, U-boot already has "Falcon" mode for fast booting.
>
> There’s also a very strong security argument, as Linux is going to get the
> largest and broadest security review, and will likely get software updates a
> lot faster than dedicated boot firmwares will.
For security, we have to get SW certified with various something like ISO2626
standard. This is very common practice in Automotive industry. To achieve such
a certification for any SW, the size of code base is very very important.
Due to this reason, even today Linux (and other big open-source project)
are very difficult to be security certified.
>
> Another reason would be sharing the same kernel binary (elf file) for both
> M-mode, and S-mode, and using the device tree passed to each to specify
> which mode it should be running it. There are probably a bunch of gotchas
> with this idea, and even so I suspect someone will decide to go ahead and
> just do it eventually because it could make testing, validation, and security
> updates a lot easier from an operational/deployment point of view.
>
> Linuxbios convinced me that if you want to do a really large cluster, you can
> build, manage, and run such a thing with fewer people and engineering cost
> than if you have all these extra layers of boot firmware that require some
> company to have firmware engineers and lots of extra system testing on the
> firmware.
I don't by this last argument. These days it's just very few folks doing firmware,
bootloader, and Linux porting for any new SOC (any architecture). Most of
the things are already there in various open-source project so same person
can easily contribute to various projects.
Regards,
Anup
> On Aug 21, 2019, at 4:02 PM, Anup Patel <[email protected]> wrote:
>
>
>
>> -----Original Message-----
>> From: [email protected] <linux-kernel-
>> [email protected]> On Behalf Of Troy Benjegerdes
>> Sent: Wednesday, August 21, 2019 11:25 PM
>> To: Atish Patra <[email protected]>
>> Cc: [email protected]; [email protected]; [email protected];
>> Damien Le Moal <[email protected]>; linux-
>> [email protected]; [email protected]
>> Subject: Re: [PATCH 15/15] riscv: disable the EFI PECOFF header for M-mode
>>
>>
>>
>>> On Aug 21, 2019, at 10:31 AM, Atish Patra <[email protected]> wrote:
>>>
>>> On Tue, 2019-08-20 at 21:14 -0700, Troy Benjegerdes wrote:
>>>>> On Aug 13, 2019, at 8:47 AM, Christoph Hellwig <[email protected]> wrote:
>>>>>
>>>>> No point in bloating the kernel image with a bootloader header if we
>>>>> run bare metal.
>>>>
>>>> I would say the same for S-mode. EFI booting should be an option, not
>>>> a requirement.
>>>
>>> EFI booting is never a requirement on any board. When EFI stub will be
>>> added for kernel, it will be enabled with CONFIG_EFI_STUB only.
>>>
>>> The current additional header is only 64 bytes and also required for
>>> booti in U-boot. So it shouldn't disabled for S-mode.
>>>
>>> Disabling it for M-Mode Linux is okay because of memory constraint and
>>> M-Mode linux won't use U-boot anyways.
>>>
>>>> I have M-mode U-boot working with bootelf to start BBL, and at some
>>>> point, I’m hoping we can have a M-mode linux kernel be the SBI
>>>> provider for S-mode kernels,
>>>
>>> Why do you want bloat a M-Mode software with Linux just for SBI
>>> implementation?
>>>
>>> Using Linux as a last stage boot loader i.e. LinuxBoot may make sense
>>> though.
>>>
>>
>> Boot time, and ease of development, and simplified system management.
>>
>> Having M-mode linux as a supervisor/boot kernel can get us to responding to
>> HTTPS/SSH/etc requests within seconds of power-on, while the ‘boot’
>> kernel can be loading guest S-mode kernels from things like NVME flash
>> drives that are going to be a lot more code and development to support in U-
>> boot or any other non-linux dedicated boot loader.
>
> I don't see why these things cannot be achieved in existing open-source
> bootloaders. In fact, U-boot already has "Falcon" mode for fast booting.
>
>>
>> There’s also a very strong security argument, as Linux is going to get the
>> largest and broadest security review, and will likely get software updates a
>> lot faster than dedicated boot firmwares will.
>
> For security, we have to get SW certified with various something like ISO2626
> standard. This is very common practice in Automotive industry. To achieve such
> a certification for any SW, the size of code base is very very important.
>
> Due to this reason, even today Linux (and other big open-source project)
> are very difficult to be security certified.
There’s security certified, and then there’s what I personally consider secure.
The second category is code that I know is widely audited by lots of people,
and gets quickly updated when there is a problem. I like U-boot, and I think
its a great solution for industry, it’s just not the only solution that could be
used.
>
>>
>> Another reason would be sharing the same kernel binary (elf file) for both
>> M-mode, and S-mode, and using the device tree passed to each to specify
>> which mode it should be running it. There are probably a bunch of gotchas
>> with this idea, and even so I suspect someone will decide to go ahead and
>> just do it eventually because it could make testing, validation, and security
>> updates a lot easier from an operational/deployment point of view.
>>
>> Linuxbios convinced me that if you want to do a really large cluster, you can
>> build, manage, and run such a thing with fewer people and engineering cost
>> than if you have all these extra layers of boot firmware that require some
>> company to have firmware engineers and lots of extra system testing on the
>> firmware.
>
> I don't by this last argument. These days it's just very few folks doing firmware,
> bootloader, and Linux porting for any new SOC (any architecture). Most of
> the things are already there in various open-source project so same person
> can easily contribute to various projects.
>
> Regards,
> Anup
What I see though is we’re duplicating code and work between bootloaders
and kernel, for example the SPI-NOR code, and if it was all linux, it would be
one driver model to learn/remember/track, and one place to fix things.
U-boot is great because you can boot other !linux things (like FreeBSD),
however if I was purpose building a linux cluster, I would want to be running
linux as early as possible so I can use linux scripting in bash/go/python and
talk to the queue/workload manager over a native high performance network
instead of the extremely limited ‘hush’ shell and having to discover which
user image to boot with something old and slow like dhcp/tftp/etc.
On Mon, 19 Aug 2019 03:16:48 PDT (-0700), Christoph Hellwig wrote:
> On Tue, Aug 13, 2019 at 05:29:58PM +0100, Mark Rutland wrote:
>> > + np = of_find_compatible_node(NULL, NULL, "riscv,clint0");
>>
>> Since the MMIO layout is that of the SiFive clint, the compatible string
>> should be specific to that. e.g. "sifive,clint". That way it will be
>> possible to distinguish it from other implementations.
>>
>> What exactly is the "0" suffix for? Is that a version number?
>>
>> If that's a CPU index, then I don't think that's the right way to encode
>> this unless the programming interface actually differs across CPUs. It
>> would be better to use an explicit phandle to express the affinity.
>
> It isn't a cpu index, I suspect a version number. These show up
> in a lot of the early RISC-V DTs coming from the UCB/SiFive sphere.
> They've now spread everywhere unfortunately.
clint0 would be version 0 of the clint, with is the core-local interrupt
controller in rocket chip. It should be "sifive,clint-1.0.0", not
"riscv,clint0", as it's a SiFive widget. Unfortunately there are a lot of
legacy device trees floating around, but I'm only considering what's been
upstream to be actually part of the spec.
In this case the code should match on a "sifive,clint-1.0.0", and the device
trees should be fixed up to match. We match on "riscv,plic0" for legacy
systems, and I guess it makes sense to do something similar here.
On Tue, Aug 27, 2019 at 04:37:16PM -0700, Palmer Dabbelt wrote:
> clint0 would be version 0 of the clint, with is the core-local interrupt
> controller in rocket chip. It should be "sifive,clint-1.0.0", not
> "riscv,clint0", as it's a SiFive widget. Unfortunately there are a lot of
> legacy device trees floating around, but I'm only considering what's been
> upstream to be actually part of the spec.
>
> In this case the code should match on a "sifive,clint-1.0.0", and the
> device trees should be fixed up to match. We match on "riscv,plic0" for
> legacy systems, and I guess it makes sense to do something similar here.
IFF we decided to change it I'd rather separate DT noes for the ipi
bank vs timecmp register vs timeval to support variable layouts. The
downside is that we can't just boot on unmodified upstream qemu, which
has used the "riscv,clint0" for years.
On Tue, Aug 13, 2019 at 09:41:58PM -0700, Paul Walmsley wrote:
> On Tue, 13 Aug 2019, Christoph Hellwig wrote:
>
> > This prepare for adding native non-SBI IPI code.
> >
> > Signed-off-by: Christoph Hellwig <[email protected]>
>
> Thanks, queued for v5.4-rc1.
I still don't see it in any tree, so I'll resend it with the next
version.
On Tue, 27 Aug 2019 23:11:46 PDT (-0700), Christoph Hellwig wrote:
> On Tue, Aug 27, 2019 at 04:37:16PM -0700, Palmer Dabbelt wrote:
>> clint0 would be version 0 of the clint, with is the core-local interrupt
>> controller in rocket chip. It should be "sifive,clint-1.0.0", not
>> "riscv,clint0", as it's a SiFive widget. Unfortunately there are a lot of
>> legacy device trees floating around, but I'm only considering what's been
>> upstream to be actually part of the spec.
>>
>> In this case the code should match on a "sifive,clint-1.0.0", and the
>> device trees should be fixed up to match. We match on "riscv,plic0" for
>> legacy systems, and I guess it makes sense to do something similar here.
>
> IFF we decided to change it I'd rather separate DT noes for the ipi
> bank vs timecmp register vs timeval to support variable layouts. The
> downside is that we can't just boot on unmodified upstream qemu, which
> has used the "riscv,clint0" for years.
Like I alluded to above, matching on "riscv,clint0" seems reasonable to me as
it's a defacto standard -- we'll just have to make sure that if we ever end up
with a RISC-V CLINT that the DT entry is something else.
As far as splitting the memory maps goes, I don't have a strong opinion but it
seems like that'll introduce more complexity than it's worth.
On Tue, Sep 03, 2019 at 11:48:52AM -0700, Palmer Dabbelt wrote:
> On Tue, 27 Aug 2019 23:11:46 PDT (-0700), Christoph Hellwig wrote:
> >On Tue, Aug 27, 2019 at 04:37:16PM -0700, Palmer Dabbelt wrote:
> >>clint0 would be version 0 of the clint, with is the core-local interrupt
> >>controller in rocket chip. It should be "sifive,clint-1.0.0", not
> >>"riscv,clint0", as it's a SiFive widget. Unfortunately there are a lot of
> >>legacy device trees floating around, but I'm only considering what's been
> >>upstream to be actually part of the spec.
> >>
> >>In this case the code should match on a "sifive,clint-1.0.0", and the
> >>device trees should be fixed up to match. We match on "riscv,plic0" for
> >>legacy systems, and I guess it makes sense to do something similar here.
> >
> >IFF we decided to change it I'd rather separate DT noes for the ipi
> >bank vs timecmp register vs timeval to support variable layouts. The
> >downside is that we can't just boot on unmodified upstream qemu, which
> >has used the "riscv,clint0" for years.
>
> Like I alluded to above, matching on "riscv,clint0" seems reasonable to me
> as it's a defacto standard -- we'll just have to make sure that if we ever
> end up with a RISC-V CLINT that the DT entry is something else.
De facto, but not mandatory.
>
> As far as splitting the memory maps goes, I don't have a strong opinion but
> it seems like that'll introduce more complexity than it's worth.
>
At least the splitting can keep reminding us and any new comers in the future
that CLINT is not (yet) a must in RISC-V landscape. A previous discussion
FYI: ( https://lkml.org/lkml/2019/8/20/1361 )
> _______________________________________________
> linux-riscv mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-riscv