2020-04-07 14:47:46

by Zong Li

[permalink] [raw]
Subject: [PATCH v4 0/9] Support strict kernel memory permissions for security

The main purpose of this patch series is changing the kernel mapping permission
, make sure that code is not writeable, data is not executable, and read-only
data is neither writable nor executable.

This patch series also supports the relevant implementations such as
ARCH_HAS_SET_MEMORY, ARCH_HAS_SET_DIRECT_MAP,
ARCH_SUPPORTS_DEBUG_PAGEALLOC and DEBUG_WX.

Changes in v4:
- Use NOKPROBE_SYMBOL instead of __kprobe annotation
- Use text_mutex instead of patch_lock
- Remove 'riscv_' prefix of function name

Changes in v3:
- Fix build error on nommu configuration. We already support nommu on
RISC-V, so we should consider nommu case and test not only rv32/64,
but also nommu.

Changes in v2:
- Use _data to specify the start of data section with write permission.
- Change ftrace patch text implementaion.
- Separate DEBUG_WX patch to another patchset.

Zong Li (9):
riscv: add macro to get instruction length
riscv: introduce interfaces to patch kernel code
riscv: patch code by fixmap mapping
riscv: add ARCH_HAS_SET_MEMORY support
riscv: add ARCH_HAS_SET_DIRECT_MAP support
riscv: add ARCH_SUPPORTS_DEBUG_PAGEALLOC support
riscv: move exception table immediately after RO_DATA
riscv: add alignment for text, rodata and data sections
riscv: add STRICT_KERNEL_RWX support

arch/riscv/Kconfig | 6 +
arch/riscv/include/asm/bug.h | 8 ++
arch/riscv/include/asm/fixmap.h | 2 +
arch/riscv/include/asm/patch.h | 12 ++
arch/riscv/include/asm/set_memory.h | 48 +++++++
arch/riscv/kernel/Makefile | 4 +-
arch/riscv/kernel/ftrace.c | 26 ++--
arch/riscv/kernel/patch.c | 120 ++++++++++++++++++
arch/riscv/kernel/traps.c | 3 +-
arch/riscv/kernel/vmlinux.lds.S | 11 +-
arch/riscv/mm/Makefile | 2 +-
arch/riscv/mm/init.c | 44 +++++++
arch/riscv/mm/pageattr.c | 187 ++++++++++++++++++++++++++++
13 files changed, 458 insertions(+), 15 deletions(-)
create mode 100644 arch/riscv/include/asm/patch.h
create mode 100644 arch/riscv/include/asm/set_memory.h
create mode 100644 arch/riscv/kernel/patch.c
create mode 100644 arch/riscv/mm/pageattr.c

--
2.26.0


2020-04-07 14:47:50

by Zong Li

[permalink] [raw]
Subject: [PATCH v4 1/9] riscv: add macro to get instruction length

Extract the calculation of instruction length for common use.

Signed-off-by: Zong Li <[email protected]>
---
arch/riscv/include/asm/bug.h | 8 ++++++++
arch/riscv/kernel/traps.c | 3 ++-
2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h
index 75604fec1b1b..d6f1ec08d97b 100644
--- a/arch/riscv/include/asm/bug.h
+++ b/arch/riscv/include/asm/bug.h
@@ -19,6 +19,14 @@
#define __BUG_INSN_32 _UL(0x00100073) /* ebreak */
#define __BUG_INSN_16 _UL(0x9002) /* c.ebreak */

+#define GET_INSN_LENGTH(insn) \
+({ \
+ unsigned long __len; \
+ __len = ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) ? \
+ 4UL : 2UL; \
+ __len; \
+})
+
typedef u32 bug_insn_t;

#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index ffb3d94bf0cc..a4d136355f78 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -118,7 +118,8 @@ static inline unsigned long get_break_insn_length(unsigned long pc)

if (probe_kernel_address((bug_insn_t *)pc, insn))
return 0;
- return (((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) ? 4UL : 2UL);
+
+ return GET_INSN_LENGTH(insn);
}

asmlinkage __visible void do_trap_break(struct pt_regs *regs)
--
2.26.0

2020-04-07 14:47:59

by Zong Li

[permalink] [raw]
Subject: [PATCH v4 2/9] riscv: introduce interfaces to patch kernel code

On strict kernel memory permission, we couldn't patch code without
writable permission. Preserve two holes in fixmap area, so we can map
the kernel code temporarily to fixmap area, then patch the instructions.

We need two pages here because we support the compressed instruction, so
the instruction might be align to 2 bytes. When patching the 32-bit
length instruction which is 2 bytes alignment, it will across two pages.

Introduce two interfaces to patch kernel code:
riscv_patch_text_nosync:
- patch code without synchronization, it's caller's responsibility to
synchronize all CPUs if needed.
riscv_patch_text:
- patch code and always synchronize with stop_machine()

Signed-off-by: Zong Li <[email protected]>
Suggested-by: Masami Hiramatsu <[email protected]>
---
arch/riscv/include/asm/fixmap.h | 2 +
arch/riscv/include/asm/patch.h | 12 ++++
arch/riscv/kernel/Makefile | 4 +-
arch/riscv/kernel/patch.c | 120 ++++++++++++++++++++++++++++++++
4 files changed, 137 insertions(+), 1 deletion(-)
create mode 100644 arch/riscv/include/asm/patch.h
create mode 100644 arch/riscv/kernel/patch.c

diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 42d2c42f3cc9..2368d49eb4ef 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -27,6 +27,8 @@ enum fixed_addresses {
FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
FIX_PTE,
FIX_PMD,
+ FIX_TEXT_POKE1,
+ FIX_TEXT_POKE0,
FIX_EARLYCON_MEM_BASE,
__end_of_fixed_addresses
};
diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h
new file mode 100644
index 000000000000..9a7d7346001e
--- /dev/null
+++ b/arch/riscv/include/asm/patch.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 SiFive
+ */
+
+#ifndef _ASM_RISCV_PATCH_H
+#define _ASM_RISCV_PATCH_H
+
+int patch_text_nosync(void *addr, const void *insns, size_t len);
+int patch_text(void *addr, u32 insn);
+
+#endif /* _ASM_RISCV_PATCH_H */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index f40205cb9a22..d189bd3d8501 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -4,7 +4,8 @@
#

ifdef CONFIG_FTRACE
-CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_patch.o = -pg
endif

extra-y += head.o
@@ -26,6 +27,7 @@ obj-y += traps.o
obj-y += riscv_ksyms.o
obj-y += stacktrace.o
obj-y += cacheinfo.o
+obj-y += patch.o
obj-$(CONFIG_MMU) += vdso.o vdso/

obj-$(CONFIG_RISCV_M_MODE) += clint.o
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
new file mode 100644
index 000000000000..5b4f0d37097f
--- /dev/null
+++ b/arch/riscv/kernel/patch.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 SiFive
+ */
+
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <linux/stop_machine.h>
+#include <asm/kprobes.h>
+#include <asm/cacheflush.h>
+#include <asm/fixmap.h>
+
+struct patch_insn_patch {
+ void *addr;
+ u32 insn;
+ atomic_t cpu_count;
+};
+
+#ifdef CONFIG_MMU
+static void *patch_map(void *addr, int fixmap)
+{
+ uintptr_t uintaddr = (uintptr_t) addr;
+ struct page *page;
+
+ if (core_kernel_text(uintaddr))
+ page = phys_to_page(__pa_symbol(addr));
+ else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
+ page = vmalloc_to_page(addr);
+ else
+ return addr;
+
+ BUG_ON(!page);
+
+ return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
+ (uintaddr & ~PAGE_MASK));
+}
+NOKPROBE_SYMBOL(patch_map);
+
+static void patch_unmap(int fixmap)
+{
+ clear_fixmap(fixmap);
+}
+NOKPROBE_SYMBOL(patch_unmap);
+
+static int patch_insn_write(void *addr, const void *insn, size_t len)
+{
+ void *waddr = addr;
+ bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE;
+ int ret;
+
+ if (across_pages)
+ patch_map(addr + len, FIX_TEXT_POKE1);
+
+ waddr = patch_map(addr, FIX_TEXT_POKE0);
+
+ ret = probe_kernel_write(waddr, insn, len);
+
+ patch_unmap(FIX_TEXT_POKE0);
+
+ if (across_pages)
+ patch_unmap(FIX_TEXT_POKE1);
+
+ return ret;
+}
+NOKPROBE_SYMBOL(patch_insn_write);
+#else
+static int patch_insn_write(void *addr, const void *insn, size_t len)
+{
+ return probe_kernel_write(addr, insn, len);
+}
+NOKPROBE_SYMBOL(patch_insn_write);
+#endif /* CONFIG_MMU */
+
+int patch_text_nosync(void *addr, const void *insns, size_t len)
+{
+ u32 *tp = addr;
+ int ret;
+
+ ret = patch_insn_write(tp, insns, len);
+
+ if (!ret)
+ flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
+
+ return ret;
+}
+NOKPROBE_SYMBOL(patch_text_nosync);
+
+static int patch_text_cb(void *data)
+{
+ struct patch_insn_patch *patch = data;
+ int ret = 0;
+
+ if (atomic_inc_return(&patch->cpu_count) == 1) {
+ ret =
+ patch_text_nosync(patch->addr, &patch->insn,
+ GET_INSN_LENGTH(patch->insn));
+ atomic_inc(&patch->cpu_count);
+ } else {
+ while (atomic_read(&patch->cpu_count) <= num_online_cpus())
+ cpu_relax();
+ smp_mb();
+ }
+
+ return ret;
+}
+NOKPROBE_SYMBOL(patch_text_cb);
+
+int patch_text(void *addr, u32 insn)
+{
+ struct patch_insn_patch patch = {
+ .addr = addr,
+ .insn = insn,
+ .cpu_count = ATOMIC_INIT(0),
+ };
+
+ return stop_machine_cpuslocked(patch_text_cb,
+ &patch, cpu_online_mask);
+}
+NOKPROBE_SYMBOL(patch_text);
--
2.26.0

2020-04-07 14:48:14

by Zong Li

[permalink] [raw]
Subject: [PATCH v4 6/9] riscv: add ARCH_SUPPORTS_DEBUG_PAGEALLOC support

ARCH_SUPPORTS_DEBUG_PAGEALLOC provides a hook to map and unmap
pages for debugging purposes. Implement the __kernel_map_pages
functions to fill the poison pattern.

Signed-off-by: Zong Li <[email protected]>
Reviewed-by: Palmer Dabbelt <[email protected]>
---
arch/riscv/Kconfig | 3 +++
arch/riscv/mm/pageattr.c | 13 +++++++++++++
2 files changed, 16 insertions(+)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a94d0f064d9c..1e1efc998baf 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -132,6 +132,9 @@ config ARCH_SELECT_MEMORY_MODEL
config ARCH_WANT_GENERAL_HUGETLB
def_bool y

+config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+ def_bool y
+
config SYS_SUPPORTS_HUGETLBFS
def_bool y

diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index 7be6cd67e2ef..728759eb530a 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -172,3 +172,16 @@ int set_direct_map_default_noflush(struct page *page)

return walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
}
+
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ if (!debug_pagealloc_enabled())
+ return;
+
+ if (enable)
+ __set_memory((unsigned long)page_address(page), numpages,
+ __pgprot(_PAGE_PRESENT), __pgprot(0));
+ else
+ __set_memory((unsigned long)page_address(page), numpages,
+ __pgprot(0), __pgprot(_PAGE_PRESENT));
+}
--
2.26.0

2020-04-07 14:48:16

by Zong Li

[permalink] [raw]
Subject: [PATCH v4 8/9] riscv: add alignment for text, rodata and data sections

The kernel mapping will tried to optimize its mapping by using bigger
size. In rv64, it tries to use PMD_SIZE, and tryies to use PGDIR_SIZE in
rv32. To ensure that the start address of these sections could fit the
mapping entry size, make them align to the biggest alignment.

Define a macro SECTION_ALIGN because the HPAGE_SIZE or PMD_SIZE, etc.,
are invisible in linker script.

This patch is prepared for STRICT_KERNEL_RWX support.

Signed-off-by: Zong Li <[email protected]>
Reviewed-by: Palmer Dabbelt <[email protected]>
---
arch/riscv/include/asm/set_memory.h | 13 +++++++++++++
arch/riscv/kernel/vmlinux.lds.S | 5 ++++-
2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index 620d81c372d9..4c5bae7ca01c 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -6,6 +6,7 @@
#ifndef _ASM_RISCV_SET_MEMORY_H
#define _ASM_RISCV_SET_MEMORY_H

+#ifndef __ASSEMBLY__
/*
* Functions to change memory attributes.
*/
@@ -24,4 +25,16 @@ static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);

+#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_ARCH_HAS_STRICT_KERNEL_RWX
+#ifdef CONFIG_64BIT
+#define SECTION_ALIGN (1 << 21)
+#else
+#define SECTION_ALIGN (1 << 22)
+#endif
+#else /* !CONFIG_ARCH_HAS_STRICT_KERNEL_RWX */
+#define SECTION_ALIGN L1_CACHE_BYTES
+#endif /* CONFIG_ARCH_HAS_STRICT_KERNEL_RWX */
+
#endif /* _ASM_RISCV_SET_MEMORY_H */
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 02e948b620af..ef87deea0350 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -9,6 +9,7 @@
#include <asm/page.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
+#include <asm/set_memory.h>

OUTPUT_ARCH(riscv)
ENTRY(_start)
@@ -36,6 +37,7 @@ SECTIONS
PERCPU_SECTION(L1_CACHE_BYTES)
__init_end = .;

+ . = ALIGN(SECTION_ALIGN);
.text : {
_text = .;
_stext = .;
@@ -53,13 +55,14 @@ SECTIONS

/* Start of data section */
_sdata = .;
- RO_DATA(L1_CACHE_BYTES)
+ RO_DATA(SECTION_ALIGN)
.srodata : {
*(.srodata*)
}

EXCEPTION_TABLE(0x10)

+ . = ALIGN(SECTION_ALIGN);
_data = .;

RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
--
2.26.0

2020-04-07 14:48:20

by Zong Li

[permalink] [raw]
Subject: [PATCH v4 9/9] riscv: add STRICT_KERNEL_RWX support

The commit contains that make text section as non-writable, rodata
section as read-only, and data section as non-executable.

The init section should be changed to non-executable.

Signed-off-by: Zong Li <[email protected]>
---
arch/riscv/Kconfig | 1 +
arch/riscv/include/asm/set_memory.h | 8 ++++++
arch/riscv/mm/init.c | 44 +++++++++++++++++++++++++++++
3 files changed, 53 insertions(+)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 1e1efc998baf..58b556167d59 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -61,6 +61,7 @@ config RISCV
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_SET_MEMORY
+ select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
select SPARSEMEM_STATIC if 32BIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index 4c5bae7ca01c..c38df4771c09 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -22,6 +22,14 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
#endif

+#ifdef CONFIG_STRICT_KERNEL_RWX
+void set_kernel_text_ro(void);
+void set_kernel_text_rw(void);
+#else
+static inline void set_kernel_text_ro(void) { }
+static inline void set_kernel_text_rw(void) { }
+#endif
+
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index fab855963c73..b55be44ff9bd 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -12,6 +12,7 @@
#include <linux/sizes.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
+#include <linux/set_memory.h>

#include <asm/fixmap.h>
#include <asm/tlbflush.h>
@@ -477,6 +478,17 @@ static void __init setup_vm_final(void)
csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
local_flush_tlb_all();
}
+
+void free_initmem(void)
+{
+ unsigned long init_begin = (unsigned long)__init_begin;
+ unsigned long init_end = (unsigned long)__init_end;
+
+ /* Make the region as non-execuatble. */
+ set_memory_nx(init_begin, (init_end - init_begin) >> PAGE_SHIFT);
+ free_initmem_default(POISON_FREE_INITMEM);
+}
+
#else
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
@@ -488,6 +500,38 @@ static inline void setup_vm_final(void)
}
#endif /* CONFIG_MMU */

+#ifdef CONFIG_STRICT_KERNEL_RWX
+void set_kernel_text_rw(void)
+{
+ unsigned long text_start = (unsigned long)_text;
+ unsigned long text_end = (unsigned long)_etext;
+
+ set_memory_rw(text_start, (text_end - text_start) >> PAGE_SHIFT);
+}
+
+void set_kernel_text_ro(void)
+{
+ unsigned long text_start = (unsigned long)_text;
+ unsigned long text_end = (unsigned long)_etext;
+
+ set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
+}
+
+void mark_rodata_ro(void)
+{
+ unsigned long text_start = (unsigned long)_text;
+ unsigned long text_end = (unsigned long)_etext;
+ unsigned long rodata_start = (unsigned long)__start_rodata;
+ unsigned long data_start = (unsigned long)_data;
+ unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
+
+ set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
+ set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
+ set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
+ set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
+}
+#endif
+
void __init paging_init(void)
{
setup_vm_final();
--
2.26.0

2020-04-07 14:48:31

by Zong Li

[permalink] [raw]
Subject: [PATCH v4 7/9] riscv: move exception table immediately after RO_DATA

Move EXCEPTION_TABLE immediately after RO_DATA. Make it easy to set the
attribution of the sections which should be read-only at a time.
Add _data to specify the start of data section with write permission.
This patch is prepared for STRICT_KERNEL_RWX support.

Signed-off-by: Zong Li <[email protected]>
Suggested-by: Palmer Dabbelt <[email protected]>
---
arch/riscv/kernel/vmlinux.lds.S | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 1e0193ded420..02e948b620af 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -58,6 +58,10 @@ SECTIONS
*(.srodata*)
}

+ EXCEPTION_TABLE(0x10)
+
+ _data = .;
+
RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
.sdata : {
__global_pointer$ = . + 0x800;
@@ -69,8 +73,6 @@ SECTIONS

BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)

- EXCEPTION_TABLE(0x10)
-
.rel.dyn : {
*(.rel.dyn*)
}
--
2.26.0

2020-04-07 14:48:51

by Zong Li

[permalink] [raw]
Subject: [PATCH v4 3/9] riscv: patch code by fixmap mapping

On strict kernel memory permission, the ftrace have to change the
permission of text for dynamic patching the intructions. Use
riscv_patch_text_nosync() to patch code instead of probe_kernel_write.

Signed-off-by: Zong Li <[email protected]>
Suggested-by: Masami Hiramatsu <[email protected]>
---
arch/riscv/kernel/ftrace.c | 26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index c40fdcdeb950..08396614d6f4 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -7,9 +7,23 @@

#include <linux/ftrace.h>
#include <linux/uaccess.h>
+#include <linux/memory.h>
#include <asm/cacheflush.h>
+#include <asm/patch.h>

#ifdef CONFIG_DYNAMIC_FTRACE
+int ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex)
+{
+ mutex_lock(&text_mutex);
+ return 0;
+}
+
+int ftrace_arch_code_modify_post_process(void) __releases(&text_mutex)
+{
+ mutex_unlock(&text_mutex);
+ return 0;
+}
+
static int ftrace_check_current_call(unsigned long hook_pos,
unsigned int *expected)
{
@@ -46,20 +60,14 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
{
unsigned int call[2];
unsigned int nops[2] = {NOP4, NOP4};
- int ret = 0;

make_call(hook_pos, target, call);

- /* replace the auipc-jalr pair at once */
- ret = probe_kernel_write((void *)hook_pos, enable ? call : nops,
- MCOUNT_INSN_SIZE);
- /* return must be -EPERM on write error */
- if (ret)
+ /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */
+ if (patch_text_nosync
+ ((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE))
return -EPERM;

- smp_mb();
- flush_icache_range((void *)hook_pos, (void *)hook_pos + MCOUNT_INSN_SIZE);
-
return 0;
}

--
2.26.0

2020-04-07 14:48:54

by Zong Li

[permalink] [raw]
Subject: [PATCH v4 4/9] riscv: add ARCH_HAS_SET_MEMORY support

Add set_memory_ro/rw/x/nx architecture hooks to change the page
attribution.

Use own set_memory.h rather than generic set_memory.h
(i.e. include/asm-generic/set_memory.h), because we want to add other
function prototypes here.

Signed-off-by: Zong Li <[email protected]>
Reviewed-by: Palmer Dabbelt <[email protected]>
---
arch/riscv/Kconfig | 1 +
arch/riscv/include/asm/set_memory.h | 24 +++++
arch/riscv/mm/Makefile | 2 +-
arch/riscv/mm/pageattr.c | 150 ++++++++++++++++++++++++++++
4 files changed, 176 insertions(+), 1 deletion(-)
create mode 100644 arch/riscv/include/asm/set_memory.h
create mode 100644 arch/riscv/mm/pageattr.c

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 39c93ddff0cb..9044e0dd95ea 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -59,6 +59,7 @@ config RISCV
select HAVE_EBPF_JIT if 64BIT
select EDAC_SUPPORT
select ARCH_HAS_GIGANTIC_PAGE
+ select ARCH_HAS_SET_MEMORY
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
select SPARSEMEM_STATIC if 32BIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
new file mode 100644
index 000000000000..79a810f0f38b
--- /dev/null
+++ b/arch/riscv/include/asm/set_memory.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2019 SiFive
+ */
+
+#ifndef _ASM_RISCV_SET_MEMORY_H
+#define _ASM_RISCV_SET_MEMORY_H
+
+/*
+ * Functions to change memory attributes.
+ */
+#ifdef CONFIG_MMU
+int set_memory_ro(unsigned long addr, int numpages);
+int set_memory_rw(unsigned long addr, int numpages);
+int set_memory_x(unsigned long addr, int numpages);
+int set_memory_nx(unsigned long addr, int numpages);
+#else
+static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
+static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
+static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
+static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
+#endif
+
+#endif /* _ASM_RISCV_SET_MEMORY_H */
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 814e16a8d68a..363ef01c30b1 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -7,7 +7,7 @@ endif

obj-y += init.o
obj-y += extable.o
-obj-$(CONFIG_MMU) += fault.o
+obj-$(CONFIG_MMU) += fault.o pageattr.o
obj-y += cacheflush.o
obj-y += context.o

diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
new file mode 100644
index 000000000000..fcd59ef2835b
--- /dev/null
+++ b/arch/riscv/mm/pageattr.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2019 SiFive
+ */
+
+#include <linux/pagewalk.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/bitops.h>
+
+struct pageattr_masks {
+ pgprot_t set_mask;
+ pgprot_t clear_mask;
+};
+
+static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk)
+{
+ struct pageattr_masks *masks = walk->private;
+ unsigned long new_val = val;
+
+ new_val &= ~(pgprot_val(masks->clear_mask));
+ new_val |= (pgprot_val(masks->set_mask));
+
+ return new_val;
+}
+
+static int pageattr_pgd_entry(pgd_t *pgd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pgd_t val = READ_ONCE(*pgd);
+
+ if (pgd_leaf(val)) {
+ val = __pgd(set_pageattr_masks(pgd_val(val), walk));
+ set_pgd(pgd, val);
+ }
+
+ return 0;
+}
+
+static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ p4d_t val = READ_ONCE(*p4d);
+
+ if (p4d_leaf(val)) {
+ val = __p4d(set_pageattr_masks(p4d_val(val), walk));
+ set_p4d(p4d, val);
+ }
+
+ return 0;
+}
+
+static int pageattr_pud_entry(pud_t *pud, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pud_t val = READ_ONCE(*pud);
+
+ if (pud_leaf(val)) {
+ val = __pud(set_pageattr_masks(pud_val(val), walk));
+ set_pud(pud, val);
+ }
+
+ return 0;
+}
+
+static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pmd_t val = READ_ONCE(*pmd);
+
+ if (pmd_leaf(val)) {
+ val = __pmd(set_pageattr_masks(pmd_val(val), walk));
+ set_pmd(pmd, val);
+ }
+
+ return 0;
+}
+
+static int pageattr_pte_entry(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pte_t val = READ_ONCE(*pte);
+
+ val = __pte(set_pageattr_masks(pte_val(val), walk));
+ set_pte(pte, val);
+
+ return 0;
+}
+
+static int pageattr_pte_hole(unsigned long addr, unsigned long next,
+ int depth, struct mm_walk *walk)
+{
+ /* Nothing to do here */
+ return 0;
+}
+
+const static struct mm_walk_ops pageattr_ops = {
+ .pgd_entry = pageattr_pgd_entry,
+ .p4d_entry = pageattr_p4d_entry,
+ .pud_entry = pageattr_pud_entry,
+ .pmd_entry = pageattr_pmd_entry,
+ .pte_entry = pageattr_pte_entry,
+ .pte_hole = pageattr_pte_hole,
+};
+
+static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
+ pgprot_t clear_mask)
+{
+ int ret;
+ unsigned long start = addr;
+ unsigned long end = start + PAGE_SIZE * numpages;
+ struct pageattr_masks masks = {
+ .set_mask = set_mask,
+ .clear_mask = clear_mask
+ };
+
+ if (!numpages)
+ return 0;
+
+ down_read(&init_mm.mmap_sem);
+ ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
+ &masks);
+ up_read(&init_mm.mmap_sem);
+
+ flush_tlb_kernel_range(start, end);
+
+ return ret;
+}
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
+ __pgprot(_PAGE_WRITE));
+}
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, __pgprot(_PAGE_READ | _PAGE_WRITE),
+ __pgprot(0));
+}
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, __pgprot(_PAGE_EXEC), __pgprot(0));
+}
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_EXEC));
+}
--
2.26.0

2020-04-07 14:50:03

by Zong Li

[permalink] [raw]
Subject: [PATCH v4 5/9] riscv: add ARCH_HAS_SET_DIRECT_MAP support

Add set_direct_map_*() functions for setting the direct map alias for
the page to its default permissions and to an invalid state that cannot
be cached in a TLB. (See d253ca0c ("x86/mm/cpa: Add set_direct_map_*()
functions")) Add a similar implementation for RISC-V.

Signed-off-by: Zong Li <[email protected]>
Reviewed-by: Palmer Dabbelt <[email protected]>
---
arch/riscv/Kconfig | 1 +
arch/riscv/include/asm/set_memory.h | 3 +++
arch/riscv/mm/pageattr.c | 24 ++++++++++++++++++++++++
3 files changed, 28 insertions(+)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 9044e0dd95ea..a94d0f064d9c 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -59,6 +59,7 @@ config RISCV
select HAVE_EBPF_JIT if 64BIT
select EDAC_SUPPORT
select ARCH_HAS_GIGANTIC_PAGE
+ select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_SET_MEMORY
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
select SPARSEMEM_STATIC if 32BIT
diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index 79a810f0f38b..620d81c372d9 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -21,4 +21,7 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
#endif

+int set_direct_map_invalid_noflush(struct page *page);
+int set_direct_map_default_noflush(struct page *page);
+
#endif /* _ASM_RISCV_SET_MEMORY_H */
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index fcd59ef2835b..7be6cd67e2ef 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -148,3 +148,27 @@ int set_memory_nx(unsigned long addr, int numpages)
{
return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_EXEC));
}
+
+int set_direct_map_invalid_noflush(struct page *page)
+{
+ unsigned long start = (unsigned long)page_address(page);
+ unsigned long end = start + PAGE_SIZE;
+ struct pageattr_masks masks = {
+ .set_mask = __pgprot(0),
+ .clear_mask = __pgprot(_PAGE_PRESENT)
+ };
+
+ return walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
+}
+
+int set_direct_map_default_noflush(struct page *page)
+{
+ unsigned long start = (unsigned long)page_address(page);
+ unsigned long end = start + PAGE_SIZE;
+ struct pageattr_masks masks = {
+ .set_mask = PAGE_KERNEL,
+ .clear_mask = __pgprot(0)
+ };
+
+ return walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
+}
--
2.26.0

2020-04-07 16:00:06

by Masami Hiramatsu

[permalink] [raw]
Subject: Re: [PATCH v4 2/9] riscv: introduce interfaces to patch kernel code

Hi Zong,

On Tue, 7 Apr 2020 22:46:47 +0800
Zong Li <[email protected]> wrote:

> On strict kernel memory permission, we couldn't patch code without
> writable permission. Preserve two holes in fixmap area, so we can map
> the kernel code temporarily to fixmap area, then patch the instructions.
>
> We need two pages here because we support the compressed instruction, so
> the instruction might be align to 2 bytes. When patching the 32-bit
> length instruction which is 2 bytes alignment, it will across two pages.
>
> Introduce two interfaces to patch kernel code:
> riscv_patch_text_nosync:
> - patch code without synchronization, it's caller's responsibility to
> synchronize all CPUs if needed.
> riscv_patch_text:
> - patch code and always synchronize with stop_machine()
>
> Signed-off-by: Zong Li <[email protected]>
> Suggested-by: Masami Hiramatsu <[email protected]>
> ---
> arch/riscv/include/asm/fixmap.h | 2 +
> arch/riscv/include/asm/patch.h | 12 ++++
> arch/riscv/kernel/Makefile | 4 +-
> arch/riscv/kernel/patch.c | 120 ++++++++++++++++++++++++++++++++
> 4 files changed, 137 insertions(+), 1 deletion(-)
> create mode 100644 arch/riscv/include/asm/patch.h
> create mode 100644 arch/riscv/kernel/patch.c
>
> diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
> index 42d2c42f3cc9..2368d49eb4ef 100644
> --- a/arch/riscv/include/asm/fixmap.h
> +++ b/arch/riscv/include/asm/fixmap.h
> @@ -27,6 +27,8 @@ enum fixed_addresses {
> FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
> FIX_PTE,
> FIX_PMD,
> + FIX_TEXT_POKE1,
> + FIX_TEXT_POKE0,
> FIX_EARLYCON_MEM_BASE,
> __end_of_fixed_addresses
> };
> diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h
> new file mode 100644
> index 000000000000..9a7d7346001e
> --- /dev/null
> +++ b/arch/riscv/include/asm/patch.h
> @@ -0,0 +1,12 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2020 SiFive
> + */
> +
> +#ifndef _ASM_RISCV_PATCH_H
> +#define _ASM_RISCV_PATCH_H
> +
> +int patch_text_nosync(void *addr, const void *insns, size_t len);
> +int patch_text(void *addr, u32 insn);
> +
> +#endif /* _ASM_RISCV_PATCH_H */
> diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> index f40205cb9a22..d189bd3d8501 100644
> --- a/arch/riscv/kernel/Makefile
> +++ b/arch/riscv/kernel/Makefile
> @@ -4,7 +4,8 @@
> #
>
> ifdef CONFIG_FTRACE
> -CFLAGS_REMOVE_ftrace.o = -pg
> +CFLAGS_REMOVE_ftrace.o = -pg
> +CFLAGS_REMOVE_patch.o = -pg
> endif
>
> extra-y += head.o
> @@ -26,6 +27,7 @@ obj-y += traps.o
> obj-y += riscv_ksyms.o
> obj-y += stacktrace.o
> obj-y += cacheinfo.o
> +obj-y += patch.o
> obj-$(CONFIG_MMU) += vdso.o vdso/
>
> obj-$(CONFIG_RISCV_M_MODE) += clint.o
> diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
> new file mode 100644
> index 000000000000..5b4f0d37097f
> --- /dev/null
> +++ b/arch/riscv/kernel/patch.c
> @@ -0,0 +1,120 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2020 SiFive
> + */
> +
> +#include <linux/spinlock.h>
> +#include <linux/mm.h>
> +#include <linux/uaccess.h>
> +#include <linux/stop_machine.h>
> +#include <asm/kprobes.h>
> +#include <asm/cacheflush.h>
> +#include <asm/fixmap.h>
> +
> +struct patch_insn_patch {
> + void *addr;
> + u32 insn;
> + atomic_t cpu_count;
> +};
> +
> +#ifdef CONFIG_MMU
> +static void *patch_map(void *addr, int fixmap)
> +{
> + uintptr_t uintaddr = (uintptr_t) addr;
> + struct page *page;
> +
> + if (core_kernel_text(uintaddr))
> + page = phys_to_page(__pa_symbol(addr));
> + else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
> + page = vmalloc_to_page(addr);
> + else
> + return addr;
> +
> + BUG_ON(!page);
> +
> + return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
> + (uintaddr & ~PAGE_MASK));
> +}
> +NOKPROBE_SYMBOL(patch_map);
> +
> +static void patch_unmap(int fixmap)
> +{
> + clear_fixmap(fixmap);
> +}
> +NOKPROBE_SYMBOL(patch_unmap);
> +

Please leave a comment here about text_mutex,

> +static int patch_insn_write(void *addr, const void *insn, size_t len)
> +{
> + void *waddr = addr;
> + bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE;
> + int ret;

Or use lockdep_assert_held(&text_mutex); here so that user can easily
understand they have to lock the text_mutex before calling this.

Thank you,

> +
> + if (across_pages)
> + patch_map(addr + len, FIX_TEXT_POKE1);
> +
> + waddr = patch_map(addr, FIX_TEXT_POKE0);
> +
> + ret = probe_kernel_write(waddr, insn, len);
> +
> + patch_unmap(FIX_TEXT_POKE0);
> +
> + if (across_pages)
> + patch_unmap(FIX_TEXT_POKE1);
> +
> + return ret;
> +}
> +NOKPROBE_SYMBOL(patch_insn_write);
> +#else
> +static int patch_insn_write(void *addr, const void *insn, size_t len)
> +{
> + return probe_kernel_write(addr, insn, len);
> +}
> +NOKPROBE_SYMBOL(patch_insn_write);
> +#endif /* CONFIG_MMU */
> +
> +int patch_text_nosync(void *addr, const void *insns, size_t len)
> +{
> + u32 *tp = addr;
> + int ret;
> +
> + ret = patch_insn_write(tp, insns, len);
> +
> + if (!ret)
> + flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
> +
> + return ret;
> +}
> +NOKPROBE_SYMBOL(patch_text_nosync);
> +
> +static int patch_text_cb(void *data)
> +{
> + struct patch_insn_patch *patch = data;
> + int ret = 0;
> +
> + if (atomic_inc_return(&patch->cpu_count) == 1) {
> + ret =
> + patch_text_nosync(patch->addr, &patch->insn,
> + GET_INSN_LENGTH(patch->insn));
> + atomic_inc(&patch->cpu_count);
> + } else {
> + while (atomic_read(&patch->cpu_count) <= num_online_cpus())
> + cpu_relax();
> + smp_mb();
> + }
> +
> + return ret;
> +}
> +NOKPROBE_SYMBOL(patch_text_cb);
> +
> +int patch_text(void *addr, u32 insn)
> +{
> + struct patch_insn_patch patch = {
> + .addr = addr,
> + .insn = insn,
> + .cpu_count = ATOMIC_INIT(0),
> + };
> +
> + return stop_machine_cpuslocked(patch_text_cb,
> + &patch, cpu_online_mask);
> +}
> +NOKPROBE_SYMBOL(patch_text);
> --
> 2.26.0
>


--
Masami Hiramatsu <[email protected]>

2020-04-07 16:03:54

by Masami Hiramatsu

[permalink] [raw]
Subject: Re: [PATCH v4 3/9] riscv: patch code by fixmap mapping

On Tue, 7 Apr 2020 22:46:48 +0800
Zong Li <[email protected]> wrote:

> On strict kernel memory permission, the ftrace have to change the
> permission of text for dynamic patching the intructions. Use
> riscv_patch_text_nosync() to patch code instead of probe_kernel_write.
>
> Signed-off-by: Zong Li <[email protected]>
> Suggested-by: Masami Hiramatsu <[email protected]>

Looks good to me.

Reviewed-by: Masami Hiramatsu <[email protected]>

Thanks!

> ---
> arch/riscv/kernel/ftrace.c | 26 +++++++++++++++++---------
> 1 file changed, 17 insertions(+), 9 deletions(-)
>
> diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
> index c40fdcdeb950..08396614d6f4 100644
> --- a/arch/riscv/kernel/ftrace.c
> +++ b/arch/riscv/kernel/ftrace.c
> @@ -7,9 +7,23 @@
>
> #include <linux/ftrace.h>
> #include <linux/uaccess.h>
> +#include <linux/memory.h>
> #include <asm/cacheflush.h>
> +#include <asm/patch.h>
>
> #ifdef CONFIG_DYNAMIC_FTRACE
> +int ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex)
> +{
> + mutex_lock(&text_mutex);
> + return 0;
> +}
> +
> +int ftrace_arch_code_modify_post_process(void) __releases(&text_mutex)
> +{
> + mutex_unlock(&text_mutex);
> + return 0;
> +}
> +
> static int ftrace_check_current_call(unsigned long hook_pos,
> unsigned int *expected)
> {
> @@ -46,20 +60,14 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
> {
> unsigned int call[2];
> unsigned int nops[2] = {NOP4, NOP4};
> - int ret = 0;
>
> make_call(hook_pos, target, call);
>
> - /* replace the auipc-jalr pair at once */
> - ret = probe_kernel_write((void *)hook_pos, enable ? call : nops,
> - MCOUNT_INSN_SIZE);
> - /* return must be -EPERM on write error */
> - if (ret)
> + /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */
> + if (patch_text_nosync
> + ((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE))
> return -EPERM;
>
> - smp_mb();
> - flush_icache_range((void *)hook_pos, (void *)hook_pos + MCOUNT_INSN_SIZE);
> -
> return 0;
> }
>
> --
> 2.26.0
>


--
Masami Hiramatsu <[email protected]>

2020-04-08 03:54:28

by Zong Li

[permalink] [raw]
Subject: Re: [PATCH v4 2/9] riscv: introduce interfaces to patch kernel code

On Tue, Apr 7, 2020 at 11:58 PM Masami Hiramatsu <[email protected]> wrote:
>
> Hi Zong,
>
> On Tue, 7 Apr 2020 22:46:47 +0800
> Zong Li <[email protected]> wrote:
>
> > On strict kernel memory permission, we couldn't patch code without
> > writable permission. Preserve two holes in fixmap area, so we can map
> > the kernel code temporarily to fixmap area, then patch the instructions.
> >
> > We need two pages here because we support the compressed instruction, so
> > the instruction might be align to 2 bytes. When patching the 32-bit
> > length instruction which is 2 bytes alignment, it will across two pages.
> >
> > Introduce two interfaces to patch kernel code:
> > riscv_patch_text_nosync:
> > - patch code without synchronization, it's caller's responsibility to
> > synchronize all CPUs if needed.
> > riscv_patch_text:
> > - patch code and always synchronize with stop_machine()
> >
> > Signed-off-by: Zong Li <[email protected]>
> > Suggested-by: Masami Hiramatsu <[email protected]>
> > ---
> > arch/riscv/include/asm/fixmap.h | 2 +
> > arch/riscv/include/asm/patch.h | 12 ++++
> > arch/riscv/kernel/Makefile | 4 +-
> > arch/riscv/kernel/patch.c | 120 ++++++++++++++++++++++++++++++++
> > 4 files changed, 137 insertions(+), 1 deletion(-)
> > create mode 100644 arch/riscv/include/asm/patch.h
> > create mode 100644 arch/riscv/kernel/patch.c
> >
> > diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
> > index 42d2c42f3cc9..2368d49eb4ef 100644
> > --- a/arch/riscv/include/asm/fixmap.h
> > +++ b/arch/riscv/include/asm/fixmap.h
> > @@ -27,6 +27,8 @@ enum fixed_addresses {
> > FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
> > FIX_PTE,
> > FIX_PMD,
> > + FIX_TEXT_POKE1,
> > + FIX_TEXT_POKE0,
> > FIX_EARLYCON_MEM_BASE,
> > __end_of_fixed_addresses
> > };
> > diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h
> > new file mode 100644
> > index 000000000000..9a7d7346001e
> > --- /dev/null
> > +++ b/arch/riscv/include/asm/patch.h
> > @@ -0,0 +1,12 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/*
> > + * Copyright (C) 2020 SiFive
> > + */
> > +
> > +#ifndef _ASM_RISCV_PATCH_H
> > +#define _ASM_RISCV_PATCH_H
> > +
> > +int patch_text_nosync(void *addr, const void *insns, size_t len);
> > +int patch_text(void *addr, u32 insn);
> > +
> > +#endif /* _ASM_RISCV_PATCH_H */
> > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> > index f40205cb9a22..d189bd3d8501 100644
> > --- a/arch/riscv/kernel/Makefile
> > +++ b/arch/riscv/kernel/Makefile
> > @@ -4,7 +4,8 @@
> > #
> >
> > ifdef CONFIG_FTRACE
> > -CFLAGS_REMOVE_ftrace.o = -pg
> > +CFLAGS_REMOVE_ftrace.o = -pg
> > +CFLAGS_REMOVE_patch.o = -pg
> > endif
> >
> > extra-y += head.o
> > @@ -26,6 +27,7 @@ obj-y += traps.o
> > obj-y += riscv_ksyms.o
> > obj-y += stacktrace.o
> > obj-y += cacheinfo.o
> > +obj-y += patch.o
> > obj-$(CONFIG_MMU) += vdso.o vdso/
> >
> > obj-$(CONFIG_RISCV_M_MODE) += clint.o
> > diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
> > new file mode 100644
> > index 000000000000..5b4f0d37097f
> > --- /dev/null
> > +++ b/arch/riscv/kernel/patch.c
> > @@ -0,0 +1,120 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) 2020 SiFive
> > + */
> > +
> > +#include <linux/spinlock.h>
> > +#include <linux/mm.h>
> > +#include <linux/uaccess.h>
> > +#include <linux/stop_machine.h>
> > +#include <asm/kprobes.h>
> > +#include <asm/cacheflush.h>
> > +#include <asm/fixmap.h>
> > +
> > +struct patch_insn_patch {
> > + void *addr;
> > + u32 insn;
> > + atomic_t cpu_count;
> > +};
> > +
> > +#ifdef CONFIG_MMU
> > +static void *patch_map(void *addr, int fixmap)
> > +{
> > + uintptr_t uintaddr = (uintptr_t) addr;
> > + struct page *page;
> > +
> > + if (core_kernel_text(uintaddr))
> > + page = phys_to_page(__pa_symbol(addr));
> > + else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
> > + page = vmalloc_to_page(addr);
> > + else
> > + return addr;
> > +
> > + BUG_ON(!page);
> > +
> > + return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
> > + (uintaddr & ~PAGE_MASK));
> > +}
> > +NOKPROBE_SYMBOL(patch_map);
> > +
> > +static void patch_unmap(int fixmap)
> > +{
> > + clear_fixmap(fixmap);
> > +}
> > +NOKPROBE_SYMBOL(patch_unmap);
> > +
>
> Please leave a comment here about text_mutex,
>
> > +static int patch_insn_write(void *addr, const void *insn, size_t len)
> > +{
> > + void *waddr = addr;
> > + bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE;
> > + int ret;
>
> Or use lockdep_assert_held(&text_mutex); here so that user can easily
> understand they have to lock the text_mutex before calling this.
>

OK, it seems to me that both could be added in the next version. Thanks.

> Thank you,
>
> > +
> > + if (across_pages)
> > + patch_map(addr + len, FIX_TEXT_POKE1);
> > +
> > + waddr = patch_map(addr, FIX_TEXT_POKE0);
> > +
> > + ret = probe_kernel_write(waddr, insn, len);
> > +
> > + patch_unmap(FIX_TEXT_POKE0);
> > +
> > + if (across_pages)
> > + patch_unmap(FIX_TEXT_POKE1);
> > +
> > + return ret;
> > +}
> > +NOKPROBE_SYMBOL(patch_insn_write);
> > +#else
> > +static int patch_insn_write(void *addr, const void *insn, size_t len)
> > +{
> > + return probe_kernel_write(addr, insn, len);
> > +}
> > +NOKPROBE_SYMBOL(patch_insn_write);
> > +#endif /* CONFIG_MMU */
> > +
> > +int patch_text_nosync(void *addr, const void *insns, size_t len)
> > +{
> > + u32 *tp = addr;
> > + int ret;
> > +
> > + ret = patch_insn_write(tp, insns, len);
> > +
> > + if (!ret)
> > + flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
> > +
> > + return ret;
> > +}
> > +NOKPROBE_SYMBOL(patch_text_nosync);
> > +
> > +static int patch_text_cb(void *data)
> > +{
> > + struct patch_insn_patch *patch = data;
> > + int ret = 0;
> > +
> > + if (atomic_inc_return(&patch->cpu_count) == 1) {
> > + ret =
> > + patch_text_nosync(patch->addr, &patch->insn,
> > + GET_INSN_LENGTH(patch->insn));
> > + atomic_inc(&patch->cpu_count);
> > + } else {
> > + while (atomic_read(&patch->cpu_count) <= num_online_cpus())
> > + cpu_relax();
> > + smp_mb();
> > + }
> > +
> > + return ret;
> > +}
> > +NOKPROBE_SYMBOL(patch_text_cb);
> > +
> > +int patch_text(void *addr, u32 insn)
> > +{
> > + struct patch_insn_patch patch = {
> > + .addr = addr,
> > + .insn = insn,
> > + .cpu_count = ATOMIC_INIT(0),
> > + };
> > +
> > + return stop_machine_cpuslocked(patch_text_cb,
> > + &patch, cpu_online_mask);
> > +}
> > +NOKPROBE_SYMBOL(patch_text);
> > --
> > 2.26.0
> >
>
>
> --
> Masami Hiramatsu <[email protected]>