From: Jiang Liu <[email protected]>
This patchset optimizes the arch-specific jump label implementation
for ARM64 by patching kernel text dynamically.
To enable this feature, your toolchain must support the "asm goto" extension
and the "%c" constraint extension. Current GCC for AArch64 doesn't support
"%c", so you need a GCC patch similar to this one:
http://gcc.gnu.org/viewcvs/gcc/trunk/gcc/config/arm/arm.c?view=patch&r1=175293&r2=175565&pathrev=175565
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=48637
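For reference, the kind of construct that needs both extensions looks roughly
like the check added to scripts/gcc-goto.sh in patch 5; a condensed,
hypothetical sketch of such a test program:

int main(void)
{
	/*
	 * "%c0" must emit the bare constant (broken on some ARM/ARM64 GCC
	 * versions, see GCC Bug 48637), and "asm goto" must accept a C label.
	 */
	static struct { long x; } *p;

	asm (".long %c0" : : "i" (sizeof(*p)));
	asm goto ("" : : : : entry);
entry:
	return 0;
}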
It has been tested on the ARM Fast Model and on a real hardware platform.
Any comments are welcome!
V3->V4:
1) resolve a race condition in kernel text patching
2) address other review comments
V2->V3:
1) fix a bug in comparing signed and unsigned values
2) detect big endian by checking __AARCH64EB__
V1->V2: address review comments of V1
1) refine comments
2) add a new interface to always synchronize with stop_machine()
when patching code
3) handle endian issue when patching code
Jiang Liu (7):
arm64: introduce basic aarch64 instruction decoding helpers
arm64: introduce interfaces to hotpatch kernel and module code
arm64: move encode_insn_immediate() from module.c to insn.c
arm64: introduce aarch64_insn_gen_{nop|branch_imm}() helper functions
arm64, jump label: detect %c support for ARM64
arm64, jump label: optimize jump label implementation
jump_label: use defined macros instead of hard-coding for better
readability
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/insn.h | 110 +++++++++++++++++
arch/arm64/include/asm/jump_label.h | 52 ++++++++
arch/arm64/kernel/Makefile | 3 +-
arch/arm64/kernel/insn.c | 235 ++++++++++++++++++++++++++++++++++++
arch/arm64/kernel/jump_label.c | 58 +++++++++
arch/arm64/kernel/module.c | 152 +++++++----------------
include/linux/jump_label.h | 15 ++-
scripts/gcc-goto.sh | 2 +-
9 files changed, 514 insertions(+), 114 deletions(-)
create mode 100644 arch/arm64/include/asm/insn.h
create mode 100644 arch/arm64/include/asm/jump_label.h
create mode 100644 arch/arm64/kernel/insn.c
create mode 100644 arch/arm64/kernel/jump_label.c
--
1.8.1.2
From: Jiang Liu <[email protected]>
Introduce basic aarch64 instruction decoding helpers
aarch64_get_insn_class() and aarch64_insn_hotpatch_safe().
Signed-off-by: Jiang Liu <[email protected]>
Cc: Jiang Liu <[email protected]>
---
arch/arm64/include/asm/insn.h | 67 ++++++++++++++++++++++++++++++++++++++++
arch/arm64/kernel/Makefile | 2 +-
arch/arm64/kernel/insn.c | 72 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 140 insertions(+), 1 deletion(-)
create mode 100644 arch/arm64/include/asm/insn.h
create mode 100644 arch/arm64/kernel/insn.c
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
new file mode 100644
index 0000000..6190016
--- /dev/null
+++ b/arch/arm64/include/asm/insn.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2013 Huawei Ltd.
+ * Author: Jiang Liu <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _ASM_ARM64_INSN_H
+#define _ASM_ARM64_INSN_H
+#include <linux/types.h>
+
+/*
+ * ARM Architecture Reference Manual ARMv8, Section C3.1
+ * AArch64 main encoding table
+ * Bit position
+ * 28 27 26 25 Encoding Group
+ * 0 0 - - Unallocated
+ * 1 0 0 - Data processing, immediate
+ * 1 0 1 - Branch, exception generation and system instructions
+ * - 1 - 0 Loads and stores
+ * - 1 0 1 Data processing - register
+ * 0 1 1 1 Data processing - SIMD and floating point
+ * 1 1 1 1 Data processing - SIMD and floating point
+ * "-" means "don't care"
+ */
+enum aarch64_insn_encoding_class {
+ AARCH64_INSN_CLS_UNKNOWN, /* UNALLOCATED */
+ AARCH64_INSN_CLS_DP_IMM, /* Data processing - immediate */
+ AARCH64_INSN_CLS_DP_REG, /* Data processing - register */
+ AARCH64_INSN_CLS_DP_FPSIMD, /* Data processing - SIMD and FP */
+ AARCH64_INSN_CLS_LDST, /* Loads and stores */
+ AARCH64_INSN_CLS_BR_SYS, /* Branch, exception generation and
+ * system instructions */
+};
+
+#define __AARCH64_INSN_FUNCS(abbr, mask, val) \
+static __always_inline bool aarch64_insn_is_##abbr(u32 code) \
+{ return (code & (mask)) == (val); } \
+static __always_inline u32 aarch64_insn_get_##abbr##_mask(void) \
+{ return (mask); } \
+static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \
+{ return (val); }
+
+__AARCH64_INSN_FUNCS(b, 0xFC000000, 0x14000000)
+__AARCH64_INSN_FUNCS(bl, 0xFC000000, 0x94000000)
+__AARCH64_INSN_FUNCS(svc, 0xFFE0001F, 0xD4000001)
+__AARCH64_INSN_FUNCS(hvc, 0xFFE0001F, 0xD4000002)
+__AARCH64_INSN_FUNCS(smc, 0xFFE0001F, 0xD4000003)
+__AARCH64_INSN_FUNCS(brk, 0xFFE0001F, 0xD4200000)
+__AARCH64_INSN_FUNCS(nop, 0xFFFFFFFF, 0xD503201F)
+
+#undef __AARCH64_INSN_FUNCS
+
+enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
+
+bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
+
+#endif /* _ASM_ARM64_INSN_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7b4b564..9af6cb3 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -9,7 +9,7 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \
entry-fpsimd.o process.o ptrace.o setup.o signal.o \
sys.o stacktrace.o time.o traps.o io.o vdso.o \
- hyp-stub.o psci.o
+ hyp-stub.o psci.o insn.o
arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
sys_compat.o
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
new file mode 100644
index 0000000..1c501f3
--- /dev/null
+++ b/arch/arm64/kernel/insn.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2013 Huawei Ltd.
+ * Author: Jiang Liu <[email protected]>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <asm/insn.h>
+
+static int aarch64_insn_encoding_cls[] = {
+ AARCH64_INSN_CLS_UNKNOWN,
+ AARCH64_INSN_CLS_UNKNOWN,
+ AARCH64_INSN_CLS_UNKNOWN,
+ AARCH64_INSN_CLS_UNKNOWN,
+ AARCH64_INSN_CLS_LDST,
+ AARCH64_INSN_CLS_DP_REG,
+ AARCH64_INSN_CLS_LDST,
+ AARCH64_INSN_CLS_DP_FPSIMD,
+ AARCH64_INSN_CLS_DP_IMM,
+ AARCH64_INSN_CLS_DP_IMM,
+ AARCH64_INSN_CLS_BR_SYS,
+ AARCH64_INSN_CLS_BR_SYS,
+ AARCH64_INSN_CLS_LDST,
+ AARCH64_INSN_CLS_DP_REG,
+ AARCH64_INSN_CLS_LDST,
+ AARCH64_INSN_CLS_DP_FPSIMD,
+};
+
+enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn)
+{
+ return aarch64_insn_encoding_cls[(insn >> 25) & 0xf];
+}
+
+static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn)
+{
+ if (aarch64_get_insn_class(insn) != AARCH64_INSN_CLS_BR_SYS)
+ return false;
+
+ return aarch64_insn_is_b(insn) ||
+ aarch64_insn_is_bl(insn) ||
+ aarch64_insn_is_svc(insn) ||
+ aarch64_insn_is_hvc(insn) ||
+ aarch64_insn_is_smc(insn) ||
+ aarch64_insn_is_brk(insn) ||
+ aarch64_insn_is_nop(insn);
+}
+
+/*
+ * ARMv8-A Section B2.6.5:
+ * Concurrent modification and execution of instructions can lead to the
+ * resulting instruction performing any behavior that can be achieved by
+ * executing any sequence of instructions that can be executed from the
+ * same Exception level, except where the instruction before modification
+ * and the instruction after modification is a B, BL, NOP, BKPT, SVC, HVC,
+ * or SMC instruction.
+ */
+bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn)
+{
+ return __aarch64_insn_hotpatch_safe(old_insn) &&
+ __aarch64_insn_hotpatch_safe(new_insn);
+}
--
1.8.1.2
From: Jiang Liu <[email protected]>
Introduce three interfaces to patch kernel and module code:
aarch64_insn_patch_text_nosync():
patch code without synchronization; it is the caller's responsibility
to synchronize all CPUs if needed.
aarch64_insn_patch_text_sync():
patch code and always synchronize with stop_machine()
aarch64_insn_patch_text():
patch code and synchronize with stop_machine() if needed
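For illustration, a hypothetical caller patching a single instruction could
look like this (sketch only; aarch64_insn_gen_nop() is added later in this
series):

static int patch_one_nop(void *addr)
{
	u32 insn = aarch64_insn_gen_nop();

	/*
	 * A single hotpatch-safe instruction, so aarch64_insn_patch_text()
	 * can take the cheaper write-plus-IPI path instead of stop_machine().
	 */
	return aarch64_insn_patch_text(&addr, &insn, 1);
}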
Signed-off-by: Jiang Liu <[email protected]>
Cc: Jiang Liu <[email protected]>
---
arch/arm64/include/asm/insn.h | 24 +++++++++++++-
arch/arm64/kernel/insn.c | 77 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 100 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 6190016..fc439b9 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -60,8 +60,30 @@ __AARCH64_INSN_FUNCS(nop, 0xFFFFFFFF, 0xD503201F)
#undef __AARCH64_INSN_FUNCS
-enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
+/*
+ * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always
+ * little-endian. On the other hand, SCTLR_EL1.EE (bit 25, Exception Endianness)
+ * flag controls endianness for EL1 explicit data accesses and stage 1
+ * translation table walks as below:
+ * 0: little-endian
+ * 1: big-endian
+ * So need to handle endianness when patching kernel code.
+ */
+static __always_inline u32 aarch64_insn_read(void *addr)
+{
+ return le32_to_cpu(*(u32 *)addr);
+}
+static __always_inline void aarch64_insn_write(void *addr, u32 insn)
+{
+ *(u32 *)addr = cpu_to_le32(insn);
+}
+
+enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
+int aarch64_insn_patch_text_nosync(void *addrs[], u32 insns[], int cnt);
+int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
+int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
+
#endif /* _ASM_ARM64_INSN_H */
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 1c501f3..8dd5fbe 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -16,6 +16,9 @@
*/
#include <linux/compiler.h>
#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/stop_machine.h>
+#include <asm/cacheflush.h>
#include <asm/insn.h>
static int aarch64_insn_encoding_cls[] = {
@@ -70,3 +73,77 @@ bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn)
return __aarch64_insn_hotpatch_safe(old_insn) &&
__aarch64_insn_hotpatch_safe(new_insn);
}
+
+int __kprobes aarch64_insn_patch_text_nosync(void *addrs[], u32 insns[],
+ int cnt)
+{
+ int i;
+ u32 *tp;
+
+ if (cnt <= 0)
+ return -EINVAL;
+
+ for (i = 0; i < cnt; i++) {
+ tp = addrs[i];
+ /* A64 instructions must be word aligned */
+ if ((uintptr_t)tp & 0x3)
+ return -EINVAL;
+ aarch64_insn_write(tp, insns[i]);
+ flush_icache_range((uintptr_t)tp, (uintptr_t)tp + sizeof(u32));
+ }
+
+ return 0;
+}
+
+struct aarch64_insn_patch {
+ void **text_addrs;
+ u32 *new_insns;
+ int insn_cnt;
+};
+
+static int __kprobes aarch64_insn_patch_text_cb(void *arg)
+{
+ struct aarch64_insn_patch *pp = arg;
+
+ return aarch64_insn_patch_text_nosync(pp->text_addrs, pp->new_insns,
+ pp->insn_cnt);
+}
+
+int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt)
+{
+ struct aarch64_insn_patch patch = {
+ .text_addrs = addrs,
+ .new_insns = insns,
+ .insn_cnt = cnt,
+ };
+
+ if (cnt <= 0)
+ return -EINVAL;
+
+ /*
+ * Execute __aarch64_insn_patch_text() on every online CPU,
+ * which ensure serialization among all online CPUs.
+ */
+ return stop_machine(aarch64_insn_patch_text_cb, &patch, NULL);
+}
+
+int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
+{
+ int ret;
+
+ if (cnt == 1 && aarch64_insn_hotpatch_safe(aarch64_insn_read(addrs[0]),
+ insns[0])) {
+ /*
+ * It doesn't guarantee all CPUs see the new instruction
+ * after returning from aarch64_insn_patch_text_nosync().
+ * So send an IPI to all other CPUs to achieve instruction
+ * synchronization.
+ */
+ ret = aarch64_insn_patch_text_nosync(addrs, insns, cnt);
+ kick_all_cpus_sync();
+ } else {
+ ret = aarch64_insn_patch_text_sync(addrs, insns, cnt);
+ }
+
+ return ret;
+}
--
1.8.1.2
From: Jiang Liu <[email protected]>
The function encode_insn_immediate() will be used by other
instruction-manipulation functions, so move it into insn.c and rename it
to aarch64_insn_encode_immediate().
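As a rough, hypothetical sketch of the kind of caller this enables (patch 4
below builds a branch instruction in essentially this way):

static u32 gen_b_insn(unsigned long pc, unsigned long addr)
{
	long offset = ((long)addr - (long)pc) >> 2;

	/* fill in the 26-bit immediate field of an unconditional B */
	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26,
					     aarch64_insn_get_b_value(),
					     offset);
}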
Signed-off-by: Jiang Liu <[email protected]>
Cc: Jiang Liu <[email protected]>
---
arch/arm64/include/asm/insn.h | 14 ++++
arch/arm64/kernel/insn.c | 58 ++++++++++++++++
arch/arm64/kernel/module.c | 152 +++++++++++++-----------------------------
3 files changed, 117 insertions(+), 107 deletions(-)
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index fc439b9..d62b601 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -42,6 +42,18 @@ enum aarch64_insn_encoding_class {
* system instructions */
};
+enum aarch64_insn_imm_type {
+ AARCH64_INSN_IMM_MOVK,
+ AARCH64_INSN_IMM_ADR,
+ AARCH64_INSN_IMM_26,
+ AARCH64_INSN_IMM_19,
+ AARCH64_INSN_IMM_16,
+ AARCH64_INSN_IMM_14,
+ AARCH64_INSN_IMM_12,
+ AARCH64_INSN_IMM_9,
+ AARCH64_INSN_IMM_MAX
+};
+
#define __AARCH64_INSN_FUNCS(abbr, mask, val) \
static __always_inline bool aarch64_insn_is_##abbr(u32 code) \
{ return (code & (mask)) == (val); } \
@@ -80,6 +92,8 @@ static __always_inline void aarch64_insn_write(void *addr, u32 insn)
}
enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
+u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
+ u32 insn, u64 imm);
bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
int aarch64_insn_patch_text_nosync(void *addrs[], u32 insns[], int cnt);
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 8dd5fbe..344d23ed 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -147,3 +147,61 @@ int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
return ret;
}
+
+u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
+ u32 insn, u64 imm)
+{
+ u32 immlo, immhi, lomask, himask, mask;
+ int shift;
+
+ switch (type) {
+ case AARCH64_INSN_IMM_MOVK:
+ mask = BIT(16) - 1;
+ shift = 5;
+ break;
+ case AARCH64_INSN_IMM_ADR:
+ lomask = 0x3;
+ himask = 0x7ffff;
+ immlo = imm & lomask;
+ imm >>= 2;
+ immhi = imm & himask;
+ imm = (immlo << 24) | (immhi);
+ mask = (lomask << 24) | (himask);
+ shift = 5;
+ break;
+ case AARCH64_INSN_IMM_26:
+ mask = BIT(26) - 1;
+ shift = 0;
+ break;
+ case AARCH64_INSN_IMM_19:
+ mask = BIT(19) - 1;
+ shift = 5;
+ break;
+ case AARCH64_INSN_IMM_16:
+ mask = BIT(16) - 1;
+ shift = 5;
+ break;
+ case AARCH64_INSN_IMM_14:
+ mask = BIT(14) - 1;
+ shift = 5;
+ break;
+ case AARCH64_INSN_IMM_12:
+ mask = BIT(12) - 1;
+ shift = 10;
+ break;
+ case AARCH64_INSN_IMM_9:
+ mask = BIT(9) - 1;
+ shift = 12;
+ break;
+ default:
+ pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n",
+ type);
+ return 0;
+ }
+
+ /* Update the immediate field. */
+ insn &= ~(mask << shift);
+ insn |= (imm & mask) << shift;
+
+ return insn;
+}
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index ca0e3d5..695a2dd 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -25,6 +25,9 @@
#include <linux/mm.h>
#include <linux/moduleloader.h>
#include <linux/vmalloc.h>
+#include <asm/insn.h>
+
+#define AARCH64_INSN_IMM_MOVNZ AARCH64_INSN_IMM_MAX
void *module_alloc(unsigned long size)
{
@@ -94,25 +97,19 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
return 0;
}
-enum aarch64_imm_type {
- INSN_IMM_MOVNZ,
- INSN_IMM_MOVK,
- INSN_IMM_ADR,
- INSN_IMM_26,
- INSN_IMM_19,
- INSN_IMM_16,
- INSN_IMM_14,
- INSN_IMM_12,
- INSN_IMM_9,
-};
-
-static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm)
+static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
+ int lsb, enum aarch64_insn_imm_type imm_type)
{
- u32 immlo, immhi, lomask, himask, mask;
- int shift;
+ u64 imm, limit = 0;
+ s64 sval;
+ u32 insn = *(u32 *)place;
+
+ sval = do_reloc(op, place, val);
+ sval >>= lsb;
+ imm = sval & 0xffff;
+
- switch (type) {
- case INSN_IMM_MOVNZ:
+ if (imm_type == AARCH64_INSN_IMM_MOVNZ) {
/*
* For signed MOVW relocations, we have to manipulate the
* instruction encoding depending on whether or not the
@@ -131,70 +128,11 @@ static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm)
*/
imm = ~imm;
}
- case INSN_IMM_MOVK:
- mask = BIT(16) - 1;
- shift = 5;
- break;
- case INSN_IMM_ADR:
- lomask = 0x3;
- himask = 0x7ffff;
- immlo = imm & lomask;
- imm >>= 2;
- immhi = imm & himask;
- imm = (immlo << 24) | (immhi);
- mask = (lomask << 24) | (himask);
- shift = 5;
- break;
- case INSN_IMM_26:
- mask = BIT(26) - 1;
- shift = 0;
- break;
- case INSN_IMM_19:
- mask = BIT(19) - 1;
- shift = 5;
- break;
- case INSN_IMM_16:
- mask = BIT(16) - 1;
- shift = 5;
- break;
- case INSN_IMM_14:
- mask = BIT(14) - 1;
- shift = 5;
- break;
- case INSN_IMM_12:
- mask = BIT(12) - 1;
- shift = 10;
- break;
- case INSN_IMM_9:
- mask = BIT(9) - 1;
- shift = 12;
- break;
- default:
- pr_err("encode_insn_immediate: unknown immediate encoding %d\n",
- type);
- return 0;
+ imm_type = AARCH64_INSN_IMM_MOVK;
}
- /* Update the immediate field. */
- insn &= ~(mask << shift);
- insn |= (imm & mask) << shift;
-
- return insn;
-}
-
-static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
- int lsb, enum aarch64_imm_type imm_type)
-{
- u64 imm, limit = 0;
- s64 sval;
- u32 insn = *(u32 *)place;
-
- sval = do_reloc(op, place, val);
- sval >>= lsb;
- imm = sval & 0xffff;
-
/* Update the instruction with the new encoding. */
- *(u32 *)place = encode_insn_immediate(imm_type, insn, imm);
+ *(u32 *)place = aarch64_insn_encode_immediate(imm_type, insn, imm);
/* Shift out the immediate field. */
sval >>= 16;
@@ -203,9 +141,9 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
* For unsigned immediates, the overflow check is straightforward.
* For signed immediates, the sign bit is actually the bit past the
* most significant bit of the field.
- * The INSN_IMM_16 immediate type is unsigned.
+ * The AARCH64_INSN_IMM_16 immediate type is unsigned.
*/
- if (imm_type != INSN_IMM_16) {
+ if (imm_type != AARCH64_INSN_IMM_16) {
sval++;
limit++;
}
@@ -218,7 +156,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
}
static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val,
- int lsb, int len, enum aarch64_imm_type imm_type)
+ int lsb, int len, enum aarch64_insn_imm_type imm_type)
{
u64 imm, imm_mask;
s64 sval;
@@ -233,7 +171,7 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val,
imm = sval & imm_mask;
/* Update the instruction's immediate field. */
- *(u32 *)place = encode_insn_immediate(imm_type, insn, imm);
+ *(u32 *)place = aarch64_insn_encode_immediate(imm_type, insn, imm);
/*
* Extract the upper value bits (including the sign bit) and
@@ -315,125 +253,125 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
overflow_check = false;
case R_AARCH64_MOVW_UABS_G0:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
- INSN_IMM_16);
+ AARCH64_INSN_IMM_16);
break;
case R_AARCH64_MOVW_UABS_G1_NC:
overflow_check = false;
case R_AARCH64_MOVW_UABS_G1:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
- INSN_IMM_16);
+ AARCH64_INSN_IMM_16);
break;
case R_AARCH64_MOVW_UABS_G2_NC:
overflow_check = false;
case R_AARCH64_MOVW_UABS_G2:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
- INSN_IMM_16);
+ AARCH64_INSN_IMM_16);
break;
case R_AARCH64_MOVW_UABS_G3:
/* We're using the top bits so we can't overflow. */
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48,
- INSN_IMM_16);
+ AARCH64_INSN_IMM_16);
break;
case R_AARCH64_MOVW_SABS_G0:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
- INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ);
break;
case R_AARCH64_MOVW_SABS_G1:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
- INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ);
break;
case R_AARCH64_MOVW_SABS_G2:
ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
- INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ);
break;
case R_AARCH64_MOVW_PREL_G0_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
- INSN_IMM_MOVK);
+ AARCH64_INSN_IMM_MOVK);
break;
case R_AARCH64_MOVW_PREL_G0:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
- INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ);
break;
case R_AARCH64_MOVW_PREL_G1_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
- INSN_IMM_MOVK);
+ AARCH64_INSN_IMM_MOVK);
break;
case R_AARCH64_MOVW_PREL_G1:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
- INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ);
break;
case R_AARCH64_MOVW_PREL_G2_NC:
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
- INSN_IMM_MOVK);
+ AARCH64_INSN_IMM_MOVK);
break;
case R_AARCH64_MOVW_PREL_G2:
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
- INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ);
break;
case R_AARCH64_MOVW_PREL_G3:
/* We're using the top bits so we can't overflow. */
overflow_check = false;
ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48,
- INSN_IMM_MOVNZ);
+ AARCH64_INSN_IMM_MOVNZ);
break;
/* Immediate instruction relocations. */
case R_AARCH64_LD_PREL_LO19:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
- INSN_IMM_19);
+ AARCH64_INSN_IMM_19);
break;
case R_AARCH64_ADR_PREL_LO21:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21,
- INSN_IMM_ADR);
+ AARCH64_INSN_IMM_ADR);
break;
case R_AARCH64_ADR_PREL_PG_HI21_NC:
overflow_check = false;
case R_AARCH64_ADR_PREL_PG_HI21:
ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21,
- INSN_IMM_ADR);
+ AARCH64_INSN_IMM_ADR);
break;
case R_AARCH64_ADD_ABS_LO12_NC:
case R_AARCH64_LDST8_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12,
- INSN_IMM_12);
+ AARCH64_INSN_IMM_12);
break;
case R_AARCH64_LDST16_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11,
- INSN_IMM_12);
+ AARCH64_INSN_IMM_12);
break;
case R_AARCH64_LDST32_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10,
- INSN_IMM_12);
+ AARCH64_INSN_IMM_12);
break;
case R_AARCH64_LDST64_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9,
- INSN_IMM_12);
+ AARCH64_INSN_IMM_12);
break;
case R_AARCH64_LDST128_ABS_LO12_NC:
overflow_check = false;
ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8,
- INSN_IMM_12);
+ AARCH64_INSN_IMM_12);
break;
case R_AARCH64_TSTBR14:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14,
- INSN_IMM_14);
+ AARCH64_INSN_IMM_14);
break;
case R_AARCH64_CONDBR19:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19,
- INSN_IMM_19);
+ AARCH64_INSN_IMM_19);
break;
case R_AARCH64_JUMP26:
case R_AARCH64_CALL26:
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
- INSN_IMM_26);
+ AARCH64_INSN_IMM_26);
break;
default:
--
1.8.1.2
From: Jiang Liu <[email protected]>
Introduce aarch64_insn_gen_{nop|branch_imm}() helper functions, which
will be used to implement jump labels on ARM64.
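For illustration, the jump label patch later in this series uses them roughly
as follows (condensed sketch with a hypothetical helper name):

static u32 jump_label_insn(struct jump_entry *entry, enum jump_label_type type)
{
	if (type == JUMP_LABEL_ENABLE)
		/* branch from the patched site to the out-of-line label */
		return aarch64_insn_gen_branch_imm(entry->code, entry->target,
						   false);

	return aarch64_insn_gen_nop();
}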
Signed-off-by: Jiang Liu <[email protected]>
Cc: Jiang Liu <[email protected]>
---
arch/arm64/include/asm/insn.h | 7 +++++++
arch/arm64/kernel/insn.c | 28 ++++++++++++++++++++++++++++
2 files changed, 35 insertions(+)
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index d62b601..ac129a8 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -94,6 +94,13 @@ static __always_inline void aarch64_insn_write(void *addr, u32 insn)
enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
u32 insn, u64 imm);
+u32 aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
+ bool link);
+static __always_inline u32 aarch64_insn_gen_nop(void)
+{
+ return aarch64_insn_get_nop_value();
+}
+
bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
int aarch64_insn_patch_text_nosync(void *addrs[], u32 insns[], int cnt);
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 344d23ed..a59f71d 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -14,6 +14,7 @@
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/smp.h>
@@ -205,3 +206,30 @@ u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
return insn;
}
+
+u32 aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, bool link)
+{
+ u32 insn;
+ long offset;
+
+ /*
+ * PC: A 64-bit Program Counter holding the address of the current
+ * instruction. A64 instructions must be word aligned.
+ */
+ BUG_ON((pc & 0x3) || (addr & 0x3));
+
+ /*
+ * B/BL support a [-128M, 128M) offset range.
+ * The ARM64 virtual address arrangement guarantees that all kernel and
+ * module text is within +/-128M.
+ */
+ offset = ((long)addr - (long)pc) >> 2;
+ BUG_ON(abs(offset) > BIT(25) || offset == BIT(25));
+
+ if (link)
+ insn = aarch64_insn_get_bl_value();
+ else
+ insn = aarch64_insn_get_b_value();
+
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn, offset);
+}
--
1.8.1.2
From: Jiang Liu <[email protected]>
Like commit a9468f30b5eac6 ("ARM: 7333/2: jump label: detect %c
support for ARM"), this patch detects the same thing for ARM64,
because some ARM64 GCC versions have the same issue.
Some ARM64 GCC versions that do support asm goto do not support the
%c specifier. Since we need %c to support jump labels on ARM64,
detect it too in the asm goto detection script to avoid build errors
with these versions.
Signed-off-by: Jiang Liu <[email protected]>
Cc: Jiang Liu <[email protected]>
---
scripts/gcc-goto.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/scripts/gcc-goto.sh b/scripts/gcc-goto.sh
index a2af2e8..c9469d3 100644
--- a/scripts/gcc-goto.sh
+++ b/scripts/gcc-goto.sh
@@ -5,7 +5,7 @@
cat << "END" | $@ -x c - -c -o /dev/null >/dev/null 2>&1 && echo "y"
int main(void)
{
-#ifdef __arm__
+#if defined(__arm__) || defined(__aarch64__)
/*
* Not related to asm goto, but used by jump label
* and broken on some ARM GCC versions (see GCC Bug 48637).
--
1.8.1.2
From: Jiang Liu <[email protected]>
Optimize the jump label implementation for ARM64 by dynamically patching
kernel text.
Signed-off-by: Jiang Liu <[email protected]>
Cc: Jiang Liu <[email protected]>
---
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/jump_label.h | 52 +++++++++++++++++++++++++++++++++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/jump_label.c | 58 +++++++++++++++++++++++++++++++++++++
4 files changed, 112 insertions(+)
create mode 100644 arch/arm64/include/asm/jump_label.h
create mode 100644 arch/arm64/kernel/jump_label.c
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c044548..da388e4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -17,6 +17,7 @@ config ARM64
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select HARDIRQS_SW_RESEND
+ select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_TRACEHOOK
select HAVE_DEBUG_BUGVERBOSE
select HAVE_DEBUG_KMEMLEAK
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
new file mode 100644
index 0000000..d268fab
--- /dev/null
+++ b/arch/arm64/include/asm/jump_label.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2013 Huawei Ltd.
+ * Author: Jiang Liu <[email protected]>
+ *
+ * Based on arch/arm/include/asm/jump_label.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _ASM_ARM64_JUMP_LABEL_H
+#define _ASM_ARM64_JUMP_LABEL_H
+#include <linux/types.h>
+
+#ifdef __KERNEL__
+
+#define JUMP_LABEL_NOP_SIZE 4
+
+static __always_inline bool arch_static_branch(struct static_key *key)
+{
+ asm goto("1:\n\t"
+ "nop\n\t"
+ ".pushsection __jump_table, \"aw\"\n\t"
+ ".align 3\n\t"
+ ".quad 1b, %l[l_yes], %c0\n\t"
+ ".popsection\n\t"
+ : : "i"(key) : : l_yes);
+
+ return false;
+l_yes:
+ return true;
+}
+
+#endif /* __KERNEL__ */
+
+typedef u64 jump_label_t;
+
+struct jump_entry {
+ jump_label_t code;
+ jump_label_t target;
+ jump_label_t key;
+};
+
+#endif /* _ASM_ARM64_JUMP_LABEL_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 9af6cb3..b7db65e 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -18,6 +18,7 @@ arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o smp_psci.o
arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o
arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-y += $(arm64-obj-y) vdso/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
new file mode 100644
index 0000000..871786a
--- /dev/null
+++ b/arch/arm64/kernel/jump_label.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2013 Huawei Ltd.
+ * Author: Jiang Liu <[email protected]>
+ *
+ * Based on arch/arm/kernel/jump_label.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/kernel.h>
+#include <linux/jump_label.h>
+#include <asm/jump_label.h>
+#include <asm/insn.h>
+
+#ifdef HAVE_JUMP_LABEL
+
+static void __arch_jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type,
+ bool is_static)
+{
+ void *addr = (void *)entry->code;
+ u32 insn;
+
+ if (type == JUMP_LABEL_ENABLE) {
+ insn = aarch64_insn_gen_branch_imm(entry->code,
+ entry->target, 0);
+ } else {
+ insn = aarch64_insn_gen_nop();
+ }
+
+ if (is_static)
+ aarch64_insn_patch_text_nosync(&addr, &insn, 1);
+ else
+ aarch64_insn_patch_text(&addr, &insn, 1);
+}
+
+void arch_jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ __arch_jump_label_transform(entry, type, false);
+}
+
+void arch_jump_label_transform_static(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ __arch_jump_label_transform(entry, type, true);
+}
+
+#endif /* HAVE_JUMP_LABEL */
--
1.8.1.2
From: Jiang Liu <[email protected]>
Use the JUMP_LABEL_TYPE_* macros instead of hard-coded values for
better readability.
Signed-off-by: Jiang Liu <[email protected]>
Cc: Jiang Liu <[email protected]>
---
include/linux/jump_label.h | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index a507907..6e54029 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -74,18 +74,21 @@ struct module;
#include <linux/atomic.h>
#ifdef HAVE_JUMP_LABEL
-#define JUMP_LABEL_TRUE_BRANCH 1UL
+#define JUMP_LABEL_TYPE_FALSE_BRANCH 0UL
+#define JUMP_LABEL_TYPE_TRUE_BRANCH 1UL
+#define JUMP_LABEL_TYPE_MASK 1UL
static
inline struct jump_entry *jump_label_get_entries(struct static_key *key)
{
return (struct jump_entry *)((unsigned long)key->entries
- & ~JUMP_LABEL_TRUE_BRANCH);
+ & ~JUMP_LABEL_TYPE_MASK);
}
static inline bool jump_label_get_branch_default(struct static_key *key)
{
- if ((unsigned long)key->entries & JUMP_LABEL_TRUE_BRANCH)
+ if (((unsigned long)key->entries & JUMP_LABEL_TYPE_MASK) ==
+ JUMP_LABEL_TYPE_TRUE_BRANCH)
return true;
return false;
}
@@ -116,9 +119,11 @@ extern void static_key_slow_dec(struct static_key *key);
extern void jump_label_apply_nops(struct module *mod);
#define STATIC_KEY_INIT_TRUE ((struct static_key) \
- { .enabled = ATOMIC_INIT(1), .entries = (void *)1 })
+ { .enabled = ATOMIC_INIT(1), \
+ .entries = (void *)JUMP_LABEL_TYPE_TRUE_BRANCH })
#define STATIC_KEY_INIT_FALSE ((struct static_key) \
- { .enabled = ATOMIC_INIT(0), .entries = (void *)0 })
+ { .enabled = ATOMIC_INIT(0), \
+ .entries = (void *)JUMP_LABEL_TYPE_FALSE_BRANCH })
#else /* !HAVE_JUMP_LABEL */
--
1.8.1.2
On Thu, Oct 17, 2013 at 07:19:34AM +0100, Jiang Liu wrote:
> From: Jiang Liu <[email protected]>
>
> Introduce basic aarch64 instruction decoding helper
> aarch64_get_insn_class() and aarch64_insn_hotpatch_safe().
>
> Signed-off-by: Jiang Liu <[email protected]>
> Cc: Jiang Liu <[email protected]>
> ---
> arch/arm64/include/asm/insn.h | 67 ++++++++++++++++++++++++++++++++++++++++
> arch/arm64/kernel/Makefile | 2 +-
> arch/arm64/kernel/insn.c | 72 +++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 140 insertions(+), 1 deletion(-)
> create mode 100644 arch/arm64/include/asm/insn.h
> create mode 100644 arch/arm64/kernel/insn.c
>
> diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
> new file mode 100644
> index 0000000..6190016
> --- /dev/null
> +++ b/arch/arm64/include/asm/insn.h
> @@ -0,0 +1,67 @@
> +/*
> + * Copyright (C) 2013 Huawei Ltd.
> + * Author: Jiang Liu <[email protected]>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +#ifndef _ASM_ARM64_INSN_H
> +#define _ASM_ARM64_INSN_H
We've tried to be consistent using __ASM_<name>_H here.
> +#include <linux/types.h>
> +
> +/*
> + * ARM Architecture Reference Manual ARMv8, Section C3.1
> + * AArch64 main encoding table
> + * Bit position
> + * 28 27 26 25 Encoding Group
> + * 0 0 - - Unallocated
> + * 1 0 0 - Data processing, immediate
> + * 1 0 1 - Branch, exception generation and system instructions
> + * - 1 - 0 Loads and stores
> + * - 1 0 1 Data processing - register
> + * 0 1 1 1 Data processing - SIMD and floating point
> + * 1 1 1 1 Data processing - SIMD and floating point
> + * "-" means "don't care"
> + */
> +enum aarch64_insn_encoding_class {
> + AARCH64_INSN_CLS_UNKNOWN, /* UNALLOCATED */
> + AARCH64_INSN_CLS_DP_IMM, /* Data processing - immediate */
> + AARCH64_INSN_CLS_DP_REG, /* Data processing - register */
> + AARCH64_INSN_CLS_DP_FPSIMD, /* Data processing - SIMD and FP */
> + AARCH64_INSN_CLS_LDST, /* Loads and stores */
> + AARCH64_INSN_CLS_BR_SYS, /* Branch, exception generation and
> + * system instructions */
> +};
> +
> +#define __AARCH64_INSN_FUNCS(abbr, mask, val) \
> +static __always_inline bool aarch64_insn_is_##abbr(u32 code) \
> +{ return (code & (mask)) == (val); } \
> +static __always_inline u32 aarch64_insn_get_##abbr##_mask(void) \
> +{ return (mask); } \
> +static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \
> +{ return (val); }
> +
> +__AARCH64_INSN_FUNCS(b, 0xFC000000, 0x14000000)
> +__AARCH64_INSN_FUNCS(bl, 0xFC000000, 0x94000000)
> +__AARCH64_INSN_FUNCS(svc, 0xFFE0001F, 0xD4000001)
> +__AARCH64_INSN_FUNCS(hvc, 0xFFE0001F, 0xD4000002)
> +__AARCH64_INSN_FUNCS(smc, 0xFFE0001F, 0xD4000003)
> +__AARCH64_INSN_FUNCS(brk, 0xFFE0001F, 0xD4200000)
> +__AARCH64_INSN_FUNCS(nop, 0xFFFFFFFF, 0xD503201F)
> +
> +#undef __AARCH64_INSN_FUNCS
> +
> +enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
> +
> +bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
> +
> +#endif /* _ASM_ARM64_INSN_H */
> diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
> index 7b4b564..9af6cb3 100644
> --- a/arch/arm64/kernel/Makefile
> +++ b/arch/arm64/kernel/Makefile
> @@ -9,7 +9,7 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
> arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \
> entry-fpsimd.o process.o ptrace.o setup.o signal.o \
> sys.o stacktrace.o time.o traps.o io.o vdso.o \
> - hyp-stub.o psci.o
> + hyp-stub.o psci.o insn.o
>
> arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
> sys_compat.o
> diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
> new file mode 100644
> index 0000000..1c501f3
> --- /dev/null
> +++ b/arch/arm64/kernel/insn.c
> @@ -0,0 +1,72 @@
> +/*
> + * Copyright (C) 2013 Huawei Ltd.
> + * Author: Jiang Liu <[email protected]>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program. If not, see <http://www.gnu.org/licenses/>.
> + */
> +#include <linux/compiler.h>
> +#include <linux/kernel.h>
> +#include <asm/insn.h>
> +
> +static int aarch64_insn_encoding_cls[] = {
> + AARCH64_INSN_CLS_UNKNOWN,
> + AARCH64_INSN_CLS_UNKNOWN,
> + AARCH64_INSN_CLS_UNKNOWN,
> + AARCH64_INSN_CLS_UNKNOWN,
> + AARCH64_INSN_CLS_LDST,
> + AARCH64_INSN_CLS_DP_REG,
> + AARCH64_INSN_CLS_LDST,
> + AARCH64_INSN_CLS_DP_FPSIMD,
> + AARCH64_INSN_CLS_DP_IMM,
> + AARCH64_INSN_CLS_DP_IMM,
> + AARCH64_INSN_CLS_BR_SYS,
> + AARCH64_INSN_CLS_BR_SYS,
> + AARCH64_INSN_CLS_LDST,
> + AARCH64_INSN_CLS_DP_REG,
> + AARCH64_INSN_CLS_LDST,
> + AARCH64_INSN_CLS_DP_FPSIMD,
> +};
> +
> +enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn)
> +{
> + return aarch64_insn_encoding_cls[(insn >> 25) & 0xf];
> +}
For some reason, I just read `cls' as `count leading set', so you could just
use `class' here to help lazy readers like me :)
> +static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn)
> +{
> + if (aarch64_get_insn_class(insn) != AARCH64_INSN_CLS_BR_SYS)
> + return false;
> +
> + return aarch64_insn_is_b(insn) ||
> + aarch64_insn_is_bl(insn) ||
> + aarch64_insn_is_svc(insn) ||
> + aarch64_insn_is_hvc(insn) ||
> + aarch64_insn_is_smc(insn) ||
> + aarch64_insn_is_brk(insn) ||
> + aarch64_insn_is_nop(insn);
> +}
> +
> +/*
> + * ARMv8-A Section B2.6.5:
> + * Concurrent modification and execution of instructions can lead to the
> + * resulting instruction performing any behavior that can be achieved by
> + * executing any sequence of instructions that can be executed from the
> + * same Exception level, except where the instruction before modification
> + * and the instruction after modification is a B, BL, NOP, BKPT, SVC, HVC,
> + * or SMC instruction.
> + */
> +bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn)
> +{
> + return __aarch64_insn_hotpatch_safe(old_insn) &&
> + __aarch64_insn_hotpatch_safe(new_insn);
> +}
With those cosmetic changes:
Reviewed-by: Will Deacon <[email protected]>
Will
[adding Tixy for stop_machine() question below]
On Thu, Oct 17, 2013 at 07:19:35AM +0100, Jiang Liu wrote:
> From: Jiang Liu <[email protected]>
>
> Introduce three interfaces to patch kernel and module code:
> aarch64_insn_patch_text_nosync():
> patch code without synchronization, it's caller's responsibility
> to synchronize all CPUs if needed.
> aarch64_insn_patch_text_sync():
> patch code and always synchronize with stop_machine()
> aarch64_insn_patch_text():
> patch code and synchronize with stop_machine() if needed
>
> Signed-off-by: Jiang Liu <[email protected]>
> Cc: Jiang Liu <[email protected]>
> ---
> arch/arm64/include/asm/insn.h | 24 +++++++++++++-
> arch/arm64/kernel/insn.c | 77 +++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 100 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
> index 6190016..fc439b9 100644
> --- a/arch/arm64/include/asm/insn.h
> +++ b/arch/arm64/include/asm/insn.h
> @@ -60,8 +60,30 @@ __AARCH64_INSN_FUNCS(nop, 0xFFFFFFFF, 0xD503201F)
>
> #undef __AARCH64_INSN_FUNCS
>
> -enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
> +/*
> + * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always
> + * little-endian. On the other hand, SCTLR_EL1.EE (bit 25, Exception Endianness)
> + * flag controls endianness for EL1 explicit data accesses and stage 1
> + * translation table walks as below:
> + * 0: little-endian
> + * 1: big-endian
> + * So need to handle endianness when patching kernel code.
> + */
You can delete this comment now that we're using the helpers...
> +static __always_inline u32 aarch64_insn_read(void *addr)
> +{
> + return le32_to_cpu(*(u32 *)addr);
> +}
>
> +static __always_inline void aarch64_insn_write(void *addr, u32 insn)
> +{
> + *(u32 *)addr = cpu_to_le32(insn);
> +}
... then just inline these calls directly.
> +enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
> bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
>
> +int aarch64_insn_patch_text_nosync(void *addrs[], u32 insns[], int cnt);
> +int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
> +int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
> +
> #endif /* _ASM_ARM64_INSN_H */
> diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
> index 1c501f3..8dd5fbe 100644
> --- a/arch/arm64/kernel/insn.c
> +++ b/arch/arm64/kernel/insn.c
> @@ -16,6 +16,9 @@
> */
> #include <linux/compiler.h>
> #include <linux/kernel.h>
> +#include <linux/smp.h>
> +#include <linux/stop_machine.h>
> +#include <asm/cacheflush.h>
> #include <asm/insn.h>
>
> static int aarch64_insn_encoding_cls[] = {
> @@ -70,3 +73,77 @@ bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn)
> return __aarch64_insn_hotpatch_safe(old_insn) &&
> __aarch64_insn_hotpatch_safe(new_insn);
> }
> +
> +int __kprobes aarch64_insn_patch_text_nosync(void *addrs[], u32 insns[],
> + int cnt)
> +{
> + int i;
> + u32 *tp;
> +
> + if (cnt <= 0)
> + return -EINVAL;
Isn't cnt always 1 for the _nosync patching? Can you just drop the argument
and simplify this code? Patching a sequence without syncing is always racy.
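Something like this (completely untested) is what I had in mind:

int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
{
	u32 *tp = addr;

	/* A64 instructions must be word aligned */
	if ((uintptr_t)tp & 0x3)
		return -EINVAL;

	aarch64_insn_write(tp, insn);
	flush_icache_range((uintptr_t)tp, (uintptr_t)tp + sizeof(insn));

	return 0;
}

The stop_machine() callback can keep the loop over the array and call this
once per instruction.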
> + for (i = 0; i < cnt; i++) {
> + tp = addrs[i];
> + /* A64 instructions must be word aligned */
> + if ((uintptr_t)tp & 0x3)
> + return -EINVAL;
> + aarch64_insn_write(tp, insns[i]);
> + flush_icache_range((uintptr_t)tp, (uintptr_t)tp + sizeof(u32));
> + }
> +
> + return 0;
> +}
> +
> +struct aarch64_insn_patch {
> + void **text_addrs;
> + u32 *new_insns;
> + int insn_cnt;
> +};
> +
> +static int __kprobes aarch64_insn_patch_text_cb(void *arg)
> +{
> + struct aarch64_insn_patch *pp = arg;
> +
> + return aarch64_insn_patch_text_nosync(pp->text_addrs, pp->new_insns,
> + pp->insn_cnt);
> +}
> +
> +int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt)
> +{
> + struct aarch64_insn_patch patch = {
> + .text_addrs = addrs,
> + .new_insns = insns,
> + .insn_cnt = cnt,
> + };
> +
> + if (cnt <= 0)
> + return -EINVAL;
> +
> + /*
> + * Execute __aarch64_insn_patch_text() on every online CPU,
> + * which ensure serialization among all online CPUs.
> + */
> + return stop_machine(aarch64_insn_patch_text_cb, &patch, NULL);
> +}
Whoa, whoa, whoa! The comment here is wrong -- we only run the patching on
*one* CPU, which is the right thing to do. However, the arch/arm/ call to
stop_machine in kprobes does actually run the patching code on *all* the
online cores (including the cache flushing!). I think this is to work around
cores without hardware cache maintenance broadcasting, but that could easily
be called out specially (like we do in patch.c) and the flushing could be
separated from the patching too.
> +int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
> +{
> + int ret;
> +
> + if (cnt == 1 && aarch64_insn_hotpatch_safe(aarch64_insn_read(addrs[0]),
> + insns[0])) {
You could make aarch64_insn_hotpatch_safe take the cnt parameter and return
false if cnt != 1.
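i.e. something like this (untested):

bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn, int cnt)
{
	/* patching a sequence of instructions is never hotpatch-safe */
	if (cnt != 1)
		return false;

	return __aarch64_insn_hotpatch_safe(old_insn) &&
	       __aarch64_insn_hotpatch_safe(new_insn);
}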
> + /*
> + * It doesn't guarantee all CPUs see the new instruction
"It"? You mean the ARMv8 architecture?
Will
On Thu, Oct 17, 2013 at 07:19:36AM +0100, Jiang Liu wrote:
> From: Jiang Liu <[email protected]>
>
> Function encode_insn_immediate() will be used by other instruction
> manipulate related functions, so move it into insn.c and rename it
> as aarch64_insn_encode_immediate().
>
> Signed-off-by: Jiang Liu <[email protected]>
> Cc: Jiang Liu <[email protected]>
> ---
> arch/arm64/include/asm/insn.h | 14 ++++
> arch/arm64/kernel/insn.c | 58 ++++++++++++++++
> arch/arm64/kernel/module.c | 152 +++++++++++++-----------------------------
> 3 files changed, 117 insertions(+), 107 deletions(-)
>
> diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
> index fc439b9..d62b601 100644
> --- a/arch/arm64/include/asm/insn.h
> +++ b/arch/arm64/include/asm/insn.h
> @@ -42,6 +42,18 @@ enum aarch64_insn_encoding_class {
> * system instructions */
> };
>
> +enum aarch64_insn_imm_type {
> + AARCH64_INSN_IMM_MOVK,
> + AARCH64_INSN_IMM_ADR,
> + AARCH64_INSN_IMM_26,
> + AARCH64_INSN_IMM_19,
> + AARCH64_INSN_IMM_16,
> + AARCH64_INSN_IMM_14,
> + AARCH64_INSN_IMM_12,
> + AARCH64_INSN_IMM_9,
> + AARCH64_INSN_IMM_MAX
> +};
> +
> #define __AARCH64_INSN_FUNCS(abbr, mask, val) \
> static __always_inline bool aarch64_insn_is_##abbr(u32 code) \
> { return (code & (mask)) == (val); } \
> @@ -80,6 +92,8 @@ static __always_inline void aarch64_insn_write(void *addr, u32 insn)
> }
>
> enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
> +u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
> + u32 insn, u64 imm);
> bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
>
> int aarch64_insn_patch_text_nosync(void *addrs[], u32 insns[], int cnt);
> diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
> index 8dd5fbe..344d23ed 100644
> --- a/arch/arm64/kernel/insn.c
> +++ b/arch/arm64/kernel/insn.c
> @@ -147,3 +147,61 @@ int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
>
> return ret;
> }
> +
> +u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
> + u32 insn, u64 imm)
> +{
> + u32 immlo, immhi, lomask, himask, mask;
> + int shift;
> +
> + switch (type) {
> + case AARCH64_INSN_IMM_MOVK:
> + mask = BIT(16) - 1;
> + shift = 5;
> + break;
This is just AARCH64_INSN_IMM_16 now that we're not dealing with MOV
relocations. You can kill INSN_IMM_MOVK in the enum.
The rest of the patch looks fine.
Will
Hi Will,
Thanks for the review, I will make those small changes.
Gerry
On 10/17/2013 06:47 PM, Will Deacon wrote:
> [...]
On Thu, 2013-10-17 at 12:38 +0100, Will Deacon wrote:
> [adding Tixy for stop_machine() question below]
>
> On Thu, Oct 17, 2013 at 07:19:35AM +0100, Jiang Liu wrote:
[...]
> > +int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt)
> > +{
> > + struct aarch64_insn_patch patch = {
> > + .text_addrs = addrs,
> > + .new_insns = insns,
> > + .insn_cnt = cnt,
> > + };
> > +
> > + if (cnt <= 0)
> > + return -EINVAL;
> > +
> > + /*
> > + * Execute __aarch64_insn_patch_text() on every online CPU,
> > + * which ensure serialization among all online CPUs.
> > + */
> > + return stop_machine(aarch64_insn_patch_text_cb, &patch, NULL);
> > +}
>
> Whoa, whoa, whoa! The comment here is wrong -- we only run the patching on
> *one* CPU, which is the right thing to do. However, the arch/arm/ call to
> stop_machine in kprobes does actually run the patching code on *all* the
> online cores (including the cache flushing!). I think this is to work around
> cores without hardware cache maintenance broadcasting, but that could easily
> be called out specially (like we do in patch.c) and the flushing could be
> separated from the patching too.
[...]
For code modifications done in 32bit ARM kprobes (and ftrace) I'm not
sure we ever actually resolved the possible cache flushing issues. If
there were specific reasons for flushing on all cores, I can't remember
them, sorry. I have a suspicion that doing so was a case of sticking
with what the code was already doing, and flushing on all cores seemed
safest to guard against problems we hadn't thought about.
Some of the issues discussed were that we couldn't have one core
potentially executing instructions being modified by another CPU,
because that's architecturally unpredictable except for a few
instructions [1], and we also have the case where a 32-bit Thumb
instruction can straddle two different cache-lines. But these may not be
reasons to flush on all cores if stop_machine() is synchronising all CPUs
in a kind of holding pen and the cache operations done on one core are
broadcast to others. (Are there correct barriers involved in
stop_machine() so that when the other cores resume they are guaranteed to
only see the new version of the modified code, or do we only get that
guarantee because we happen to execute the cache flushing on all cores?)
[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2012-December/136441.html
Another of the issues I hit was big.LITTLE related whereby the cache
line size is different on different cores [2].
[2] http://lists.infradead.org/pipermail/linux-arm-kernel/2013-February/149794.html
I don't think anything I've said above actually gives a solid reason why
we _must_ execute cache flushing on all cores for kprobes and can't just
use the relatively new patch_text function (which checks for the one
case we do need to flush on all cores using cache_ops_need_broadcast).
Sorry, I don't think I've added much light on things here have I?
--
Tixy
On 10/17/2013 07:38 PM, Will Deacon wrote:
> [adding Tixy for stop_machine() question below]
>
> On Thu, Oct 17, 2013 at 07:19:35AM +0100, Jiang Liu wrote:
>> From: Jiang Liu <[email protected]>
>>
>> Introduce three interfaces to patch kernel and module code:
>> aarch64_insn_patch_text_nosync():
>> patch code without synchronization, it's caller's responsibility
>> to synchronize all CPUs if needed.
>> aarch64_insn_patch_text_sync():
>> patch code and always synchronize with stop_machine()
>> aarch64_insn_patch_text():
>> patch code and synchronize with stop_machine() if needed
>>
>> Signed-off-by: Jiang Liu <[email protected]>
>> Cc: Jiang Liu <[email protected]>
>> ---
>> arch/arm64/include/asm/insn.h | 24 +++++++++++++-
>> arch/arm64/kernel/insn.c | 77 +++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 100 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
>> index 6190016..fc439b9 100644
>> --- a/arch/arm64/include/asm/insn.h
>> +++ b/arch/arm64/include/asm/insn.h
>> @@ -60,8 +60,30 @@ __AARCH64_INSN_FUNCS(nop, 0xFFFFFFFF, 0xD503201F)
>>
>> #undef __AARCH64_INSN_FUNCS
>>
>> -enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
>> +/*
>> + * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always
>> + * little-endian. On the other hand, SCTLR_EL1.EE (bit 25, Exception Endianness)
>> + * flag controls endianness for EL1 explicit data accesses and stage 1
>> + * translation table walks as below:
>> + * 0: little-endian
>> + * 1: big-endian
>> + * So need to handle endianness when patching kernel code.
>> + */
>
> You can delete this comment now that we're using the helpers...
>
>> +static __always_inline u32 aarch64_insn_read(void *addr)
>> +{
>> + return le32_to_cpu(*(u32 *)addr);
>> +}
>>
>> +static __always_inline void aarch64_insn_write(void *addr, u32 insn)
>> +{
>> + *(u32 *)addr = cpu_to_le32(insn);
>> +}
>
> ... then just inline these calls directly.
>
>> +enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
>> bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
>>
>> +int aarch64_insn_patch_text_nosync(void *addrs[], u32 insns[], int cnt);
>> +int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
>> +int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
>> +
>> #endif /* _ASM_ARM64_INSN_H */
>> diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
>> index 1c501f3..8dd5fbe 100644
>> --- a/arch/arm64/kernel/insn.c
>> +++ b/arch/arm64/kernel/insn.c
>> @@ -16,6 +16,9 @@
>> */
>> #include <linux/compiler.h>
>> #include <linux/kernel.h>
>> +#include <linux/smp.h>
>> +#include <linux/stop_machine.h>
>> +#include <asm/cacheflush.h>
>> #include <asm/insn.h>
>>
>> static int aarch64_insn_encoding_cls[] = {
>> @@ -70,3 +73,77 @@ bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn)
>> return __aarch64_insn_hotpatch_safe(old_insn) &&
>> __aarch64_insn_hotpatch_safe(new_insn);
>> }
>> +
>> +int __kprobes aarch64_insn_patch_text_nosync(void *addrs[], u32 insns[],
>> + int cnt)
>> +{
>> + int i;
>> + u32 *tp;
>> +
>> + if (cnt <= 0)
>> + return -EINVAL;
>
> Isn't cnt always 1 for the _nosync patching? Can you just drop the argument
> and simplify this code? Patching a sequence without syncing is always racy.
Will drop the third parameter and simplify the code.
>
>> + for (i = 0; i < cnt; i++) {
>> + tp = addrs[i];
>> + /* A64 instructions must be word aligned */
>> + if ((uintptr_t)tp & 0x3)
>> + return -EINVAL;
>> + aarch64_insn_write(tp, insns[i]);
>> + flush_icache_range((uintptr_t)tp, (uintptr_t)tp + sizeof(u32));
>> + }
>> +
>> + return 0;
>> +}
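A minimal sketch of the simplified variant discussed above (the cnt
parameter dropped and the endianness helper inlined); this only shows the
direction, not the final patch, and assumes flush_icache_range() is used
exactly as in the hunk above:

int aarch64_insn_patch_text_nosync(void *addr, u32 insn)
{
	u32 *tp = addr;

	/* A64 instructions must be word aligned */
	if ((uintptr_t)tp & 0x3)
		return -EINVAL;

	/* A64 instructions are always little-endian */
	*tp = cpu_to_le32(insn);
	flush_icache_range((uintptr_t)tp, (uintptr_t)tp + sizeof(insn));

	return 0;
}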
>> +
>> +struct aarch64_insn_patch {
>> + void **text_addrs;
>> + u32 *new_insns;
>> + int insn_cnt;
>> +};
>> +
>> +static int __kprobes aarch64_insn_patch_text_cb(void *arg)
>> +{
>> + struct aarch64_insn_patch *pp = arg;
>> +
>> + return aarch64_insn_patch_text_nosync(pp->text_addrs, pp->new_insns,
>> + pp->insn_cnt);
>> +}
>> +
>> +int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt)
>> +{
>> + struct aarch64_insn_patch patch = {
>> + .text_addrs = addrs,
>> + .new_insns = insns,
>> + .insn_cnt = cnt,
>> + };
>> +
>> + if (cnt <= 0)
>> + return -EINVAL;
>> +
>> + /*
>> + * Execute __aarch64_insn_patch_text() on every online CPU,
>> + * which ensure serialization among all online CPUs.
>> + */
>> + return stop_machine(aarch64_insn_patch_text_cb, &patch, NULL);
>> +}
>
> Whoa, whoa, whoa! The comment here is wrong -- we only run the patching on
> *one* CPU, which is the right thing to do. However, the arch/arm/ call to
> stop_machine in kprobes does actually run the patching code on *all* the
> online cores (including the cache flushing!). I think this is to work around
> cores without hardware cache maintenance broadcasting, but that could easily
> be called out specially (like we do in patch.c) and the flushing could be
> separated from the patching too.
>
>> +int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
>> +{
>> + int ret;
>> +
>> + if (cnt == 1 && aarch64_insn_hotpatch_safe(aarch64_insn_read(addrs[0]),
>> + insns[0])) {
>
> You could make aarch64_insn_hotpatch_safe take the cnt parameter and return
> false if cnt != 1.
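A sketch of that suggestion, reusing the existing
__aarch64_insn_hotpatch_safe() helper (the prototype in insn.h and the
caller in aarch64_insn_patch_text() would be updated to pass cnt through):

bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn, int cnt)
{
	/*
	 * Per ARMv8-A B2.6.5, only a single instruction from the safe set
	 * may be replaced while other CPUs may be executing it.
	 */
	return cnt == 1 &&
	       __aarch64_insn_hotpatch_safe(old_insn) &&
	       __aarch64_insn_hotpatch_safe(new_insn);
}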
>
>> + /*
>> + * It doesn't guarantee all CPUs see the new instruction
>
> "It"? You mean the ARMv8 architecture?
Yes, I mean the ARMv8 architecture.
>
> Will
>
On 10/17/2013 11:24 PM, Jon Medhurst (Tixy) wrote:
> On Thu, 2013-10-17 at 12:38 +0100, Will Deacon wrote:
>> [adding Tixy for stop_machine() question below]
>>
>> On Thu, Oct 17, 2013 at 07:19:35AM +0100, Jiang Liu wrote:
> [...]
>>> +int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt)
>>> +{
>>> + struct aarch64_insn_patch patch = {
>>> + .text_addrs = addrs,
>>> + .new_insns = insns,
>>> + .insn_cnt = cnt,
>>> + };
>>> +
>>> + if (cnt <= 0)
>>> + return -EINVAL;
>>> +
>>> + /*
>>> + * Execute __aarch64_insn_patch_text() on every online CPU,
>>> + * which ensure serialization among all online CPUs.
>>> + */
>>> + return stop_machine(aarch64_insn_patch_text_cb, &patch, NULL);
>>> +}
>>
>> Whoa, whoa, whoa! The comment here is wrong -- we only run the patching on
>> *one* CPU, which is the right thing to do. However, the arch/arm/ call to
>> stop_machine in kprobes does actually run the patching code on *all* the
>> online cores (including the cache flushing!). I think this is to work around
>> cores without hardware cache maintenance broadcasting, but that could easily
>> be called out specially (like we do in patch.c) and the flushing could be
>> separated from the patching too.
> [...]
>
> For code modifications done in 32bit ARM kprobes (and ftrace) I'm not
> sure we ever actually resolved the possible cache flushing issues. If
> there was specific reasons for flushing on all cores I can't remember
> them, sorry. I have a suspicion that doing so was a case of sticking
> with what the code was already doing, and flushing on all cores seemed
> safest to guard against problems we hadn't thought about.
>
> Some of the issues discussed were that we couldn't have one core
> potentially executing instructions being modified by another CPU,
> because that's architecturally unpredictable except for a few
> instructions [1], and we also have the case where a 32-bit Thumb
> instruction can straddle two different cache-lines. But these may not be
> reasons to flush on all cores if stop machine is synchronising all CPU's
> in a kind of holding pen and the cache operations done on one core are
> broadcast to others. (Are there correct barriers involved in
> stop-machine so that when the other cores resume they are guaranteed to
> only see the new version of the modified code, or do we only get that
> guarantee because we happen to execute the cache flushing on all cores?)
I think the guarantee comes from the cache flushing rather than from
stop_machine() itself, because the cache flushing includes an ISB.
The ideal flow should be:
1) the master CPU acquires a lock to serialize text patching
2) all CPUs rendezvous at a barrier
3) the master updates memory and flushes the cache
4) the master sets a flag to let all other CPUs continue
5) all other CPUs execute an ISB.
Updating memory and flushing the cache on every CPU under stop_machine()
achieves the same effect with a simpler implementation, but that
implementation really does look a little strange.
If desired, I will implement the flow described above.
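A rough sketch of that flow, assuming it runs as a stop_machine() callback
so the rendezvous of step 2 is provided by every online CPU being parked in
the handler, and step 1 (the serializing lock) is taken by the caller. The
struct and field names below are illustrative only:

#include <linux/atomic.h>
#include <linux/stop_machine.h>
#include <asm/barrier.h>
#include <asm/byteorder.h>
#include <asm/cacheflush.h>

struct text_patch {
	void		*addr;
	u32		insn;
	atomic_t	cpu_count;	/* CPUs that entered the callback */
	atomic_t	patched;	/* set once the new insn is visible */
};

static int text_patch_cb(void *arg)
{
	struct text_patch *p = arg;

	if (atomic_inc_return(&p->cpu_count) == 1) {
		/* step 3: the "master" CPU updates memory and flushes */
		*(u32 *)p->addr = cpu_to_le32(p->insn);
		flush_icache_range((uintptr_t)p->addr,
				   (uintptr_t)p->addr + sizeof(u32));
		/* step 4: release the other CPUs */
		smp_wmb();
		atomic_set(&p->patched, 1);
	} else {
		/* step 4 (slave side): spin until the master sets the flag */
		while (!atomic_read(&p->patched))
			cpu_relax();
		smp_rmb();
		/* step 5: resynchronize the local instruction stream */
		isb();
	}

	return 0;
}

/*
 * usage (caller holds the serializing lock, i.e. step 1):
 *	struct text_patch patch = {
 *		.addr = addr, .insn = insn,
 *		.cpu_count = ATOMIC_INIT(0), .patched = ATOMIC_INIT(0),
 *	};
 *	stop_machine(text_patch_cb, &patch, NULL);
 */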
>
> [1] http://lists.infradead.org/pipermail/linux-arm-kernel/2012-December/136441.html
>
> Another of the issues I hit was big.LITTLE related whereby the cache
> line size is different on different cores [2].
>
> [2] http://lists.infradead.org/pipermail/linux-arm-kernel/2013-February/149794.html
>
> I don't think anything I've said above actually gives a solid reason why
> we _must_ execute cache flushing on all cores for kprobes and can't just
> use the relatively new patch_text function (which checks for the one
> case we do need to flush on all cores using cache_ops_need_broadcast).
>
> Sorry, I don't think I've added much light on things here have I?
>
Hi Tixy,
On Thu, Oct 17, 2013 at 04:24:01PM +0100, Jon Medhurst (Tixy) wrote:
> On Thu, 2013-10-17 at 12:38 +0100, Will Deacon wrote:
> > On Thu, Oct 17, 2013 at 07:19:35AM +0100, Jiang Liu wrote:
> > > + /*
> > > + * Execute __aarch64_insn_patch_text() on every online CPU,
> > > + * which ensure serialization among all online CPUs.
> > > + */
> > > + return stop_machine(aarch64_insn_patch_text_cb, &patch, NULL);
> > > +}
> >
> > Whoa, whoa, whoa! The comment here is wrong -- we only run the patching on
> > *one* CPU, which is the right thing to do. However, the arch/arm/ call to
> > stop_machine in kprobes does actually run the patching code on *all* the
> > online cores (including the cache flushing!). I think this is to work around
> > cores without hardware cache maintenance broadcasting, but that could easily
> > be called out specially (like we do in patch.c) and the flushing could be
> > separated from the patching too.
> [...]
>
> For code modifications done in 32bit ARM kprobes (and ftrace) I'm not
> sure we ever actually resolved the possible cache flushing issues. If
> there was specific reasons for flushing on all cores I can't remember
> them, sorry. I have a suspicion that doing so was a case of sticking
> with what the code was already doing, and flushing on all cores seemed
> safest to guard against problems we hadn't thought about.
[...]
> Sorry, I don't think I've added much light on things here have I?
I think you missed the bit I was confused about :) Flushing the cache on
each core is necessary if cache_ops_need_broadcast, so I can understand why
you'd have code to do that. The bit I don't understand is that you actually
patch the instruction on each core too!
Will
On Fri, 2013-10-18 at 09:56 +0100, Will Deacon wrote:
> Hi Tixy,
>
> On Thu, Oct 17, 2013 at 04:24:01PM +0100, Jon Medhurst (Tixy) wrote:
> > On Thu, 2013-10-17 at 12:38 +0100, Will Deacon wrote:
> > > On Thu, Oct 17, 2013 at 07:19:35AM +0100, Jiang Liu wrote:
> > > > + /*
> > > > + * Execute __aarch64_insn_patch_text() on every online CPU,
> > > > + * which ensure serialization among all online CPUs.
> > > > + */
> > > > + return stop_machine(aarch64_insn_patch_text_cb, &patch, NULL);
> > > > +}
> > >
> > > Whoa, whoa, whoa! The comment here is wrong -- we only run the patching on
> > > *one* CPU, which is the right thing to do. However, the arch/arm/ call to
> > > stop_machine in kprobes does actually run the patching code on *all* the
> > > online cores (including the cache flushing!). I think this is to work around
> > > cores without hardware cache maintenance broadcasting, but that could easily
> > > be called out specially (like we do in patch.c) and the flushing could be
> > > separated from the patching too.
> > [...]
> >
> > For code modifications done in 32bit ARM kprobes (and ftrace) I'm not
> > sure we ever actually resolved the possible cache flushing issues. If
> > there was specific reasons for flushing on all cores I can't remember
> > them, sorry. I have a suspicion that doing so was a case of sticking
> > with what the code was already doing, and flushing on all cores seemed
> > safest to guard against problems we hadn't thought about.
>
> [...]
>
> > Sorry, I don't think I've added much light on things here have I?
>
> I think you missed the bit I was confused about :) Flushing the cache on
> each core is necessary if cache_ops_need_broadcast, so I can understand why
> you'd have code to do that. The bit I don't understand is that you actually
> patch the instruction on each core too!
This only happens when removing a kprobe with __arch_disarm_kprobe().
We can't just use the intelligent patch_text() function there because we
always want to force stop_machine() to be used, as this prevents the case
where a CPU hits the probe, starts executing its handler, and then another
CPU whips the probe away from under it.
That explains why we use stop_machine(), but not why all CPUs must modify
the instruction. I think it's simply that it's easier to do that
unconditionally rather than add extra code for the
cache_ops_need_broadcast() case. I mean, stop_machine() is a
sledgehammer, which stalls the whole system until the next scheduler tick, and
then gets every CPU to busy wait, so there's not much incentive to try
and optimise the code to avoid a memory write + cacheline flush on each
core.
This reminds me, I'm sure I heard rumours quite some time ago that Paul
McKenney was thinking of trying to do away with stop_machine...?
--
Tixy
On 10/18/2013 09:44 PM, Jon Medhurst (Tixy) wrote:
> On Fri, 2013-10-18 at 09:56 +0100, Will Deacon wrote:
>> Hi Tixy,
>>
>> On Thu, Oct 17, 2013 at 04:24:01PM +0100, Jon Medhurst (Tixy) wrote:
>>> On Thu, 2013-10-17 at 12:38 +0100, Will Deacon wrote:
>>>> On Thu, Oct 17, 2013 at 07:19:35AM +0100, Jiang Liu wrote:
>>>>> + /*
>>>>> + * Execute __aarch64_insn_patch_text() on every online CPU,
>>>>> + * which ensure serialization among all online CPUs.
>>>>> + */
>>>>> + return stop_machine(aarch64_insn_patch_text_cb, &patch, NULL);
>>>>> +}
>>>>
>>>> Whoa, whoa, whoa! The comment here is wrong -- we only run the patching on
>>>> *one* CPU, which is the right thing to do. However, the arch/arm/ call to
>>>> stop_machine in kprobes does actually run the patching code on *all* the
>>>> online cores (including the cache flushing!). I think this is to work around
>>>> cores without hardware cache maintenance broadcasting, but that could easily
>>>> be called out specially (like we do in patch.c) and the flushing could be
>>>> separated from the patching too.
>>> [...]
>>>
>>> For code modifications done in 32bit ARM kprobes (and ftrace) I'm not
>>> sure we ever actually resolved the possible cache flushing issues. If
>>> there was specific reasons for flushing on all cores I can't remember
>>> them, sorry. I have a suspicion that doing so was a case of sticking
>>> with what the code was already doing, and flushing on all cores seemed
>>> safest to guard against problems we hadn't thought about.
>>
>> [...]
>>
>>> Sorry, I don't think I've added much light on things here have I?
>>
>> I think you missed the bit I was confused about :) Flushing the cache on
>> each core is necessary if cache_ops_need_broadcast, so I can understand why
>> you'd have code to do that. The bit I don't understand is that you actually
>> patch the instruction on each core too!
>
> This is only happens when removing a kprobe with __arch_disarm_kprobe().
> We can't just use the intelligent patch_text() function there because we
> want to always force stop machine to be used as this prevents the case
> where a CPU a hits the probe, starts executing it's handler then another
> CPU whips away the probe from under it.
>
> That explains why we use stop_machine, but not why all CPU's must modify
> the instruction. I think it's a case of just that it's simpler to do
> that unconditionally rather than add extra code for the
> cache_ops_need_broadcast() case. I mean, stop_machine() is a sledge
> hammer, which stalls the whole system until the next scheduler tick, and
> then gets every CPU to busy wait, so there's not much incentive to try
> and optimise the code to avoid a memory write + cacheline flush on each
> core.
>
> This reminds me, I'm sure I heard rumours quite some time ago that Paul
> McKenney was thinking of trying to do away with stop_machine...?
>
I remember McKenney has tried that, but I don't know how far it has progressed.