From: Miroslav Benes <[email protected]>
Make the kobj variable (of type struct kobject) in the klp_object
structure statically allocated. This will allow us to move through the
func-object-patch hierarchy via kobject links.
The only reason to have it dynamically allocated was to avoid an empty
release callback in the code. However, we now have empty callbacks for
function and patch anyway, so that reason is no longer valid and the
advantage of static allocation is clear.
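For illustration, a minimal sketch of what the change means for object
setup and teardown (illustrative only; the actual changes are in the
diff below):
	/* before: separately allocated kobject */
	obj->kobj = kobject_create_and_add(name, &patch->kobj);
	/* ... */
	kobject_put(obj->kobj);
	/* after: kobject embedded in struct klp_object, initialized in place */
	ret = kobject_init_and_add(&obj->kobj, &klp_ktype_object,
				   &patch->kobj, "%s", name);
	/* ... */
	kobject_put(&obj->kobj);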
Signed-off-by: Miroslav Benes <[email protected]>
Signed-off-by: Jiri Slaby <[email protected]>
---
include/linux/livepatch.h | 2 +-
kernel/livepatch/core.c | 22 ++++++++++++++++------
2 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index ee6dbb39a809..fe45f2f02c8d 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -99,7 +99,7 @@ struct klp_object {
struct klp_func *funcs;
/* internal */
- struct kobject *kobj;
+ struct kobject kobj;
struct module *mod;
enum klp_state state;
};
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 284e2691e380..e163faaef5e9 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -651,6 +651,15 @@ static struct kobj_type klp_ktype_patch = {
.default_attrs = klp_patch_attrs,
};
+static void klp_kobj_release_object(struct kobject *kobj)
+{
+}
+
+static struct kobj_type klp_ktype_object = {
+ .release = klp_kobj_release_object,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
static void klp_kobj_release_func(struct kobject *kobj)
{
}
@@ -695,7 +704,7 @@ static void klp_free_objects_limited(struct klp_patch *patch,
for (obj = patch->objs; obj->funcs && obj != limit; obj++) {
klp_free_funcs_limited(obj, NULL);
- kobject_put(obj->kobj);
+ kobject_put(&obj->kobj);
}
}
@@ -713,7 +722,7 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func)
func->state = KLP_DISABLED;
return kobject_init_and_add(&func->kobj, &klp_ktype_func,
- obj->kobj, "%s", func->old_name);
+ &obj->kobj, "%s", func->old_name);
}
/* parts of the initialization that is done only when the object is loaded */
@@ -753,9 +762,10 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj)
klp_find_object_module(obj);
name = klp_is_module(obj) ? obj->name : "vmlinux";
- obj->kobj = kobject_create_and_add(name, &patch->kobj);
- if (!obj->kobj)
- return -ENOMEM;
+ ret = kobject_init_and_add(&obj->kobj, &klp_ktype_object,
+ &patch->kobj, "%s", name);
+ if (ret)
+ return ret;
for (func = obj->funcs; func->old_name; func++) {
ret = klp_init_func(obj, func);
@@ -773,7 +783,7 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj)
free:
klp_free_funcs_limited(obj, func);
- kobject_put(obj->kobj);
+ kobject_put(&obj->kobj);
return ret;
}
--
2.3.5
klp_for_each_object and klp_for_each_func are now used all over the
code. One no longer needs to think about the proper termination
condition of the for loop.
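For example, a walker over a whole patch now reads as follows (a sketch
only; example_walk() is a hypothetical helper, not part of this patch):
	static void example_walk(struct klp_patch *patch)
	{
		struct klp_object *obj;
		struct klp_func *func;
		klp_for_each_object(patch, obj)		/* stops at obj->funcs == NULL */
			klp_for_each_func(obj, func)	/* stops at func->old_name == NULL */
				pr_debug("%s\n", func->old_name);
	}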
Signed-off-by: Jiri Slaby <[email protected]>
---
include/linux/livepatch.h | 6 ++++++
kernel/livepatch/core.c | 18 +++++++++---------
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index fe45f2f02c8d..31db7a05dd36 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -123,6 +123,12 @@ struct klp_patch {
enum klp_state state;
};
+#define klp_for_each_object(patch, obj) \
+ for (obj = patch->objs; obj->funcs; obj++)
+
+#define klp_for_each_func(obj, func) \
+ for (func = obj->funcs; func->old_name; func++)
+
int klp_register_patch(struct klp_patch *);
int klp_unregister_patch(struct klp_patch *);
int klp_enable_patch(struct klp_patch *);
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index e163faaef5e9..2da42be84452 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -422,7 +422,7 @@ static void klp_disable_object(struct klp_object *obj)
{
struct klp_func *func;
- for (func = obj->funcs; func->old_name; func++)
+ klp_for_each_func(obj, func)
if (func->state == KLP_ENABLED)
klp_disable_func(func);
@@ -440,7 +440,7 @@ static int klp_enable_object(struct klp_object *obj)
if (WARN_ON(!klp_is_object_loaded(obj)))
return -EINVAL;
- for (func = obj->funcs; func->old_name; func++) {
+ klp_for_each_func(obj, func) {
ret = klp_enable_func(func);
if (ret) {
klp_disable_object(obj);
@@ -463,7 +463,7 @@ static int __klp_disable_patch(struct klp_patch *patch)
pr_notice("disabling patch '%s'\n", patch->mod->name);
- for (obj = patch->objs; obj->funcs; obj++) {
+ klp_for_each_object(patch, obj) {
if (obj->state == KLP_ENABLED)
klp_disable_object(obj);
}
@@ -523,7 +523,7 @@ static int __klp_enable_patch(struct klp_patch *patch)
pr_notice("enabling patch '%s'\n", patch->mod->name);
- for (obj = patch->objs; obj->funcs; obj++) {
+ klp_for_each_object(patch, obj) {
if (!klp_is_object_loaded(obj))
continue;
@@ -689,7 +689,7 @@ static void klp_free_object_loaded(struct klp_object *obj)
obj->mod = NULL;
- for (func = obj->funcs; func->old_name; func++)
+ klp_for_each_func(obj, func)
func->old_addr = 0;
}
@@ -738,7 +738,7 @@ static int klp_init_object_loaded(struct klp_patch *patch,
return ret;
}
- for (func = obj->funcs; func->old_name; func++) {
+ klp_for_each_func(obj, func) {
ret = klp_find_verify_func_addr(obj, func);
if (ret)
return ret;
@@ -767,7 +767,7 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj)
if (ret)
return ret;
- for (func = obj->funcs; func->old_name; func++) {
+ klp_for_each_func(obj, func) {
ret = klp_init_func(obj, func);
if (ret)
goto free;
@@ -804,7 +804,7 @@ static int klp_init_patch(struct klp_patch *patch)
if (ret)
goto unlock;
- for (obj = patch->objs; obj->funcs; obj++) {
+ klp_for_each_object(patch, obj) {
ret = klp_init_object(patch, obj);
if (ret)
goto free;
@@ -959,7 +959,7 @@ static int klp_module_notify(struct notifier_block *nb, unsigned long action,
mod->klp_alive = false;
list_for_each_entry(patch, &klp_patches, list) {
- for (obj = patch->objs; obj->funcs; obj++) {
+ klp_for_each_object(patch, obj) {
if (!klp_is_module(obj) || strcmp(obj->name, mod->name))
continue;
--
2.3.5
These helpers will be used by the kGraft-like patching.
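A usage sketch (example_report() is a hypothetical helper; the helpers
rely on the statically embedded kobjects: func->kobj.parent is the
object's kobject and its parent is the patch's kobject):
	static void example_report(struct klp_func *func)
	{
		struct klp_patch *patch = klp_func_to_patch(func);
		pr_debug("func %s belongs to patch module %s\n",
			 func->old_name, patch->mod->name);
	}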
Signed-off-by: Jiri Slaby <[email protected]>
---
include/linux/livepatch.h | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index 31db7a05dd36..fabb067a3f1d 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -129,6 +129,16 @@ struct klp_patch {
#define klp_for_each_func(obj, func) \
for (func = obj->funcs; func->old_name; func++)
+static inline struct klp_patch *klp_func_to_patch(struct klp_func *func)
+{
+ return container_of(func->kobj.parent->parent, struct klp_patch, kobj);
+}
+
+static inline struct klp_patch *klp_object_to_patch(struct klp_object *obj)
+{
+ return container_of(obj->kobj.parent, struct klp_patch, kobj);
+}
+
int klp_register_patch(struct klp_patch *);
int klp_unregister_patch(struct klp_patch *);
int klp_enable_patch(struct klp_patch *);
--
2.3.5
This means:
* add a per-thread flag to indicate whether a task is in the old or in
the new universe,
* reset it in the _slow_ paths of syscall entry/exit,
* add helpers around the flag to sched.h,
* export the status in /proc/<pid>/kgr_in_progress.
This was cherry-picked from the kGraft implementation and will serve
as a base for kGraft-like patching in Live Patching; a short usage
sketch follows below.
Miroslav helped to clean up the assembly and move it to C.
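A rough sketch of how the helpers fit together (illustrative only; the
real hook sites for x86 and s390 are in the diff below):
	/* when patching starts, mark a user task p as still in progress */
	klp_kgraft_mark_task_in_progress(p);
	/* in the syscall entry/exit slow paths and in do_notify_resume(),
	 * the task sits on the kernel/userspace boundary, so it is safe: */
	if (klp_kgraft_task_in_progress(current))
		klp_kgraft_mark_task_safe(current);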
Signed-off-by: Jiri Slaby <[email protected]>
Cc: Miroslav Benes <[email protected]>
Cc: Martin Schwidefsky <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: [email protected]
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: [email protected]
---
arch/s390/include/asm/thread_info.h | 2 ++
arch/s390/kernel/Makefile | 1 +
arch/s390/kernel/entry.S | 11 +++++++++++
arch/s390/kernel/livepatch.c | 14 ++++++++++++++
arch/x86/include/asm/thread_info.h | 11 +++++++----
arch/x86/kernel/ptrace.c | 7 +++++++
arch/x86/kernel/signal.c | 5 +++++
fs/proc/base.c | 14 ++++++++++++++
include/linux/sched.h | 23 +++++++++++++++++++++++
9 files changed, 84 insertions(+), 4 deletions(-)
create mode 100644 arch/s390/kernel/livepatch.c
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 4c27ec764c36..88a559531a7b 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -82,6 +82,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define TIF_SINGLE_STEP 19 /* This task is single stepped */
#define TIF_BLOCK_STEP 20 /* This task is block stepped */
#define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */
+#define TIF_KGR_IN_PROGRESS 22 /* This task has not finished patching */
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
@@ -93,6 +94,7 @@ void arch_release_task_struct(struct task_struct *tsk);
#define _TIF_UPROBE (1<<TIF_UPROBE)
#define _TIF_31BIT (1<<TIF_31BIT)
#define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP)
+#define _TIF_KGR_IN_PROGRESS (1<<TIF_KGR_IN_PROGRESS)
#define is_32bit_task() (test_thread_flag(TIF_31BIT))
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index ffb87617a36c..ff3ecac94ebe 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_AUDIT) += audit.o
compat-obj-$(CONFIG_AUDIT) += compat_audit.o
obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o
obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y)
+obj-$(CONFIG_LIVEPATCH) += livepatch.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_KPROBES) += kprobes.o
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 99b44acbfcc7..f4be8e142a50 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -73,6 +73,15 @@ _PIF_WORK = (_PIF_PER_TRAP)
#endif
.endm
+ .macro HANDLE_KGRAFT TI_reg
+#if IS_ENABLED(CONFIG_LIVEPATCH)
+ tm __TI_flags+5(\TI_reg),(_TIF_KGR_IN_PROGRESS >> 16)
+ jz 0f
+ brasl %r14,s390_handle_kgraft
+0:
+#endif
+ .endm
+
.macro LPP newpp
#if IS_ENABLED(CONFIG_KVM)
tm __LC_MACHINE_FLAGS+6,0x20 # MACHINE_FLAG_LPP
@@ -217,6 +226,7 @@ ENTRY(system_call)
mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
stg %r14,__PT_FLAGS(%r11)
.Lsysc_do_svc:
+ HANDLE_KGRAFT %r12
lg %r10,__TI_sysc_table(%r12) # address of system call table
llgh %r8,__PT_INT_CODE+2(%r11)
slag %r8,%r8,2 # shift and test for svc 0
@@ -248,6 +258,7 @@ ENTRY(system_call)
jnz .Lsysc_work # check for work
tm __LC_CPU_FLAGS+7,_CIF_WORK
jnz .Lsysc_work
+ HANDLE_KGRAFT %r12
.Lsysc_restore:
lg %r14,__LC_VDSO_PER_CPU
lmg %r0,%r10,__PT_R0(%r11)
diff --git a/arch/s390/kernel/livepatch.c b/arch/s390/kernel/livepatch.c
new file mode 100644
index 000000000000..99ace9df5576
--- /dev/null
+++ b/arch/s390/kernel/livepatch.c
@@ -0,0 +1,14 @@
+/*
+ * livepatch.c - s390-specific Kernel Live Patching Core
+ *
+ * Copyright (C) 2014-2015 SUSE
+ *
+ * This file is licensed under the GPLv2.
+ */
+
+#include <linux/sched.h>
+
+asmlinkage void s390_handle_kgraft(void)
+{
+ klp_kgraft_mark_task_safe(current);
+}
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index b4bdec3e9523..3dad428caa6b 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -106,6 +106,7 @@ struct thread_info {
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
+#define TIF_KGR_IN_PROGRESS 26 /* kGraft patching running */
#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
#define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */
#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
@@ -129,6 +130,7 @@ struct thread_info {
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
+#define _TIF_KGR_IN_PROGRESS (1 << TIF_KGR_IN_PROGRESS)
#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_ADDR32 (1 << TIF_ADDR32)
@@ -138,7 +140,7 @@ struct thread_info {
#define _TIF_WORK_SYSCALL_ENTRY \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
_TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
- _TIF_NOHZ)
+ _TIF_NOHZ | _TIF_KGR_IN_PROGRESS)
/* work to do in syscall_trace_leave() */
#define _TIF_WORK_SYSCALL_EXIT \
@@ -149,17 +151,18 @@ struct thread_info {
#define _TIF_WORK_MASK \
(0x0000FFFF & \
~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT| \
- _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))
+ _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU) | \
+ _TIF_KGR_IN_PROGRESS)
/* work to do on any return to user space */
#define _TIF_ALLWORK_MASK \
((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \
- _TIF_NOHZ)
+ _TIF_NOHZ | _TIF_KGR_IN_PROGRESS)
/* Only used for 64 bit */
#define _TIF_DO_NOTIFY_MASK \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \
- _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE)
+ _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE | _TIF_KGR_IN_PROGRESS)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index a7bc79480719..454f4734b840 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1527,6 +1527,13 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
}
#endif
+#if IS_ENABLED(CONFIG_LIVEPATCH)
+ if (work & _TIF_KGR_IN_PROGRESS) {
+ klp_kgraft_mark_task_safe(current);
+ work &= ~_TIF_KGR_IN_PROGRESS;
+ }
+#endif
+
/* Do our best to finish without phase 2. */
if (work == 0)
return ret; /* seccomp and/or nohz only (ret == 0 here) */
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 1ea14fd53933..d5e38e2161ac 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -730,6 +730,11 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
+#if IS_ENABLED(CONFIG_LIVEPATCH)
+ if (thread_info_flags & _TIF_KGR_IN_PROGRESS)
+ klp_kgraft_mark_task_safe(current);
+#endif
+
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 093ca14f5701..4973ae795abd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2052,6 +2052,17 @@ static const struct file_operations proc_timers_operations = {
};
#endif /* CONFIG_CHECKPOINT_RESTORE */
+#if IS_ENABLED(CONFIG_LIVEPATCH)
+static int proc_pid_kgr_in_progress(struct seq_file *m,
+ struct pid_namespace *ns, struct pid *pid,
+ struct task_struct *task)
+{
+ seq_printf(m, "%d\n", klp_kgraft_task_in_progress(task));
+
+ return 0;
+}
+#endif /* IS_ENABLED(CONFIG_LIVEPATCH) */
+
static int proc_pident_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
@@ -2640,6 +2651,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_CHECKPOINT_RESTORE
REG("timers", S_IRUGO, proc_timers_operations),
#endif
+#if IS_ENABLED(CONFIG_LIVEPATCH)
+ ONE("kgr_in_progress", S_IRUSR, proc_pid_kgr_in_progress),
+#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8222ae40ecb0..4c0555261cb1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3085,6 +3085,29 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
}
#endif /* CONFIG_MEMCG */
+#if IS_ENABLED(CONFIG_LIVEPATCH)
+static inline void klp_kgraft_mark_task_safe(struct task_struct *p)
+{
+ clear_tsk_thread_flag(p, TIF_KGR_IN_PROGRESS);
+}
+static inline void klp_kgraft_mark_task_in_progress(struct task_struct *p)
+{
+ set_tsk_thread_flag(p, TIF_KGR_IN_PROGRESS);
+}
+
+static inline bool klp_kgraft_task_in_progress(struct task_struct *p)
+{
+ return test_tsk_thread_flag(p, TIF_KGR_IN_PROGRESS);
+}
+#else
+static inline void klp_kgraft_mark_task_safe(struct task_struct *p) { }
+static inline void klp_kgraft_mark_task_in_progress(struct task_struct *p) { }
+static inline bool klp_kgraft_task_in_progress(struct task_struct *p)
+{
+ return false;
+}
+#endif /* IS_ENABLED(CONFIG_LIVEPATCH) */
+
static inline unsigned long task_rlimit(const struct task_struct *tsk,
unsigned int limit)
{
--
2.3.5
We want stronger consistency models than the simple one. This is a
preparation for other, more complex ones. It moves the simple handling
out of the ftrace handler into the newly introduced struct
klp_cmodel->stub. This way, every model can implement its own handler.
On top of that, I assume the structure will be extended over time. For
example, kGraft-like patching will need pre-patch and post-patch hooks
and more.
We store the models in a list and each has an ID, which is specified in
every patch. The ID is then looked up in the list and the appropriate
cmodel is used.
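For illustration, this is what a model implementation boils down to (a
sketch; my_stub, my_model and my_model_init are placeholder names, the
real simple model is in the diff below):
	static void notrace my_stub(struct list_head *func_stack,
				    struct klp_func *func, struct pt_regs *regs)
	{
		/* the simple model always redirects to the new function */
		klp_arch_set_pc(regs, (unsigned long)func->new_func);
	}
	static struct klp_cmodel my_model = {
		.id	= KLP_CM_SIMPLE,
		.stub	= my_stub,
	};
	void my_model_init(void)
	{
		/* must run before klp_init() creates klp_root_kobj */
		klp_register_cmodel(&my_model);
	}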
Signed-off-by: Jiri Slaby <[email protected]>
---
include/linux/livepatch.h | 35 ++++++++++++++++++++++++++++++++
kernel/livepatch/Makefile | 2 +-
kernel/livepatch/cmodel-simple.c | 39 ++++++++++++++++++++++++++++++++++++
kernel/livepatch/core.c | 37 ++++++++++++++++++++++++++++++----
samples/livepatch/livepatch-sample.c | 1 +
5 files changed, 109 insertions(+), 5 deletions(-)
create mode 100644 kernel/livepatch/cmodel-simple.c
diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index fabb067a3f1d..009f308ff756 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -23,11 +23,36 @@
#include <linux/module.h>
#include <linux/ftrace.h>
+#include <linux/ptrace.h>
#if IS_ENABLED(CONFIG_LIVEPATCH)
#include <asm/livepatch.h>
+struct klp_func;
+
+/**
+ * enum klp_cmodel_id - possible consistency models
+ */
+enum klp_cmodel_id {
+ KLP_CM_INVALID = 0,
+ KLP_CM_SIMPLE, /* LEAVE_FUNCTION and SWITCH_FUNCTION */
+};
+
+/**
+ * struct klp_cmodel - implementation of a consistency model
+ * @id: id of this model (from enum klp_cmodel_id)
+ * @list: member of klp_cmodel_list
+ * @stub: what to use as an ftrace handler (annotate with notrace!)
+ */
+struct klp_cmodel {
+ const enum klp_cmodel_id id;
+ struct list_head list;
+
+ void (*stub)(struct list_head *func_stack, struct klp_func *func,
+ struct pt_regs *regs);
+};
+
enum klp_state {
KLP_DISABLED,
KLP_ENABLED
@@ -42,6 +67,7 @@ enum klp_state {
* @kobj: kobject for sysfs resources
* @state: tracks function-level patch application state
* @stack_node: list node for klp_ops func_stack list
+ * @stub: cache of klp_patch.cmodel.stub
*/
struct klp_func {
/* external */
@@ -61,6 +87,8 @@ struct klp_func {
struct kobject kobj;
enum klp_state state;
struct list_head stack_node;
+ void (*stub)(struct list_head *func_stack, struct klp_func *func,
+ struct pt_regs *regs);
};
/**
@@ -108,19 +136,23 @@ struct klp_object {
* struct klp_patch - patch structure for live patching
* @mod: reference to the live patch module
* @objs: object entries for kernel objects to be patched
+ * @cmodel_id: consistency model used to apply this patch
* @list: list node for global list of registered patches
* @kobj: kobject for sysfs resources
* @state: tracks patch-level application state
+ * @cmodel: cmodel_id's implementation
*/
struct klp_patch {
/* external */
struct module *mod;
struct klp_object *objs;
+ const enum klp_cmodel_id cmodel_id;
/* internal */
struct list_head list;
struct kobject kobj;
enum klp_state state;
+ struct klp_cmodel *cmodel;
};
#define klp_for_each_object(patch, obj) \
@@ -144,6 +176,9 @@ int klp_unregister_patch(struct klp_patch *);
int klp_enable_patch(struct klp_patch *);
int klp_disable_patch(struct klp_patch *);
+void klp_init_cmodel_simple(void);
+void klp_register_cmodel(struct klp_cmodel *);
+
#endif /* CONFIG_LIVEPATCH */
#endif /* _LINUX_LIVEPATCH_H_ */
diff --git a/kernel/livepatch/Makefile b/kernel/livepatch/Makefile
index e8780c0901d9..926533777247 100644
--- a/kernel/livepatch/Makefile
+++ b/kernel/livepatch/Makefile
@@ -1,3 +1,3 @@
obj-$(CONFIG_LIVEPATCH) += livepatch.o
-livepatch-objs := core.o
+livepatch-objs := core.o cmodel-simple.o
diff --git a/kernel/livepatch/cmodel-simple.c b/kernel/livepatch/cmodel-simple.c
new file mode 100644
index 000000000000..d4e430ff40c0
--- /dev/null
+++ b/kernel/livepatch/cmodel-simple.c
@@ -0,0 +1,39 @@
+/*
+ * cmodel-simple.c - KLP Simple Consistency Model
+ *
+ * Copyright (C) 2015 Seth Jennings <[email protected]>
+ * Copyright (C) 2015 SUSE
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/ptrace.h>
+#include <linux/list.h>
+#include <linux/livepatch.h>
+
+static void notrace klp_simple_stub(struct list_head *func_stack,
+ struct klp_func *func, struct pt_regs *regs)
+{
+ klp_arch_set_pc(regs, (unsigned long)func->new_func);
+}
+
+static struct klp_cmodel klp_simple_model = {
+ .id = KLP_CM_SIMPLE,
+ .stub = klp_simple_stub,
+};
+
+void klp_init_cmodel_simple(void)
+{
+ klp_register_cmodel(&klp_simple_model);
+}
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 2da42be84452..ab6a36688c93 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -58,6 +58,7 @@ static DEFINE_MUTEX(klp_mutex);
static LIST_HEAD(klp_patches);
static LIST_HEAD(klp_ops);
+static LIST_HEAD(klp_cmodel_list);
static struct kobject *klp_root_kobj;
@@ -319,18 +320,16 @@ static void notrace klp_ftrace_handler(unsigned long ip,
struct ftrace_ops *fops,
struct pt_regs *regs)
{
- struct klp_ops *ops;
+ struct klp_ops *ops = container_of(fops, struct klp_ops, fops);
struct klp_func *func;
- ops = container_of(fops, struct klp_ops, fops);
-
rcu_read_lock();
func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
stack_node);
if (WARN_ON_ONCE(!func))
goto unlock;
- klp_arch_set_pc(regs, (unsigned long)func->new_func);
+ func->stub(&ops->func_stack, func, regs);
unlock:
rcu_read_unlock();
}
@@ -720,6 +719,7 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func)
{
INIT_LIST_HEAD(&func->stack_node);
func->state = KLP_DISABLED;
+ func->stub = klp_object_to_patch(obj)->cmodel->stub;
return kobject_init_and_add(&func->kobj, &klp_ktype_func,
&obj->kobj, "%s", func->old_name);
@@ -790,13 +790,28 @@ free:
static int klp_init_patch(struct klp_patch *patch)
{
struct klp_object *obj;
+ struct klp_cmodel *cm, *cmodel = NULL;
int ret;
if (!patch->objs)
return -EINVAL;
+ list_for_each_entry(cm, &klp_cmodel_list, list) {
+ if (patch->cmodel_id == cm->id) {
+ cmodel = cm;
+ break;
+ }
+ }
+
+ if (!cmodel) {
+ pr_err("%s: patch '%ps' requires unknown consistency model %d\n",
+ __func__, patch, patch->cmodel_id);
+ return -EINVAL;
+ }
+
mutex_lock(&klp_mutex);
+ patch->cmodel = cmodel;
patch->state = KLP_DISABLED;
ret = kobject_init_and_add(&patch->kobj, &klp_ktype_patch,
@@ -893,6 +908,18 @@ int klp_register_patch(struct klp_patch *patch)
}
EXPORT_SYMBOL_GPL(klp_register_patch);
+/**
+ * klp_register_cmodel - register a consistency model
+ * @model: model to register
+ *
+ * This function has to be called synchronously before klp_root_kobj is
+ * created in klp_init since we use no locking.
+ */
+void klp_register_cmodel(struct klp_cmodel *model)
+{
+ list_add_tail(&model->list, &klp_cmodel_list);
+}
+
static void klp_module_notify_coming(struct klp_patch *patch,
struct klp_object *obj)
{
@@ -993,6 +1020,8 @@ static int klp_init(void)
return -EINVAL;
}
+ klp_init_cmodel_simple();
+
ret = register_module_notifier(&klp_module_nb);
if (ret)
return ret;
diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c
index fb8c8614e728..48621de040db 100644
--- a/samples/livepatch/livepatch-sample.c
+++ b/samples/livepatch/livepatch-sample.c
@@ -63,6 +63,7 @@ static struct klp_object objs[] = {
static struct klp_patch patch = {
.mod = THIS_MODULE,
.objs = objs,
+ .cmodel_id = KLP_CM_SIMPLE,
};
static int livepatch_init(void)
--
2.3.5
We separate the patching into two phases, sketched below:
* preparation: this phase can fail, but in the presence of kGraft-like
patching a failure here can be handled easily.
* patching: this phase cannot fail, so kGraft-like patching need not
handle failures in a very complicated way.
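A sketch of the resulting enable flow (loaded-object checks omitted;
the real code, inside __klp_enable_patch(), is in the diff below):
	/* phase 1: may fail, nothing is redirected to new code yet */
	klp_for_each_object(patch, obj) {
		ret = klp_prepare_enable_object(obj);
		if (ret)
			goto unregister;	/* rollback is straightforward */
	}
	/* phase 2: cannot fail, functions go live on the func_stack */
	klp_for_each_object(patch, obj)
		klp_enable_object(obj);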
Signed-off-by: Jiri Slaby <[email protected]>
---
include/linux/livepatch.h | 11 ++++++-
include/linux/sched.h | 5 +++
kernel/livepatch/core.c | 84 +++++++++++++++++++++++++++++++----------------
3 files changed, 70 insertions(+), 30 deletions(-)
diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index 009f308ff756..add2b1bd1cce 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -53,9 +53,18 @@ struct klp_cmodel {
struct pt_regs *regs);
};
+/**
+ * enum klp_state -- state of patches, objects, functions
+ * @KLP_DISABLED: completely disabled
+ * @KLP_ENABLED: completely enabled (applied)
+ * @KLP_PREPARED: being applied
+ *
+ * @KLP_DISABLED & @KLP_ENABLED are part of the /sys ABI
+ */
enum klp_state {
KLP_DISABLED,
- KLP_ENABLED
+ KLP_ENABLED,
+ KLP_PREPARED,
};
/**
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4c0555261cb1..6b2f4c01c516 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3086,6 +3086,11 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
#endif /* CONFIG_MEMCG */
#if IS_ENABLED(CONFIG_LIVEPATCH)
+/*
+ * This function is called only when the given thread is not running
+ * any patched function. Therefore the flag might be cleared without
+ * klp_kgr_state_lock.
+ */
static inline void klp_kgraft_mark_task_safe(struct task_struct *p)
{
clear_tsk_thread_flag(p, TIF_KGR_IN_PROGRESS);
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index ab6a36688c93..87d94aadfebf 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -41,11 +41,13 @@
* @node: node for the global klp_ops list
* @func_stack: list head for the stack of klp_func's (active func is on top)
* @fops: registered ftrace ops struct
+ * @old_addr: the address of the function which is being patched
*/
struct klp_ops {
struct list_head node;
struct list_head func_stack;
struct ftrace_ops fops;
+ unsigned long old_addr;
};
/*
@@ -65,12 +67,9 @@ static struct kobject *klp_root_kobj;
static struct klp_ops *klp_find_ops(unsigned long old_addr)
{
struct klp_ops *ops;
- struct klp_func *func;
list_for_each_entry(ops, &klp_ops, node) {
- func = list_first_entry(&ops->func_stack, struct klp_func,
- stack_node);
- if (func->old_addr == old_addr)
+ if (ops->old_addr == old_addr)
return ops;
}
@@ -326,11 +325,8 @@ static void notrace klp_ftrace_handler(unsigned long ip,
rcu_read_lock();
func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
stack_node);
- if (WARN_ON_ONCE(!func))
- goto unlock;
-
- func->stub(&ops->func_stack, func, regs);
-unlock:
+ if (func)
+ func->stub(&ops->func_stack, func, regs);
rcu_read_unlock();
}
@@ -338,18 +334,20 @@ static void klp_disable_func(struct klp_func *func)
{
struct klp_ops *ops;
- WARN_ON(func->state != KLP_ENABLED);
+ WARN_ON(func->state == KLP_DISABLED);
WARN_ON(!func->old_addr);
ops = klp_find_ops(func->old_addr);
if (WARN_ON(!ops))
return;
- if (list_is_singular(&ops->func_stack)) {
+ if (list_empty(&ops->func_stack) ||
+ list_is_singular(&ops->func_stack)) {
WARN_ON(unregister_ftrace_function(&ops->fops));
WARN_ON(ftrace_set_filter_ip(&ops->fops, func->old_addr, 1, 0));
- list_del_rcu(&func->stack_node);
+ if (!list_empty(&ops->func_stack))
+ list_del_rcu(&func->stack_node);
list_del(&ops->node);
kfree(ops);
} else {
@@ -359,7 +357,7 @@ static void klp_disable_func(struct klp_func *func)
func->state = KLP_DISABLED;
}
-static int klp_enable_func(struct klp_func *func)
+static int klp_prepare_enable_func(struct klp_func *func)
{
struct klp_ops *ops;
int ret;
@@ -376,6 +374,7 @@ static int klp_enable_func(struct klp_func *func)
if (!ops)
return -ENOMEM;
+ ops->old_addr = func->old_addr;
ops->fops.func = klp_ftrace_handler;
ops->fops.flags = FTRACE_OPS_FL_SAVE_REGS |
FTRACE_OPS_FL_DYNAMIC |
@@ -384,7 +383,6 @@ static int klp_enable_func(struct klp_func *func)
list_add(&ops->node, &klp_ops);
INIT_LIST_HEAD(&ops->func_stack);
- list_add_rcu(&func->stack_node, &ops->func_stack);
ret = ftrace_set_filter_ip(&ops->fops, func->old_addr, 0, 0);
if (ret) {
@@ -400,35 +398,43 @@ static int klp_enable_func(struct klp_func *func)
ftrace_set_filter_ip(&ops->fops, func->old_addr, 1, 0);
goto err;
}
-
-
- } else {
- list_add_rcu(&func->stack_node, &ops->func_stack);
}
- func->state = KLP_ENABLED;
+ func->state = KLP_PREPARED;
return 0;
err:
- list_del_rcu(&func->stack_node);
list_del(&ops->node);
kfree(ops);
return ret;
}
+static void klp_enable_func(struct klp_func *func)
+{
+ struct klp_ops *ops;
+
+ ops = klp_find_ops(func->old_addr);
+ if (WARN_ON(!ops)) /* we have just added that, so? */
+ return;
+
+ list_add_rcu(&func->stack_node, &ops->func_stack);
+
+ func->state = KLP_ENABLED;
+}
+
static void klp_disable_object(struct klp_object *obj)
{
struct klp_func *func;
klp_for_each_func(obj, func)
- if (func->state == KLP_ENABLED)
+ if (func->state != KLP_DISABLED)
klp_disable_func(func);
obj->state = KLP_DISABLED;
}
-static int klp_enable_object(struct klp_object *obj)
+static int klp_prepare_enable_object(struct klp_object *obj)
{
struct klp_func *func;
int ret;
@@ -440,17 +446,27 @@ static int klp_enable_object(struct klp_object *obj)
return -EINVAL;
klp_for_each_func(obj, func) {
- ret = klp_enable_func(func);
+ ret = klp_prepare_enable_func(func);
if (ret) {
klp_disable_object(obj);
return ret;
}
}
- obj->state = KLP_ENABLED;
+ obj->state = KLP_PREPARED;
return 0;
}
+static void klp_enable_object(struct klp_object *obj)
+{
+ struct klp_func *func;
+
+ klp_for_each_func(obj, func)
+ klp_enable_func(func);
+
+ obj->state = KLP_ENABLED;
+}
+
static int __klp_disable_patch(struct klp_patch *patch)
{
struct klp_object *obj;
@@ -463,7 +479,7 @@ static int __klp_disable_patch(struct klp_patch *patch)
pr_notice("disabling patch '%s'\n", patch->mod->name);
klp_for_each_object(patch, obj) {
- if (obj->state == KLP_ENABLED)
+ if (obj->state == KLP_PREPARED || obj->state == KLP_ENABLED)
klp_disable_object(obj);
}
@@ -526,11 +542,18 @@ static int __klp_enable_patch(struct klp_patch *patch)
if (!klp_is_object_loaded(obj))
continue;
- ret = klp_enable_object(obj);
+ ret = klp_prepare_enable_object(obj);
if (ret)
goto unregister;
}
+ klp_for_each_object(patch, obj) {
+ if (!klp_is_object_loaded(obj))
+ continue;
+
+ klp_enable_object(obj);
+ }
+
patch->state = KLP_ENABLED;
return 0;
@@ -937,10 +960,13 @@ static void klp_module_notify_coming(struct klp_patch *patch,
pr_notice("applying patch '%s' to loading module '%s'\n",
pmod->name, mod->name);
- ret = klp_enable_object(obj);
- if (!ret)
- return;
+ ret = klp_prepare_enable_object(obj);
+ if (ret)
+ goto err;
+
+ klp_enable_object(obj);
+ return;
err:
pr_warn("failed to apply patch '%s' to module '%s' (%d)\n",
pmod->name, mod->name, ret);
--
2.3.5
We will need to propagate a few different target states in the next
patch, which adds kGraft-like patching, and this change will make that
easy. So this is only a preparation which does not really change the
behaviour.
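A sketch of the resulting call pattern (illustrative; see the diff
below):
	klp_enable_object(obj, KLP_ENABLED);	/* target state is now explicit */
	klp_disable_object(obj, KLP_DISABLED);
	/* the next patch will pass e.g. KLP_ASYNC_ENABLED here instead */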
Signed-off-by: Jiri Slaby <[email protected]>
---
kernel/livepatch/core.c | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 87d94aadfebf..5e89ea74cadb 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -330,7 +330,7 @@ static void notrace klp_ftrace_handler(unsigned long ip,
rcu_read_unlock();
}
-static void klp_disable_func(struct klp_func *func)
+static void klp_disable_func(struct klp_func *func, enum klp_state dstate)
{
struct klp_ops *ops;
@@ -354,7 +354,7 @@ static void klp_disable_func(struct klp_func *func)
list_del_rcu(&func->stack_node);
}
- func->state = KLP_DISABLED;
+ func->state = dstate;
}
static int klp_prepare_enable_func(struct klp_func *func)
@@ -410,7 +410,7 @@ err:
return ret;
}
-static void klp_enable_func(struct klp_func *func)
+static void klp_enable_func(struct klp_func *func, enum klp_state dstate)
{
struct klp_ops *ops;
@@ -420,18 +420,18 @@ static void klp_enable_func(struct klp_func *func)
list_add_rcu(&func->stack_node, &ops->func_stack);
- func->state = KLP_ENABLED;
+ func->state = dstate;
}
-static void klp_disable_object(struct klp_object *obj)
+static void klp_disable_object(struct klp_object *obj, enum klp_state dstate)
{
struct klp_func *func;
klp_for_each_func(obj, func)
if (func->state != KLP_DISABLED)
- klp_disable_func(func);
+ klp_disable_func(func, dstate);
- obj->state = KLP_DISABLED;
+ obj->state = dstate;
}
static int klp_prepare_enable_object(struct klp_object *obj)
@@ -448,7 +448,7 @@ static int klp_prepare_enable_object(struct klp_object *obj)
klp_for_each_func(obj, func) {
ret = klp_prepare_enable_func(func);
if (ret) {
- klp_disable_object(obj);
+ klp_disable_object(obj, KLP_DISABLED);
return ret;
}
}
@@ -457,14 +457,14 @@ static int klp_prepare_enable_object(struct klp_object *obj)
return 0;
}
-static void klp_enable_object(struct klp_object *obj)
+static void klp_enable_object(struct klp_object *obj, enum klp_state dstate)
{
struct klp_func *func;
klp_for_each_func(obj, func)
- klp_enable_func(func);
+ klp_enable_func(func, dstate);
- obj->state = KLP_ENABLED;
+ obj->state = dstate;
}
static int __klp_disable_patch(struct klp_patch *patch)
@@ -480,7 +480,7 @@ static int __klp_disable_patch(struct klp_patch *patch)
klp_for_each_object(patch, obj) {
if (obj->state == KLP_PREPARED || obj->state == KLP_ENABLED)
- klp_disable_object(obj);
+ klp_disable_object(obj, KLP_DISABLED);
}
patch->state = KLP_DISABLED;
@@ -551,7 +551,7 @@ static int __klp_enable_patch(struct klp_patch *patch)
if (!klp_is_object_loaded(obj))
continue;
- klp_enable_object(obj);
+ klp_enable_object(obj, KLP_ENABLED);
}
patch->state = KLP_ENABLED;
@@ -964,7 +964,7 @@ static void klp_module_notify_coming(struct klp_patch *patch,
if (ret)
goto err;
- klp_enable_object(obj);
+ klp_enable_object(obj, KLP_ENABLED);
return;
err:
@@ -984,7 +984,7 @@ static void klp_module_notify_going(struct klp_patch *patch,
pr_notice("reverting patch '%s' on unloading module '%s'\n",
pmod->name, mod->name);
- klp_disable_object(obj);
+ klp_disable_object(obj, KLP_DISABLED);
disabled:
klp_free_object_loaded(obj);
--
2.3.5
This adds a simplified kGraft as a consistency model. kGraft builds on
Live Kernel Patching, but adds an RCU-like update of code that does
not require stopping the kernel.
As with Live Kernel Patches, a kGraft patch is a kernel module and
fully relies on the in-kernel module loader to link the new code with
the kernel.
While kGraft is, by choice, limited to replacing whole functions and
the constants they reference, this does not significantly limit the
set of code patches that can be applied.
IRQs are not handled specially in this patch. IRQs inherit the flag
from the process they interrupted. This can easily be addressed by
porting the relevant pieces of code from SUSE's kGraft implementation,
which was not yet based on LKP.
Kthreads are ignored in this patch too. Proper parking needs to be
implemented for all of them first. This is work in progress and will
gradually improve the situation around kthreads and the
freezer/parker, which needs some care anyway. For now this means
kthreads always call the old code, but they do not block finalization.
The operations are as follows:
ENABLE
1) !(stub registered) => register stub
2) state = KLP_PREPARED
3) write_lock(klp_kgr_state_lock)
4) add func to klp_ops
5) state = KLP_ASYNC_ENABLED
6) set each task's in_progress flag
7) write_unlock(klp_kgr_state_lock);
=== async ===
8) state = KLP_ENABLED
DISABLE
1) write_lock(klp_kgr_state_lock)
2) state = KLP_ASYNC_DISABLED
3) set each task's in_progress flag
4) write_unlock(klp_kgr_state_lock);
=== async ===
5) remove func from klp_ops
6) no more funcs in klp_ops => unregister stub
7) state = KLP_DISABLED
STUB
1) fstate == KLP_DISABLED || KLP_PREPARED => go_old = true
2) fstate == KLP_ASYNC_ENABLED => go_old = (task in_progress?)
3) fstate == KLP_ENABLED => go_old = false
4) fstate == KLP_ASYNC_DISABLED => go_old = !(task in_progress?)
5) go_old => take the next fn in the klp_ops list
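The STUB rules above condense to roughly this decision in the handler
(a restatement of the rules; the full handler is in the diff below):
	bool go_old;
	switch (func->state) {
	case KLP_DISABLED:
	case KLP_PREPARED:
		go_old = true;		/* new code is not live yet */
		break;
	case KLP_ASYNC_ENABLED:
		go_old = klp_kgraft_task_in_progress(current);
		break;
	case KLP_ENABLED:
		go_old = false;		/* everyone runs the new code */
		break;
	case KLP_ASYNC_DISABLED:
		go_old = !klp_kgraft_task_in_progress(current);
		break;
	}
	if (go_old)	/* fall back to the next (older) function on the stack */
		func = list_entry_rcu(list_next_rcu(&func->stack_node),
				      struct klp_func, stack_node);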
Signed-off-by: Jiri Slaby <[email protected]>
---
include/linux/livepatch.h | 17 ++++
kernel/livepatch/Makefile | 2 +-
kernel/livepatch/cmodel-kgraft.c | 181 +++++++++++++++++++++++++++++++++++
kernel/livepatch/core.c | 119 ++++++++++++++++++++---
samples/livepatch/livepatch-sample.c | 2 +-
5 files changed, 308 insertions(+), 13 deletions(-)
create mode 100644 kernel/livepatch/cmodel-kgraft.c
diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index add2b1bd1cce..dcf5500b3999 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -29,6 +29,7 @@
#include <asm/livepatch.h>
+struct klp_patch;
struct klp_func;
/**
@@ -37,20 +38,28 @@ struct klp_func;
enum klp_cmodel_id {
KLP_CM_INVALID = 0,
KLP_CM_SIMPLE, /* LEAVE_FUNCTION and SWITCH_FUNCTION */
+ KLP_CM_KGRAFT, /* LEAVE_KERNEL and SWITCH_THREAD */
};
/**
* struct klp_cmodel - implementation of a consistency model
* @id: id of this model (from enum klp_cmodel_id)
+ * @async_finish: cmodel finishes asynchronously
* @list: member of klp_cmodel_list
* @stub: what to use as an ftrace handler (annotate with notrace!)
+ * @pre_patch: hook to run before patching starts (optional)
+ * @post_patch: hook to run after patching finishes (optional)
*/
struct klp_cmodel {
const enum klp_cmodel_id id;
+ bool async_finish;
struct list_head list;
void (*stub)(struct list_head *func_stack, struct klp_func *func,
struct pt_regs *regs);
+
+ void (*pre_patch)(struct klp_patch *);
+ void (*post_patch)(struct klp_patch *);
};
/**
@@ -58,6 +67,8 @@ struct klp_cmodel {
* @KLP_DISABLED: completely disabled
* @KLP_ENABLED: completely enabled (applied)
* @KLP_PREPARED: being applied
+ * @KLP_ASYNC_DISABLED: in the process of disabling (will become @KLP_DISABLED)
+ * @KLP_ASYNC_ENABLED: in the process of enabling (will become @KLP_ENABLED)
*
* @KLP_DISABLED & @KLP_ENABLED are part of the /sys ABI
*/
@@ -65,6 +76,10 @@ enum klp_state {
KLP_DISABLED,
KLP_ENABLED,
KLP_PREPARED,
+
+ KLP_ASYNC = 0x100,
+ KLP_ASYNC_DISABLED = KLP_DISABLED | KLP_ASYNC,
+ KLP_ASYNC_ENABLED = KLP_ENABLED | KLP_ASYNC,
};
/**
@@ -184,8 +199,10 @@ int klp_register_patch(struct klp_patch *);
int klp_unregister_patch(struct klp_patch *);
int klp_enable_patch(struct klp_patch *);
int klp_disable_patch(struct klp_patch *);
+void klp_async_patch_done(void);
void klp_init_cmodel_simple(void);
+void klp_init_cmodel_kgraft(void);
void klp_register_cmodel(struct klp_cmodel *);
#endif /* CONFIG_LIVEPATCH */
diff --git a/kernel/livepatch/Makefile b/kernel/livepatch/Makefile
index 926533777247..0c7fd8361dc3 100644
--- a/kernel/livepatch/Makefile
+++ b/kernel/livepatch/Makefile
@@ -1,3 +1,3 @@
obj-$(CONFIG_LIVEPATCH) += livepatch.o
-livepatch-objs := core.o cmodel-simple.o
+livepatch-objs := core.o cmodel-simple.o cmodel-kgraft.o
diff --git a/kernel/livepatch/cmodel-kgraft.c b/kernel/livepatch/cmodel-kgraft.c
new file mode 100644
index 000000000000..196b08823f73
--- /dev/null
+++ b/kernel/livepatch/cmodel-kgraft.c
@@ -0,0 +1,181 @@
+/*
+ * cmodels-kgraft.c - KLP kGraft Consistency Model
+ *
+ * Copyright (C) 2015 SUSE
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/ftrace.h>
+#include <linux/livepatch.h>
+#include <linux/rculist.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+#define KGRAFT_TIMEOUT 2
+
+static void klp_kgraft_work_fn(struct work_struct *work);
+
+static struct workqueue_struct *klp_kgraft_wq;
+static DECLARE_DELAYED_WORK(klp_kgraft_work, klp_kgraft_work_fn);
+/*
+ * This lock protects manipulation of func->state and TIF_KGR_IN_PROGRESS
+ * flag when a patch is being added or removed. kGraft stub has to see
+ * both values in a consistent state for the whole patch and all threads.
+ */
+static DEFINE_RWLOCK(klp_kgr_state_lock);
+
+static void notrace klp_kgraft_stub(struct list_head *func_stack,
+ struct klp_func *func, struct pt_regs *regs)
+{
+ unsigned long flags;
+ bool go_old;
+
+ if (current->flags & PF_KTHREAD)
+ return;
+
+ /*
+ * The corresponding write lock is taken only when functions are moved
+ * to _ASYNC_ states and _IN_PROGRESS flag is set for all threads.
+ */
+ read_lock_irqsave(&klp_kgr_state_lock, flags);
+
+ switch (func->state) {
+ case KLP_DISABLED:
+ case KLP_PREPARED:
+ go_old = true;
+ break;
+ case KLP_ASYNC_ENABLED:
+ go_old = klp_kgraft_task_in_progress(current);
+ break;
+ case KLP_ENABLED:
+ go_old = false;
+ break;
+ case KLP_ASYNC_DISABLED:
+ go_old = !klp_kgraft_task_in_progress(current);
+ break;
+ /* default: none to catch missing states at compile time! */
+ }
+
+ read_unlock_irqrestore(&klp_kgr_state_lock, flags);
+
+ if (go_old)
+ func = list_entry_rcu(list_next_rcu(&func->stack_node),
+ struct klp_func, stack_node);
+
+ /* did we hit the bottom => run the original */
+ if (&func->stack_node != func_stack)
+ klp_arch_set_pc(regs, (unsigned long)func->new_func);
+}
+
+static void klp_kgraft_pre_patch(struct klp_patch *patch)
+ __acquires(&klp_kgr_state_lock)
+{
+ write_lock_irq(&klp_kgr_state_lock);
+}
+
+static bool klp_kgraft_still_patching(void)
+{
+ struct task_struct *p;
+ bool failed = false;
+
+ /*
+ * We do not need to take klp_kgr_state_lock here.
+ * Any race will just delay finalization.
+ */
+ read_lock(&tasklist_lock);
+ for_each_process(p) {
+ if (klp_kgraft_task_in_progress(p)) {
+ failed = true;
+ break;
+ }
+ }
+ read_unlock(&tasklist_lock);
+ return failed;
+}
+
+static void klp_kgraft_work_fn(struct work_struct *work)
+{
+ static bool printed = false;
+
+ if (klp_kgraft_still_patching()) {
+ if (!printed) {
+ pr_info("kGraft still in progress after timeout, will keep trying every %d seconds\n",
+ KGRAFT_TIMEOUT);
+ printed = true;
+ }
+ /* recheck again later */
+ queue_delayed_work(klp_kgraft_wq, &klp_kgraft_work,
+ KGRAFT_TIMEOUT * HZ);
+ return;
+ }
+
+ /*
+ * victory, patching finished, put everything back in shape
+ * with as little performance impact as possible again
+ */
+ klp_async_patch_done();
+ pr_info("kGraft succeeded\n");
+
+ printed = false;
+}
+
+static void klp_kgraft_handle_processes(void)
+{
+ struct task_struct *p;
+
+ read_lock(&tasklist_lock);
+ for_each_process(p) {
+ /* kthreads cannot be patched yet */
+ if (p->flags & PF_KTHREAD)
+ continue;
+
+ klp_kgraft_mark_task_in_progress(p);
+ }
+ read_unlock(&tasklist_lock);
+}
+
+static void klp_kgraft_post_patch(struct klp_patch *patch)
+ __releases(&klp_kgr_state_lock)
+{
+ klp_kgraft_handle_processes();
+ write_unlock_irq(&klp_kgr_state_lock);
+
+ /*
+ * give everyone time to exit the kernel, and check after a while
+ */
+ queue_delayed_work(klp_kgraft_wq, &klp_kgraft_work,
+ KGRAFT_TIMEOUT * HZ);
+}
+
+static struct klp_cmodel kgraft_model = {
+ .id = KLP_CM_KGRAFT,
+ .async_finish = true,
+ .stub = klp_kgraft_stub,
+ .pre_patch = klp_kgraft_pre_patch,
+ .post_patch = klp_kgraft_post_patch,
+};
+
+void klp_init_cmodel_kgraft(void)
+{
+ klp_kgraft_wq = create_singlethread_workqueue("kgraft");
+ if (!klp_kgraft_wq) {
+ pr_err("kGraft: cannot allocate a work queue, aborting!\n");
+ return;
+ }
+
+ klp_register_cmodel(&kgraft_model);
+}
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 5e89ea74cadb..8f6fa8c8f593 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -58,6 +58,12 @@ struct klp_ops {
*/
static DEFINE_MUTEX(klp_mutex);
+/*
+ * current patch in progress. Access only under klp_mutex.
+ * This is useful to know only for async cmodels.
+ */
+static struct klp_patch *klp_async_patch;
+
static LIST_HEAD(klp_patches);
static LIST_HEAD(klp_ops);
static LIST_HEAD(klp_cmodel_list);
@@ -330,6 +336,27 @@ static void notrace klp_ftrace_handler(unsigned long ip,
rcu_read_unlock();
}
+static void klp_patch_change_state(struct klp_patch *patch, enum klp_state from,
+ enum klp_state to)
+{
+ struct klp_object *obj;
+ struct klp_func *func;
+
+ if (patch->state != from)
+ return;
+
+ klp_for_each_object(patch, obj)
+ if (klp_is_object_loaded(obj) && obj->state == from) {
+ klp_for_each_func(obj, func)
+ if (func->state == from)
+ func->state = to;
+
+ obj->state = to;
+ }
+
+ patch->state = to;
+}
+
static void klp_disable_func(struct klp_func *func, enum klp_state dstate)
{
struct klp_ops *ops;
@@ -467,10 +494,24 @@ static void klp_enable_object(struct klp_object *obj, enum klp_state dstate)
obj->state = dstate;
}
-static int __klp_disable_patch(struct klp_patch *patch)
+static void klp_disable_patch_real(struct klp_patch *patch)
{
struct klp_object *obj;
+ klp_for_each_object(patch, obj) {
+ if (obj->state == KLP_PREPARED || obj->state == KLP_ENABLED ||
+ obj->state == KLP_ASYNC_DISABLED)
+ klp_disable_object(obj, KLP_DISABLED);
+ }
+
+ patch->state = KLP_DISABLED;
+}
+
+static int __klp_disable_patch(struct klp_patch *patch)
+{
+ if (klp_async_patch)
+ return -EBUSY;
+
/* enforce stacking: only the last enabled patch can be disabled */
if (!list_is_last(&patch->list, &klp_patches) &&
list_next_entry(patch, list)->state == KLP_ENABLED)
@@ -478,12 +519,23 @@ static int __klp_disable_patch(struct klp_patch *patch)
pr_notice("disabling patch '%s'\n", patch->mod->name);
- klp_for_each_object(patch, obj) {
- if (obj->state == KLP_PREPARED || obj->state == KLP_ENABLED)
- klp_disable_object(obj, KLP_DISABLED);
+ /*
+ * Do only fast and non-blocking operations between pre_patch
+ * and post_patch callbacks. They might take a lock and block
+ * patched functions!
+ */
+ if (patch->cmodel->pre_patch)
+ patch->cmodel->pre_patch(patch);
+
+ if (patch->cmodel->async_finish) {
+ klp_async_patch = patch;
+ klp_patch_change_state(patch, KLP_ENABLED, KLP_ASYNC_DISABLED);
+ } else {
+ klp_disable_patch_real(patch);
}
- patch->state = KLP_DISABLED;
+ if (patch->cmodel->post_patch)
+ patch->cmodel->post_patch(patch);
return 0;
}
@@ -520,14 +572,35 @@ err:
}
EXPORT_SYMBOL_GPL(klp_disable_patch);
+/**
+ * klp_async_patch_done - asynchronous patch should be finished
+ */
+void klp_async_patch_done(void)
+{
+ mutex_lock(&klp_mutex);
+ if (klp_async_patch->state == KLP_ASYNC_ENABLED) {
+ klp_patch_change_state(klp_async_patch, KLP_ASYNC_ENABLED,
+ KLP_ENABLED);
+ } else {
+ klp_disable_patch_real(klp_async_patch);
+ }
+
+ klp_async_patch = NULL;
+ mutex_unlock(&klp_mutex);
+}
+
static int __klp_enable_patch(struct klp_patch *patch)
{
struct klp_object *obj;
- int ret;
+ enum klp_state dstate;
+ int ret = 0;
if (WARN_ON(patch->state != KLP_DISABLED))
return -EINVAL;
+ if (klp_async_patch)
+ return -EBUSY;
+
/* enforce stacking: only the first disabled patch can be enabled */
if (patch->list.prev != &klp_patches &&
list_prev_entry(patch, list)->state == KLP_DISABLED)
@@ -547,19 +620,35 @@ static int __klp_enable_patch(struct klp_patch *patch)
goto unregister;
}
+ /*
+ * Do only fast and non-blocking operations between pre_patch
+ * and post_patch callbacks. They might take a lock and block
+ * patched functions!
+ */
+ if (patch->cmodel->pre_patch)
+ patch->cmodel->pre_patch(patch);
+
+ dstate = patch->cmodel->async_finish ? KLP_ASYNC_ENABLED : KLP_ENABLED;
+
klp_for_each_object(patch, obj) {
if (!klp_is_object_loaded(obj))
continue;
- klp_enable_object(obj, KLP_ENABLED);
+ klp_enable_object(obj, dstate);
}
- patch->state = KLP_ENABLED;
+ if (patch->cmodel->async_finish)
+ klp_async_patch = patch;
- return 0;
+ patch->state = dstate;
+
+ if (patch->cmodel->post_patch)
+ patch->cmodel->post_patch(patch);
unregister:
- WARN_ON(__klp_disable_patch(patch));
+ if (ret)
+ klp_disable_patch_real(patch);
+
return ret;
}
@@ -948,6 +1037,7 @@ static void klp_module_notify_coming(struct klp_patch *patch,
{
struct module *pmod = patch->mod;
struct module *mod = obj->mod;
+ enum klp_state dstate;
int ret;
ret = klp_init_object_loaded(patch, obj);
@@ -964,7 +1054,13 @@ static void klp_module_notify_coming(struct klp_patch *patch,
if (ret)
goto err;
- klp_enable_object(obj, KLP_ENABLED);
+ /*
+ * Put the module to the ASYNC state in case we are in the transition.
+ * The module still can affect behaviour of running processes.
+ */
+ dstate = klp_async_patch ? klp_async_patch->state : KLP_ENABLED;
+
+ klp_enable_object(obj, dstate);
return;
err:
@@ -1047,6 +1143,7 @@ static int klp_init(void)
}
klp_init_cmodel_simple();
+ klp_init_cmodel_kgraft();
ret = register_module_notifier(&klp_module_nb);
if (ret)
diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c
index 48621de040db..25289083deac 100644
--- a/samples/livepatch/livepatch-sample.c
+++ b/samples/livepatch/livepatch-sample.c
@@ -63,7 +63,7 @@ static struct klp_object objs[] = {
static struct klp_patch patch = {
.mod = THIS_MODULE,
.objs = objs,
- .cmodel_id = KLP_CM_SIMPLE,
+ .cmodel_id = KLP_CM_KGRAFT,
};
static int livepatch_init(void)
--
2.3.5
From: Miroslav Benes <[email protected]>
The kGraft consistency model is LEAVE_KERNEL and SWITCH_THREAD. This
means that all tasks in the system have to be marked one by one as safe
to call a new patched function. The safe place is the boundary between
kernel and userspace. The patching waits for all tasks to cross this
boundary and finishes the process afterwards.
The problem is that a task can block the finalization of the patching
process for quite a long time, if not forever. The task could sleep
somewhere in the kernel or could be running in userspace with no
prospect of entering the kernel and thus going through the safe place.
Luckily, we can force the task to do that by sending it a fake signal,
that is, a signal with no data in the signal pending structures (no
handler, no sign of a proper signal being delivered). Suspend/freezer
use this to freeze tasks as well. The task gets TIF_SIGPENDING set and
is woken up (if it has been sleeping in the kernel before) or kicked by
a rescheduling IPI (if it was running on another CPU). This causes the
task to go to the kernel/userspace boundary where the signal is handled
and the task is marked as safe in terms of live patching.
There are tasks which are not affected by this technique though. The
fake signal is not sent to kthreads; they have to be handled in a
different way. Also, if a task is in the TASK_RUNNING state but not
currently running on a CPU, it does not get the IPI, but it will
eventually handle the signal anyway. Last, if the task runs in the
kernel (in the TASK_RUNNING state) it gets the IPI, but the signal is
not handled on return from the interrupt; it will be handled on a later
return to userspace.
If the task was sleeping in a syscall, it is woken by our fake signal,
checks whether TIF_SIGPENDING is set (via the signal_pending()
predicate) and returns ERESTART* or EINTR. Syscalls with ERESTART*
return values are restarted in the case of the fake signal (see
do_signal()). EINTR is propagated back to the userspace program. This
could disturb the program, but...
* each process dealing with signals should react appropriately to
EINTR return values.
* syscalls returning EINTR are quite a common situation in the system
even when no fake signal is sent.
* the freezer sends the fake signal and does not deal with EINTR
either. Thus EINTR values are returned when the system is resumed.
The actual safe marking is done in entry_64.S on the syscall and
interrupt/exception exit paths.
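The core of the fake signal is small; a condensed sketch for one task p
(the full loop over processes is in the diff below):
	unsigned long flags;
	if (!(p->flags & PF_KTHREAD) &&
	    klp_kgraft_task_in_progress(p) &&
	    lock_task_sighand(p, &flags)) {
		signal_wake_up(p, 0);	/* sets TIF_SIGPENDING, wakes or kicks p */
		unlock_task_sighand(p, &flags);
	}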
Signed-off-by: Miroslav Benes <[email protected]>
Reviewed-by: Jiri Kosina <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Signed-off-by: Jiri Slaby <[email protected]>
---
kernel/livepatch/cmodel-kgraft.c | 23 +++++++++++++++++++++++
kernel/signal.c | 3 ++-
2 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/kernel/livepatch/cmodel-kgraft.c b/kernel/livepatch/cmodel-kgraft.c
index 196b08823f73..fd041ca30161 100644
--- a/kernel/livepatch/cmodel-kgraft.c
+++ b/kernel/livepatch/cmodel-kgraft.c
@@ -107,6 +107,27 @@ static bool klp_kgraft_still_patching(void)
return failed;
}
+static void klp_kgraft_send_fake_signal(void)
+{
+ struct task_struct *p;
+ unsigned long flags;
+
+ read_lock(&tasklist_lock);
+ for_each_process(p) {
+ /*
+ * send fake signal to all non-kthread processes which are still
+ * not migrated
+ */
+ if (!(p->flags & PF_KTHREAD) &&
+ klp_kgraft_task_in_progress(p) &&
+ lock_task_sighand(p, &flags)) {
+ signal_wake_up(p, 0);
+ unlock_task_sighand(p, &flags);
+ }
+ }
+ read_unlock(&tasklist_lock);
+}
+
static void klp_kgraft_work_fn(struct work_struct *work)
{
static bool printed = false;
@@ -117,6 +138,8 @@ static void klp_kgraft_work_fn(struct work_struct *work)
KGRAFT_TIMEOUT);
printed = true;
}
+ /* send fake signal */
+ klp_kgraft_send_fake_signal();
/* recheck again later */
queue_delayed_work(klp_kgraft_wq, &klp_kgraft_work,
KGRAFT_TIMEOUT * HZ);
diff --git a/kernel/signal.c b/kernel/signal.c
index d51c5ddd855c..5a3f56a69122 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -157,7 +157,8 @@ void recalc_sigpending_and_wake(struct task_struct *t)
void recalc_sigpending(void)
{
- if (!recalc_sigpending_tsk(current) && !freezing(current))
+ if (!recalc_sigpending_tsk(current) && !freezing(current) &&
+ !klp_kgraft_task_in_progress(current))
clear_thread_flag(TIF_SIGPENDING);
}
--
2.3.5
Hello,
this is a patchset which teaches Kernel Live Patching to be consistent
when patching, using the kGraft approach [1].
Besides helpers, the set adds support for consistency models,
implements the kGraft consistency model, and finally allows for
sending signals to complete the patching process quickly (but still
safely).
Currently we have only two consistency models:
* none
* kGraft
None is the model which was present before this patchset and it indeed
guarantees no consistency at all: LEAVE_FUNCTION and SWITCH_FUNCTION,
as described earlier [2]. kGraft is based on the well-known RCU
principle and every process is converted to the patched world on its
own, safely. kGraft is LEAVE_KERNEL and SWITCH_THREAD.
Signal "broadcasting" is added in the last patch of the series and is
based on the previous public discussions [3].
As usual, see the respective patches for more details about a
particular patch.
Since KLP currently supports x86_64 and s390, the same holds for this
patchset. It was tested on both of these architectures.
Thanks go to Petr Mladek, Mirek Benes, Jiri Kosina, and others, who
worked hard to improve the set.
[1] https://lkml.org/lkml/2014/6/25/284
[2] https://lkml.org/lkml/2014/11/7/354
[3] https://lkml.org/lkml/2015/2/21/119
Jiri Slaby (7):
livepatch: introduce patch/func-walking helpers
livepatch: add klp_*_to_patch helpers
livepatch: add kgr infrastructure
livepatch: teach klp about consistency models
livepatch: do not allow failure while really patching
livepatch: propagate the patch status to functions
livepatch: add kgraft-like patching
Miroslav Benes (2):
livepatch: make kobject in klp_object statically allocated
livepatch: send a fake signal to all tasks
arch/s390/include/asm/thread_info.h | 2 +
arch/s390/kernel/Makefile | 1 +
arch/s390/kernel/entry.S | 11 ++
arch/s390/kernel/livepatch.c | 14 ++
arch/x86/include/asm/thread_info.h | 11 +-
arch/x86/kernel/ptrace.c | 7 +
arch/x86/kernel/signal.c | 5 +
fs/proc/base.c | 14 ++
include/linux/livepatch.h | 81 +++++++++-
include/linux/sched.h | 28 ++++
kernel/livepatch/Makefile | 2 +-
kernel/livepatch/cmodel-kgraft.c | 204 +++++++++++++++++++++++++
kernel/livepatch/cmodel-simple.c | 39 +++++
kernel/livepatch/core.c | 284 +++++++++++++++++++++++++++--------
kernel/signal.c | 3 +-
samples/livepatch/livepatch-sample.c | 1 +
16 files changed, 638 insertions(+), 69 deletions(-)
create mode 100644 arch/s390/kernel/livepatch.c
create mode 100644 kernel/livepatch/cmodel-kgraft.c
create mode 100644 kernel/livepatch/cmodel-simple.c
--
2.3.5
On Mon, 4 May 2015 13:40:20 +0200
Jiri Slaby <[email protected]> wrote:
> This means:
> * add a per-thread flag to indicate whether a task is in the old or in
> the new universe,
> * reset it in _slow_ paths of syscall's entry/exit,
> * add helpers around the flag to sched.h,
> * export the status in /proc/<pid>/kgr_in_progress,
> @@ -217,6 +226,7 @@ ENTRY(system_call)
> mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
> stg %r14,__PT_FLAGS(%r11)
> .Lsysc_do_svc:
> + HANDLE_KGRAFT %r12
> lg %r10,__TI_sysc_table(%r12) # address of system call table
> llgh %r8,__PT_INT_CODE+2(%r11)
> slag %r8,%r8,2 # shift and test for svc 0
This is not the slow path, .Lsysc_do_svc is on the main svc path. It is
"only" two instructions, but nevertheless this should be avoided.
One way is to combine it with the _TIF_TRACE mechanics:
.Lsysc_nr_ok:
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stg %r2,__PT_ORIG_GPR2(%r11)
stg %r7,STACK_FRAME_OVERHEAD(%r15)
lgf %r9,0(%r8,%r10) # get system call add.
-> tm __TI_flags+6(%r12),_TIF_TRACE>>8
-> jnz .Lsysc_tracesys
basr %r14,%r9 # call sys_xxxx
stg %r2,__PT_R2(%r11) # store return value
Add _TIF_KGR_IN_PROGRESS to _TIF_TRACE and branch to a new label,
e.g. to .Lsysc_trace. Distinguish between _TIF_KGR_IN_PROGRESS and
the other trace reasons and either call s390_handle_kgraft or
do_syscall_trace_enter / do_syscall_trace_exit.
The same for the exit work, add _TIF_KGR_IN_PROGRESS to _TIF_WORK
and sort out the reason in .Lsysc_work. That avoids another two
instructions on the main system call path.
--
blue skies,
Martin.
"Reality continues to ruin my life." - Calvin.
Well, I can't really comment on this change because I didn't see the other
changes, and I do not know what klp_kgraft_task_in_progress() means...
On 05/04, Jiri Slaby wrote:
>
> Luckily we can force the task to do that by sending it a fake signal,
But note that signal_wake_up(0) won't wake the stopped/traced tasks up.
> +static void klp_kgraft_send_fake_signal(void)
> +{
> + struct task_struct *p;
> + unsigned long flags;
> +
> + read_lock(&tasklist_lock);
> + for_each_process(p) {
Only the group leader can be klp_kgraft_task_in_progress?
Looks like you need for_each_process_thread()...
> + /*
> + * send fake signal to all non-kthread processes which are still
> + * not migrated
> + */
> + if (!(p->flags & PF_KTHREAD) &&
So this can miss the execing kernel thread, I do not know if this is
correct or not. PF_KTHREAD is cleared in flush_old_exec().
> + klp_kgraft_task_in_progress(p) &&
> + lock_task_sighand(p, &flags)) {
No need for lock_task_sighand(). Just spin_lock_irq(p->sighand->siglock).
tasklist_lock + for_each_process guarantees that "p" has a valid ->sighand.
> + signal_wake_up(p, 0);
To remind, this won't wakeup a TASK_STOPPED/TRACED thread.
> void recalc_sigpending(void)
> {
> - if (!recalc_sigpending_tsk(current) && !freezing(current))
> + if (!recalc_sigpending_tsk(current) && !freezing(current) &&
> + !klp_kgraft_task_in_progress(current))
> clear_thread_flag(TIF_SIGPENDING);
It is not clear from this patch when TIF_SIGPENDING will be cleared.
I assume other changes add some hooks into the do_notify_resume/get_signal
paths; otherwise a task with klp_kgraft_task_in_progress() set will spin
until klp_kgraft_task_in_progress(current) becomes "false".
Oleg.
On Mon, May 04, 2015 at 02:20:09PM +0200, Jiri Slaby wrote:
> Hello,
>
> this is a patchset which teaches the Kernel Live Patching to be
> consistent when patching with the use of the kGraft approach [1].
> Besides helpers, the set adds support for consistency models,
> implements the kGraft consistency model, and finally allows for
> sending signals to complete the patching process quickly (but still
> safely).
>
> Currently we have only two consistency models:
> * none
> * kGraft
>
> None is the one which was present before the patchset and it indeed
> guarantees no consistency at all; it is LEAVE_FUNCTION and
> SWITCH_FUNCTION, as described earlier [2]. kGraft is based on the
> well-known RCU principle and every process is converted to the patched
> world on its own, safely. kGraft is LEAVE_KERNEL and SWITCH_THREAD.
Why do we need multiple consistency models?
What are the advantages of the kGraft model over the kGraft/kpatch
hybrid model [1]?
[1] http://lkml.kernel.org/r/[email protected]
--
Josh
On Mon, 4 May 2015, Josh Poimboeuf wrote:
> Why do we need multiple consistency models?
Well, I am pretty sure we always need at least two:
- the "immediate" one, where the code redirection flip is switched
unconditionally and immediately (i.e. exactly what we currently have in
Linus' tree); semantically applicable to many patches, but not all of
them
- something that fills the "but not all of them" gap above.
Both of the solutions that have been presented so far have some drawbacks
that need to be discussed further. To me, the "highlights" (in the
"drawbacks" space) are:
- any method that is stack-checking-based basically means that we have to
functionally 100% rely on stack unwinding correctness. We have never
done that before, and current stack unwinder is not ready for that
(Josh is working on improving that); plus it can cause the patching to
fail under certain circumstances
- the kGraft method is not (yet) able to patch kernel threads, and allows
for multiple instances of the patched functions to be running in
parallel (i.e. the patch author needs to be aware of this constraint, and
write the code accordingly)
This is exactly why we are submitting the kGraft-on-klp patchset, so that
we have concurrent implementations (sharing the same goal) to compare, and
ultimately merge whatever turns out to be the best possible outcome.
Thanks,
--
Jiri Kosina
SUSE Labs
On Tue, May 05, 2015 at 12:48:22AM +0200, Jiri Kosina wrote:
> On Mon, 4 May 2015, Josh Poimboeuf wrote:
> > Why do we need multiple consistency models?
>
> Well, I am pretty sure we always need at least two:
>
> - the "immediate" one, where the code redirection flip is switched
> unconditionally and immediately (i.e. exactly what we currently have in
> Linus' tree); semantically applicable to many patches, but not all of
> them
>
> - something that fills the "but not all of them" gap above.
What's the benefit of having the "immediate" model in addition to
the more comprehensive model?
> Both of the solutions that have been presented so far have some drawbacks
> that need to be discussed further. To me, the "highlights" (in the
> "drawbacks" space) are:
>
> - any method that is stack-checking-based basically means that we have to
> functionally 100% rely on stack unwinding correctness. We have never
> done that before, and current stack unwinder is not ready for that
> (Josh is working on improving that);
I wouldn't call it a drawback. More like a deal breaker :-) But yeah,
I'm working on that.
> plus it can cause the patching to fail under certain circumstances
Assuming you're talking about the kGraft/kpatch hybrid RFC, it actually
doesn't fail. It falls back to asynchronous lazy migration for any
straggler tasks.
> - the kGraft method is not (yet) able to patch kernel threads, and allows
> for multiple instances of the patched functions to be running in
> parallel (i.e. the patch author needs to be aware of this constraint, and
> write the code accordingly)
Not being able to patch kthreads sounds like a huge drawback, if not a
deal breaker. How does the patching state ever reach completion?
> This is exactly why we are submitting the kGraft-on-klp patchset, so that
> we have concurrent implementations (sharing the same goal) to compare, and
> ultimately merge whatever turns out to be the best possible outcome.
Another big downside to kGraft, assuming you want the patching to
complete within a realistic period of time, is that you have to wake up
all the sleeping tasks and send them through their signal handling
paths. I would say it's orders of magnitude more disruptive and much
riskier compared to walking the stacks (again, assuming we can make
stack walking "safe").
--
Josh
On Mon, 4 May 2015, Josh Poimboeuf wrote:
> > - the "immediate" one, where the code redirection flip is switched
> > unconditionally and immediately (i.e. exactly what we currently have in
> > Linus' tree); semantically applicable to many patches, but not all of
> > them
> >
> > - something that fills the "but not all of them" gap above.
>
> What's the benefit of having the "immediate" model in addition to
> the more comprehensive model?
Fair enough, I agree that in the case of the hybrid approach you're
proposing, the immediate model is not necessary.
> > - the kGraft method is not (yet) able to patch kernel threads, and allows
> > for multiple instances of the patched functions to be running in
> > parallel (i.e. the patch author needs to be aware of this constraint, and
> > write the code accordingly)
>
> Not being able to patch kthreads sounds like a huge drawback, if not a
> deal breaker.
It depends on bringing some sanity to freezing / parking / signal handling
for kthreads, which is an independent work in progress in parallel.
> How does the patching state ever reach completion?
kthread context always calls the old code and it doesn't block the
finalization; that's basically a documented feature for now.
That surely is a limitation and something the patch author has to be aware
of, but I wouldn't really consider it a show stopper for now, for the
reason pointed out above; it'll eventually be made to work, it's not a
substantial issue.
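To illustrate the point (a sketch only, not code from the series), the
completion check can simply skip kernel threads:
/* Illustrative sketch only. */
static bool klp_kgraft_still_in_progress(void)
{
	struct task_struct *g, *p;
	bool in_progress = false;

	read_lock(&tasklist_lock);
	for_each_process_thread(g, p) {
		/* kthreads keep running the old code and never block finalization */
		if (!(p->flags & PF_KTHREAD) &&
		    klp_kgraft_task_in_progress(p)) {
			in_progress = true;
			goto out;
		}
	}
out:
	read_unlock(&tasklist_lock);
	return in_progress;
}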
> I would say it's orders of magnitude more disruptive and much riskier
> compared to walking the stacks (again, assuming we can make stack
> walking "safe").
Agreed ... under the condition that it can be made really 100% reliable
*and* we'd be reasonably sure that we will be able to realistically
achieve the same goal on other architectures as well. Have you even
started exploring that space, please?
Thanks,
--
Jiri Kosina
SUSE Labs
On 05/04/2015, 02:23 PM, Martin Schwidefsky wrote:
> On Mon, 4 May 2015 13:40:20 +0200
> Jiri Slaby <[email protected]> wrote:
>
>> This means:
>> * add a per-thread flag to indicate whether a task is in the old or in
>> the new universe,
>> * reset it in _slow_ paths of syscall's entry/exit,
>> * add helpers around the flag to sched.h,
>> * export the status in /proc/<pid>/kgr_in_progress,
>
>> @@ -217,6 +226,7 @@ ENTRY(system_call)
>> mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
>> stg %r14,__PT_FLAGS(%r11)
>> .Lsysc_do_svc:
>> + HANDLE_KGRAFT %r12
>> lg %r10,__TI_sysc_table(%r12) # address of system call table
>> llgh %r8,__PT_INT_CODE+2(%r11)
>> slag %r8,%r8,2 # shift and test for svc 0
>
> This is not the slow path, .Lsysc_do_svc is on the main svc path. It is
> "only" two instruction but nevertheless this should be avoided.
Hi,
the commit log says the reset is in the slow path, not the test. But OK,
we can optimize, see below.
> One way is to combine it with the _TIF_TRACE mechanics:
>
> .Lsysc_nr_ok:
> xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
> stg %r2,__PT_ORIG_GPR2(%r11)
> stg %r7,STACK_FRAME_OVERHEAD(%r15)
> lgf %r9,0(%r8,%r10) # get system call add.
> -> tm __TI_flags+6(%r12),_TIF_TRACE>>8
> -> jnz .Lsysc_tracesys
> basr %r14,%r9 # call sys_xxxx
> stg %r2,__PT_R2(%r11) # store return value
>
> Add _TIF_KGR_IN_PROGRESS to _TIF_TRACE and branch to a new label,
> e.g. to .Lsysc_trace. Distinguish between _TIF_KGR_IN_PROGRESS and
> the other trace reasons and either call s390_handle_kgraft or
> do_syscall_trace_enter / do_syscall_trace_exit.
>
> The same for the exit work, add _TIF_KGR_IN_PROGRESS to _TIF_WORK
> and sort out the reason in .Lsysc_work. That avoids another two
> instructions on the main system call path.
I considered this, but there was no space in the word.
_TIF_WORK is:
TIF_NOTIFY_RESUME 0
TIF_SIGPENDING 1
TIF_NEED_RESCHED 2
TIF_UPROBE 7
_TIF_TRACE is:
TIF_SYSCALL_TRACE 3
TIF_SYSCALL_AUDIT 4
TIF_SECCOMP 5
TIF_SYSCALL_TRACEPOINT 6
=====
What I could do is to split them and make this setup:
_TIF_WORK:
TIF_NOTIFY_RESUME 0
TIF_SIGPENDING 1
TIF_NEED_RESCHED 2
TIF_KGR_IN_PROGRESS_W 3
TIF_UPROBE 7
_TIF_TRACE:
TIF_SYSCALL_TRACE 24
TIF_SYSCALL_AUDIT 25
TIF_SECCOMP 26
TIF_SYSCALL_TRACEPOINT 27
TIF_KGR_IN_PROGRESS_T 28
=====
Then make TIF_KGR_IN_PROGRESS_W fire when "tm"-ing _TIF_WORK in
"__TI_flags+7". TIF_KGR_IN_PROGRESS_T will work along with _TIF_TRACE
using "tm" on "__TI_flags+4".
What do you think?
thanks,
--
js
suse labs
On Tue, 05 May 2015 15:27:19 +0200
Jiri Slaby <[email protected]> wrote:
> What I could do is to split them and make this setup:
>
> _TIF_WORK:
> TIF_NOTIFY_RESUME 0
> TIF_SIGPENDING 1
> TIF_NEED_RESCHED 2
> TIF_KGR_IN_PROGRESS_W 3
> TIF_UPROBE 7
>
> _TIF_TRACE:
> TIF_SYSCALL_TRACE 24
> TIF_SYSCALL_AUDIT 25
> TIF_SECCOMP 26
> TIF_SYSCALL_TRACEPOINT 27
> TIF_KGR_IN_PROGRESS_T 28
>
> =====
>
> Then make TIF_KGR_IN_PROGRESS_W fire when "tm"-ing _TIF_WORK in
> "__TI_flags+7". TIF_KGR_IN_PROGRESS_T will work along with _TIF_TRACE
> using "tm" on "__TI_flags+4".
>
> What do you think?
Yes, that is what I had in mind. Feel free to reorder the TIF bits
in any way necessary. I have done that several times already.
--
blue skies,
Martin.
"Reality continues to ruin my life." - Calvin.
On Tue, May 05, 2015 at 08:14:50AM +0200, Jiri Kosina wrote:
> On Mon, 4 May 2015, Josh Poimboeuf wrote:
> > > - the "immediate" one, where the code redirection flip is switched
> > > unconditionally and immediately (i.e. exactly what we currently have in
> > > Linus' tree); semantically applicable to many patches, but not all of
> > > them
> > >
> > > - something that fills the "but not all of them" gap above.
> >
> > What's the benefit of having the "immediate" model in addition to
> > the more comprehensive model?
>
> Fair enough, I agree that in the case of the hybrid approach you're
> proposing, the immediate model is not necessary.
Just to make sure I understand, would the immediate model be needed in
order to cover some of the gaps caused by not being able to patch
kthreads?
> > > - the kGraft method is not (yet) able to patch kernel threads, and allows
> > > for multiple instances of the patched functions to be running in
> > > parallel (i.e. the patch author needs to be aware of this constraint, and
> > > write the code accordingly)
> >
> > Not being able to patch kthreads sounds like a huge drawback, if not a
> > deal breaker.
>
> It depends on bringing some sanity to freezing / parking / signal handling
> for kthreads, which is an independent work in progress in parallel.
>
> > How does the patching state ever reach completion?
>
> kthread context always calls the old code and it doesn't block the
> finalization; that's basically a documented feature for now.
>
> That surely is a limitation and something the patch author has to be aware
> of, but I wouldn't really consider it a show stopper for now, for the
> reason pointed out above; it'll eventually be made to work, it's not a
> substantial issue.
Until the kthread issues are sorted out, I would call it a _very_
substantial issue. Not being able to patch kthreads is a huge
limitation. Also it would in many cases block the ability to properly
change data semantics, which is one of the big reasons for a consistency
model.
> > I would say it's orders of magnitude more disruptive and much riskier
> > compared to walking the stacks (again, assuming we can make stack
> > walking "safe").
>
> Agreed ... under the condition that it can be made really 100% reliable
> *and* we'd be reasonably sure that we will be able to realistically
> achieve the same goal on other architectures as well. Have you even
> started exploring that space, please?
Yes. As I postulated before [1], there are two obstacles to achieving
reliable frame pointer stack traces: 1) missing frame pointer logic and
2) exceptions. If either 1 or 2 was involved in the creation of any of
the frames on the stack, some frame pointers might be missing, and one
or more frames could be skipped by the stack walker.
The first obstacle can be overcome and enforced at compile time using
stackvalidate [1].
The second obstacle can be overcome at run time with a future RFC:
something like a save_stack_trace_tsk_validate() function which does
some validations while it walks the stack. It can return an error if it
detects an exception frame.
(It can also do some sanity checks like ensuring that it walks all the
way to the bottom of the stack and that each frame has a valid ktext
address. I also would propose a CONFIG_DEBUG_VALIDATE_STACK option
which tries to validate the stack on every call to schedule.)
Then we can have the hybrid consistency model rely on
save_stack_trace_tsk_validate(). If the stack is deemed unsafe, we can
fall back to retrying later, or to the kGraft mode of user mode barrier
patching.
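To sketch the shape such a walker could take (purely illustrative; the
function does not exist yet, and task_frame_pointer(), at_stack_bottom()
and is_exception_frame() are placeholder helpers, not existing kernel
API):
/* Illustrative sketch only -- save_stack_trace_tsk_validate() is a
 * proposed interface; the frame-decoding helpers are placeholders. */
static int save_stack_trace_tsk_validate(struct task_struct *tsk,
					 struct stack_trace *trace)
{
	struct stack_frame *frame = task_frame_pointer(tsk);

	while (!at_stack_bottom(tsk, frame)) {
		/* refuse stacks which contain an exception frame */
		if (is_exception_frame(frame))
			return -EINVAL;
		/* every saved return address must point into kernel text */
		if (!core_kernel_text(frame->return_address))
			return -EINVAL;
		if (trace->nr_entries < trace->max_entries)
			trace->entries[trace->nr_entries++] = frame->return_address;
		frame = frame->next_frame;
	}
	return 0;
}
The consistency model would then treat any non-zero return as "not safe
yet, try again later".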
Eventually I want to try to make *all* stacks reliable, even those with
exception frames. That would involve compile and run time validations
of DWARF data, and ensuring that DWARF and frame pointers are consistent
with each other. But those are general improvements which aren't
prerequisites for the hybrid model.
[1] http://lkml.kernel.org/r/[email protected]
--
Josh
Hi,
On Mon, 4 May 2015, Oleg Nesterov wrote:
> Well, I can't really comment on this change because I didn't see the other
> changes, and I do not know what klp_kgraft_task_in_progress() means...
>
> On 05/04, Jiri Slaby wrote:
> >
> > Luckily we can force the task to do that by sending it a fake signal,
>
> But note that signal_wake_up(0) won't wake the stopped/traced tasks up.
Yes, this could happen. Such a process would prevent the patching from
finishing, but that should not be an issue for the patching as such. The
process's flag would eventually be cleared.
> > +static void klp_kgraft_send_fake_signal(void)
> > +{
> > + struct task_struct *p;
> > + unsigned long flags;
> > +
> > + read_lock(&tasklist_lock);
> > + for_each_process(p) {
>
> Only the group leader can be klp_kgraft_task_in_progress?
>
> Looks like you need for_each_process_thread()...
Thanks for spotting. This is consistent with other places in the code and
needs to be fixed.
> > + /*
> > + * send fake signal to all non-kthread processes which are still
> > + * not migrated
> > + */
> > + if (!(p->flags & PF_KTHREAD) &&
>
> So this can miss the execing kernel thread, I do not know if this is
> correct or not. PF_KTHREAD is cleared in flush_old_exec().
Correct, we do not deal with kthreads in this RFC yet. There is more work
to do it correctly. See changelogs and comments in other patches.
> > + klp_kgraft_task_in_progress(p) &&
> > + lock_task_sighand(p, &flags)) {
>
> No need for lock_task_sighand(). Just spin_lock_irq(p->sighand->siglock).
> tasklist_lock + for_each_process guarantees that "p" has a valid ->sighand.
Ah, thank you.
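So with both points folded in, the loop would look roughly like this
(untested sketch for the next version):
/* Untested sketch for v2, folding in for_each_process_thread() and the
 * direct siglock locking suggested above. */
static void klp_kgraft_send_fake_signal(void)
{
	struct task_struct *g, *p;

	read_lock(&tasklist_lock);
	for_each_process_thread(g, p) {
		/*
		 * Send a fake signal to all non-kthread tasks which are
		 * still not migrated.
		 */
		if (!(p->flags & PF_KTHREAD) &&
		    klp_kgraft_task_in_progress(p)) {
			spin_lock_irq(&p->sighand->siglock);
			signal_wake_up(p, 0);
			spin_unlock_irq(&p->sighand->siglock);
		}
	}
	read_unlock(&tasklist_lock);
}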
>
> > + signal_wake_up(p, 0);
>
> To remind, this won't wakeup a TASK_STOPPED/TRACED thread.
>
> > void recalc_sigpending(void)
> > {
> > - if (!recalc_sigpending_tsk(current) && !freezing(current))
> > + if (!recalc_sigpending_tsk(current) && !freezing(current) &&
> > + !klp_kgraft_task_in_progress(current))
> > clear_thread_flag(TIF_SIGPENDING);
>
> It is not clear from this patch when TIF_SIGPENDING will be cleared.
>
> I assume other changes add some hooks into the do_notify_resume/get_signal
> paths; otherwise a task with klp_kgraft_task_in_progress() set will spin
> until klp_kgraft_task_in_progress(current) becomes "false".
That is correct. The flag is cleared in do_notify_resume path and also in
syscall_trace_enter_phase1. See patch number 4 of this RFC.
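For illustration only (this is not the actual hunk from patch 4), the
clearing boils down to something like the helper below being called from
those slow paths; klp_kgraft_mark_task_safe() is just a placeholder name
here:
/* Placeholder sketch, not the code from patch 4 of this RFC. */
static inline void klp_kgraft_mark_task_safe(void)
{
	/* current has reached a safe point -> switch it to the new universe */
	clear_thread_flag(TIF_KGR_IN_PROGRESS);
	/* the fake signal has done its job; drop TIF_SIGPENDING if possible */
	recalc_sigpending();
}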
Thanks a lot for the feedback
Miroslav
On Tue, 5 May 2015, Josh Poimboeuf wrote:
> > Agreed ... under the condition that it can be made really 100% reliable
> > *and* we'd be reasonably sure that we will be able to realistically
> > achieve the same goal on other architectures as well. Have you even
> > started exploring that space, please?
>
> Yes. As I postulated before [1], there are two obstacles to achieving
> reliable frame pointer stack traces: 1) missing frame pointer logic and
> 2) exceptions. If either 1 or 2 was involved in the creation of any of
> the frames on the stack, some frame pointers might be missing, and one
> or more frames could be skipped by the stack walker.
>
> The first obstacle can be overcome and enforced at compile time using
> stackvalidate [1].
>
> The second obstacle can be overcome at run time with a future RFC:
> something like a save_stack_trace_tsk_validate() function which does
> some validations while it walks the stack. It can return an error if it
> detects an exception frame.
>
> (It can also do some sanity checks like ensuring that it walks all the
> way to the bottom of the stack and that each frame has a valid ktext
> address. I also would propose a CONFIG_DEBUG_VALIDATE_STACK option
> which tries to validate the stack on every call to schedule.)
>
> Then we can have the hybrid consistency model rely on
> save_stack_trace_tsk_validate(). If the stack is deemed unsafe, we can
> fall back to retrying later, or to the kGraft mode of user mode barrier
> patching.
>
> Eventually I want to try to make *all* stacks reliable, even those with
> exception frames. That would involve compile and run time validations
> of DWARF data, and ensuring that DWARF and frame pointers are consistent
> with each other. But those are general improvements which aren't
> prerequisites for the hybrid model.
>
> [1] http://lkml.kernel.org/r/[email protected]
Yup, I understand what the goal here is (and don't get me wrong, I am of
course all for making frame pointer based stack traces reliable). The
question I had was -- your patchset is now very x86-centric. If we are
going to proceed with the hybrid patching model, we'd need to be able to
extend it to other architectures as easily as possible.
I haven't yet tried to explore how difficult it would be to extend your
approach to other archs. Have you?
Thanks,
--
Jiri Kosina
SUSE Labs
On Tue, May 12, 2015 at 11:45:15AM +0200, Jiri Kosina wrote:
> On Tue, 5 May 2015, Josh Poimboeuf wrote:
>
> > > Agreed ... under the condition that it can be made really 100% reliable
> > > *and* we'd be reasonably sure that we will be able to realistically
> > > achieve the same goal on other architectures as well. Have you even
> > > started exploring that space, please?
> >
> > Yes. As I postulated before [1], there are two obstacles to achieving
> > reliable frame pointer stack traces: 1) missing frame pointer logic and
> > 2) exceptions. If either 1 or 2 was involved in the creation of any of
> > the frames on the stack, some frame pointers might be missing, and one
> > or more frames could be skipped by the stack walker.
> >
> > The first obstacle can be overcome and enforced at compile time using
> > stackvalidate [1].
> >
> > The second obstacle can be overcome at run time with a future RFC:
> > something like a save_stack_trace_tsk_validate() function which does
> > some validations while it walks the stack. It can return an error if it
> > detects an exception frame.
> >
> > (It can also do some sanity checks like ensuring that it walks all the
> > way to the bottom of the stack and that each frame has a valid ktext
> > address. I also would propose a CONFIG_DEBUG_VALIDATE_STACK option
> > which tries to validate the stack on every call to schedule.)
> >
> > Then we can have the hybrid consistency model rely on
> > save_stack_trace_tsk_validate(). If the stack is deemed unsafe, we can
> > fall back to retrying later, or to the kGraft mode of user mode barrier
> > patching.
> >
> > Eventually I want to try to make *all* stacks reliable, even those with
> > exception frames. That would involve compile and run time validations
> > of DWARF data, and ensuring that DWARF and frame pointers are consistent
> > with each other. But those are general improvements which aren't
> > prerequisites for the hybrid model.
> >
> > [1] http://lkml.kernel.org/r/[email protected]
>
> Yup, I understand what the goal here is (and don't get me wrong, I am of
> course all for making frame pointer based stack traces reliable). The
> question I had was -- your patchset is now very x86-centric. If we are
> going to proceed with the hybrid patching model, we'd need to be able to
> extend it to other architectures as easily as possible.
>
> I haven't yet tried to explore how difficult it would be to extend your
> approach to other archs. Have you?
Sorry, I missed that part of the question. Right now stackvalidate only
supports x86_64, but the framework is very generic. Support for other
architectures can be easily plugged in.
The same approach can be used for most architectures, including powerpc
and s390 (which both have back chain pointers which can be validated)
and arm64 (which requires CONFIG_FRAME_POINTER).
Those architectures which don't have frame/backchain pointers will still
have some live patching options:
- make DWARF stack data reliable
- only support the fallback mode of the hybrid model (syscall barrier
switching)
- only support the "immediate" consistency model (using the same code as
today)
--
Josh