The current kprobes insn caches allocate memory areas for insn slots with
module_alloc(). The assumption is that the kernel image and module area
are both within the same +/- 2GB memory area.
This however is not true for s390 where the kernel image resides within
the first 2GB (DMA memory area), but the module area is far away in the
vmalloc area, usually somewhere close below the 4TB area.
For new pc relative instructions s390 needs insn slots that are within +/-
2GB of each area. That way we can patch displacements of pc-relative
instructions within the insn slots just like x86 and powerpc.
The module area works already with the normal insn slot allocator, however
there is currently no way to get insn slots that are within the first 2GB
on s390 (aka DMA area).
Therefore this patch set modifies the kprobes insn slot cache code in order
to allow to specify a custom allocator for the insn slot cache pages.
In addition architectures can now have private insn slot caches without the
need to modify common code.
Patch 1 unifies and simplifies the current insn and optinsn caches
implementation. This is a preparation which allows to add more
insn caches in a simple way.
Patch 2 adds the possibility to specify a custom allocator.
Patch 3 makes s390 use the new insn slot mechanisms and adds support for
pc-relative instructions with long displacements.
v1->v2: add possibility to specify a custom allocator and move the new
dmainsn cache into s390 code and leave common code alone.
Heiko Carstens (3):
kprobes: unify insn caches
kprobes: allow to specify custom allocator for insn caches
s390/kprobes: add support for pc-relative long displacement instructions
arch/s390/include/asm/kprobes.h | 4 +-
arch/s390/kernel/kprobes.c | 144 +++++++++++++++++++++++++++++++++++++--
include/linux/kprobes.h | 34 +++++++--
kernel/kprobes.c | 95 +++++++++++---------------
4 files changed, 209 insertions(+), 68 deletions(-)
--
1.7.10.4
The two insn caches (insn, and optinsn) each have an own mutex and
alloc/free functions (get_[opt]insn_slot() / free_[opt]insn_slot()).
Since there is the need for yet another insn cache which satisfies
dma allocations on s390, unify and simplify the current implementation:
- Move the per insn cache mutex into struct kprobe_insn_cache.
- Move the alloc/free functions to kprobe.h so they are simply
wrappers for the generic __get_insn_slot/__free_insn_slot functions.
The implementation is done with a DEFINE_INSN_CACHE_OPS() macro
which provides the alloc/free functions for each cache if needed.
- move the struct kprobe_insn_cache to kprobe.h which allows to generate
architecture specific insn slot caches outside of the core kprobes
code.
Signed-off-by: Heiko Carstens <[email protected]>
---
include/linux/kprobes.h | 32 +++++++++++++++++---
kernel/kprobes.c | 75 +++++++++++++----------------------------------
2 files changed, 49 insertions(+), 58 deletions(-)
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index ca1d27a..077f653 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -264,10 +264,34 @@ extern void arch_arm_kprobe(struct kprobe *p);
extern void arch_disarm_kprobe(struct kprobe *p);
extern int arch_init_kprobes(void);
extern void show_registers(struct pt_regs *regs);
-extern kprobe_opcode_t *get_insn_slot(void);
-extern void free_insn_slot(kprobe_opcode_t *slot, int dirty);
extern void kprobes_inc_nmissed_count(struct kprobe *p);
+struct kprobe_insn_cache {
+ struct mutex mutex;
+ struct list_head pages; /* list of kprobe_insn_page */
+ size_t insn_size; /* size of instruction slot */
+ int nr_garbage;
+};
+
+extern kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c);
+extern void __free_insn_slot(struct kprobe_insn_cache *c,
+ kprobe_opcode_t *slot, int dirty);
+
+#define DEFINE_INSN_CACHE_OPS(__name) \
+extern struct kprobe_insn_cache kprobe_##__name##_slots; \
+ \
+static inline kprobe_opcode_t *get_##__name##_slot(void) \
+{ \
+ return __get_insn_slot(&kprobe_##__name##_slots); \
+} \
+ \
+static inline void free_##__name##_slot(kprobe_opcode_t *slot, int dirty)\
+{ \
+ __free_insn_slot(&kprobe_##__name##_slots, slot, dirty); \
+} \
+
+DEFINE_INSN_CACHE_OPS(insn);
+
#ifdef CONFIG_OPTPROBES
/*
* Internal structure for direct jump optimized probe
@@ -287,13 +311,13 @@ extern void arch_optimize_kprobes(struct list_head *oplist);
extern void arch_unoptimize_kprobes(struct list_head *oplist,
struct list_head *done_list);
extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
-extern kprobe_opcode_t *get_optinsn_slot(void);
-extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
extern int arch_within_optimized_kprobe(struct optimized_kprobe *op,
unsigned long addr);
extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs);
+DEFINE_INSN_CACHE_OPS(optinsn);
+
#ifdef CONFIG_SYSCTL
extern int sysctl_kprobes_optimization;
extern int proc_kprobes_optimization_handler(struct ctl_table *table,
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 6e33498..9e4912d 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -121,12 +121,6 @@ struct kprobe_insn_page {
(offsetof(struct kprobe_insn_page, slot_used) + \
(sizeof(char) * (slots)))
-struct kprobe_insn_cache {
- struct list_head pages; /* list of kprobe_insn_page */
- size_t insn_size; /* size of instruction slot */
- int nr_garbage;
-};
-
static int slots_per_page(struct kprobe_insn_cache *c)
{
return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t));
@@ -138,8 +132,8 @@ enum kprobe_slot_state {
SLOT_USED = 2,
};
-static DEFINE_MUTEX(kprobe_insn_mutex); /* Protects kprobe_insn_slots */
-static struct kprobe_insn_cache kprobe_insn_slots = {
+struct kprobe_insn_cache kprobe_insn_slots = {
+ .mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
.insn_size = MAX_INSN_SIZE,
.nr_garbage = 0,
@@ -150,10 +144,12 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c);
* __get_insn_slot() - Find a slot on an executable page for an instruction.
* We allocate an executable page if there's no room on existing ones.
*/
-static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
+kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
{
struct kprobe_insn_page *kip;
+ kprobe_opcode_t *slot = NULL;
+ mutex_lock(&c->mutex);
retry:
list_for_each_entry(kip, &c->pages, list) {
if (kip->nused < slots_per_page(c)) {
@@ -162,7 +158,8 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
if (kip->slot_used[i] == SLOT_CLEAN) {
kip->slot_used[i] = SLOT_USED;
kip->nused++;
- return kip->insns + (i * c->insn_size);
+ slot = kip->insns + (i * c->insn_size);
+ goto out;
}
}
/* kip->nused is broken. Fix it. */
@@ -178,7 +175,7 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
/* All out of space. Need to allocate a new page. */
kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL);
if (!kip)
- return NULL;
+ goto out;
/*
* Use module_alloc so this page is within +/- 2GB of where the
@@ -188,7 +185,7 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
kip->insns = module_alloc(PAGE_SIZE);
if (!kip->insns) {
kfree(kip);
- return NULL;
+ goto out;
}
INIT_LIST_HEAD(&kip->list);
memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
@@ -196,19 +193,10 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
kip->nused = 1;
kip->ngarbage = 0;
list_add(&kip->list, &c->pages);
- return kip->insns;
-}
-
-
-kprobe_opcode_t __kprobes *get_insn_slot(void)
-{
- kprobe_opcode_t *ret = NULL;
-
- mutex_lock(&kprobe_insn_mutex);
- ret = __get_insn_slot(&kprobe_insn_slots);
- mutex_unlock(&kprobe_insn_mutex);
-
- return ret;
+ slot = kip->insns;
+out:
+ mutex_unlock(&c->mutex);
+ return slot;
}
/* Return 1 if all garbages are collected, otherwise 0. */
@@ -255,11 +243,12 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c)
return 0;
}
-static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
- kprobe_opcode_t *slot, int dirty)
+void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
+ kprobe_opcode_t *slot, int dirty)
{
struct kprobe_insn_page *kip;
+ mutex_lock(&c->mutex);
list_for_each_entry(kip, &c->pages, list) {
long idx = ((long)slot - (long)kip->insns) /
(c->insn_size * sizeof(kprobe_opcode_t));
@@ -272,45 +261,23 @@ static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
collect_garbage_slots(c);
} else
collect_one_slot(kip, idx);
- return;
+ goto out;
}
}
/* Could not free this slot. */
WARN_ON(1);
+out:
+ mutex_unlock(&c->mutex);
}
-void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
-{
- mutex_lock(&kprobe_insn_mutex);
- __free_insn_slot(&kprobe_insn_slots, slot, dirty);
- mutex_unlock(&kprobe_insn_mutex);
-}
#ifdef CONFIG_OPTPROBES
/* For optimized_kprobe buffer */
-static DEFINE_MUTEX(kprobe_optinsn_mutex); /* Protects kprobe_optinsn_slots */
-static struct kprobe_insn_cache kprobe_optinsn_slots = {
+struct kprobe_insn_cache kprobe_optinsn_slots = {
+ .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
/* .insn_size is initialized later */
.nr_garbage = 0,
};
-/* Get a slot for optimized_kprobe buffer */
-kprobe_opcode_t __kprobes *get_optinsn_slot(void)
-{
- kprobe_opcode_t *ret = NULL;
-
- mutex_lock(&kprobe_optinsn_mutex);
- ret = __get_insn_slot(&kprobe_optinsn_slots);
- mutex_unlock(&kprobe_optinsn_mutex);
-
- return ret;
-}
-
-void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty)
-{
- mutex_lock(&kprobe_optinsn_mutex);
- __free_insn_slot(&kprobe_optinsn_slots, slot, dirty);
- mutex_unlock(&kprobe_optinsn_mutex);
-}
#endif
#endif
--
1.7.10.4
With the general-instruction extension facility (z10) a couple of
instructions with a pc-relative long displacement were introduced.
The kprobes support for these instructions however was never implemented.
As a result, if anybody ever put a probe on any of these instructions the
result would have been random behaviour after the instruction got executed
within the insn slot.
So let's add the missing handling for these instructions. Since all of the
new instructions have 32 bit signed displacement the easiest solution is
to allocate an insn slot that is within the same 2GB area like the original
instruction and patch the displacement field.
Signed-off-by: Heiko Carstens <[email protected]>
---
arch/s390/include/asm/kprobes.h | 4 +-
arch/s390/kernel/kprobes.c | 144 +++++++++++++++++++++++++++++++++++++--
2 files changed, 140 insertions(+), 8 deletions(-)
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index dcf6948..4176dfe 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -31,6 +31,8 @@
#include <linux/ptrace.h>
#include <linux/percpu.h>
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+
struct pt_regs;
struct kprobe;
@@ -57,7 +59,7 @@ typedef u16 kprobe_opcode_t;
/* Architecture specific copy of original instruction */
struct arch_specific_insn {
/* copy of original instruction */
- kprobe_opcode_t insn[MAX_INSN_SIZE];
+ kprobe_opcode_t *insn;
};
struct prev_kprobe {
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 3388b2b..cb7ac9e 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -37,6 +37,26 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = { };
+DEFINE_INSN_CACHE_OPS(dmainsn);
+
+static void *alloc_dmainsn_page(void)
+{
+ return (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
+}
+
+static void free_dmainsn_page(void *page)
+{
+ free_page((unsigned long)page);
+}
+
+struct kprobe_insn_cache kprobe_dmainsn_slots = {
+ .mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex),
+ .alloc = alloc_dmainsn_page,
+ .free = free_dmainsn_page,
+ .pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages),
+ .insn_size = MAX_INSN_SIZE,
+};
+
static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn)
{
switch (insn[0] >> 8) {
@@ -100,9 +120,8 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn)
fixup |= FIXUP_RETURN_REGISTER;
break;
case 0xc0:
- if ((insn[0] & 0x0f) == 0x00 || /* larl */
- (insn[0] & 0x0f) == 0x05) /* brasl */
- fixup |= FIXUP_RETURN_REGISTER;
+ if ((insn[0] & 0x0f) == 0x05) /* brasl */
+ fixup |= FIXUP_RETURN_REGISTER;
break;
case 0xeb:
if ((insn[2] & 0xff) == 0x44 || /* bxhg */
@@ -117,18 +136,128 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn)
return fixup;
}
+static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn)
+{
+ /* Check if we have a RIL-b or RIL-c format instruction which
+ * we need to modify in order to avoid instruction emulation. */
+ switch (insn[0] >> 8) {
+ case 0xc0:
+ if ((insn[0] & 0x0f) == 0x00) /* larl */
+ return true;
+ break;
+ case 0xc4:
+ switch (insn[0] & 0x0f) {
+ case 0x02: /* llhrl */
+ case 0x04: /* lghrl */
+ case 0x05: /* lhrl */
+ case 0x06: /* llghrl */
+ case 0x07: /* sthrl */
+ case 0x08: /* lgrl */
+ case 0x0b: /* stgrl */
+ case 0x0c: /* lgfrl */
+ case 0x0d: /* lrl */
+ case 0x0e: /* llgfrl */
+ case 0x0f: /* strl */
+ return true;
+ }
+ break;
+ case 0xc6:
+ switch (insn[0] & 0x0f) {
+ case 0x00: /* exrl */
+ case 0x02: /* pfdrl */
+ case 0x04: /* cghrl */
+ case 0x05: /* chrl */
+ case 0x06: /* clghrl */
+ case 0x07: /* clhrl */
+ case 0x08: /* cgrl */
+ case 0x0a: /* clgrl */
+ case 0x0c: /* cgfrl */
+ case 0x0d: /* crl */
+ case 0x0e: /* clgfrl */
+ case 0x0f: /* clrl */
+ return true;
+ }
+ break;
+ }
+ return false;
+}
+
+static void __kprobes copy_instruction(struct kprobe *p)
+{
+ s64 disp, new_disp;
+ u64 addr, new_addr;
+
+ memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2);
+ if (!is_insn_relative_long(p->ainsn.insn))
+ return;
+ /*
+ * For pc-relative instructions in RIL-b or RIL-c format patch the
+ * RI2 displacement field. We have already made sure that the insn
+ * slot for the patched instruction is within the same 2GB area
+ * as the original instruction (either kernel image or module area).
+ * Therefore the new displacement will always fit.
+ */
+ disp = *(s32 *)&p->ainsn.insn[1];
+ addr = (u64)(unsigned long)p->addr;
+ new_addr = (u64)(unsigned long)p->ainsn.insn;
+ new_disp = ((addr + (disp * 2)) - new_addr) / 2;
+ *(s32 *)&p->ainsn.insn[1] = new_disp;
+}
+
+static inline int is_kernel_addr(void *addr)
+{
+ return addr < (void *)_end;
+}
+
+static inline int is_module_addr(void *addr)
+{
+#ifdef CONFIG_64BIT
+ BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
+ if (addr < (void *)MODULES_VADDR)
+ return 0;
+ if (addr > (void *)MODULES_END)
+ return 0;
+#endif
+ return 1;
+}
+
+static int __kprobes s390_get_insn_slot(struct kprobe *p)
+{
+ /*
+ * Get an insn slot that is within the same 2GB area like the original
+ * instruction. That way instructions with a 32bit signed displacement
+ * field can be patched and executed within the insn slot.
+ */
+ p->ainsn.insn = NULL;
+ if (is_kernel_addr(p->addr))
+ p->ainsn.insn = get_dmainsn_slot();
+ if (is_module_addr(p->addr))
+ p->ainsn.insn = get_insn_slot();
+ return p->ainsn.insn ? 0 : -ENOMEM;
+}
+
+static void __kprobes s390_free_insn_slot(struct kprobe *p)
+{
+ if (!p->ainsn.insn)
+ return;
+ if (is_kernel_addr(p->addr))
+ free_dmainsn_slot(p->ainsn.insn, 0);
+ else
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
+}
+
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
if ((unsigned long) p->addr & 0x01)
return -EINVAL;
-
/* Make sure the probe isn't going on a difficult instruction */
if (is_prohibited_opcode(p->addr))
return -EINVAL;
-
+ if (s390_get_insn_slot(p))
+ return -ENOMEM;
p->opcode = *p->addr;
- memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2);
-
+ copy_instruction(p);
return 0;
}
@@ -169,6 +298,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
+ s390_free_insn_slot(p);
}
static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb,
--
1.7.10.4
The current two insn slot caches both use module_alloc/module_free
to allocate and free insn slot cache pages.
For s390 this is not sufficient since there is the need to allocate
insn slots that are either within the vmalloc module area or within
dma memory.
Therefore add a mechanism which allows to specify an own allocator
for an own insn slot cache.
Signed-off-by: Heiko Carstens <[email protected]>
---
include/linux/kprobes.h | 2 ++
kernel/kprobes.c | 20 ++++++++++++++++++--
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 077f653..925eaf2 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -268,6 +268,8 @@ extern void kprobes_inc_nmissed_count(struct kprobe *p);
struct kprobe_insn_cache {
struct mutex mutex;
+ void *(*alloc)(void); /* allocate insn page */
+ void (*free)(void *); /* free insn page */
struct list_head pages; /* list of kprobe_insn_page */
size_t insn_size; /* size of instruction slot */
int nr_garbage;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 9e4912d..a0d367a 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -112,6 +112,7 @@ static struct kprobe_blackpoint kprobe_blacklist[] = {
struct kprobe_insn_page {
struct list_head list;
kprobe_opcode_t *insns; /* Page of instruction slots */
+ struct kprobe_insn_cache *cache;
int nused;
int ngarbage;
char slot_used[];
@@ -132,8 +133,20 @@ enum kprobe_slot_state {
SLOT_USED = 2,
};
+static void *alloc_insn_page(void)
+{
+ return module_alloc(PAGE_SIZE);
+}
+
+static void free_insn_page(void *page)
+{
+ module_free(NULL, page);
+}
+
struct kprobe_insn_cache kprobe_insn_slots = {
.mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
+ .alloc = alloc_insn_page,
+ .free = free_insn_page,
.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
.insn_size = MAX_INSN_SIZE,
.nr_garbage = 0,
@@ -182,7 +195,7 @@ kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
* kernel image and loaded module images reside. This is required
* so x86_64 can correctly handle the %rip-relative fixups.
*/
- kip->insns = module_alloc(PAGE_SIZE);
+ kip->insns = c->alloc();
if (!kip->insns) {
kfree(kip);
goto out;
@@ -192,6 +205,7 @@ kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
kip->slot_used[0] = SLOT_USED;
kip->nused = 1;
kip->ngarbage = 0;
+ kip->cache = c;
list_add(&kip->list, &c->pages);
slot = kip->insns;
out:
@@ -213,7 +227,7 @@ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
*/
if (!list_is_singular(&kip->list)) {
list_del(&kip->list);
- module_free(NULL, kip->insns);
+ kip->cache->free(kip->insns);
kfree(kip);
}
return 1;
@@ -274,6 +288,8 @@ out:
/* For optimized_kprobe buffer */
struct kprobe_insn_cache kprobe_optinsn_slots = {
.mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
+ .alloc = alloc_insn_page,
+ .free = free_insn_page,
.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
/* .insn_size is initialized later */
.nr_garbage = 0,
--
1.7.10.4
(2013/08/23 20:04), Heiko Carstens wrote:
> The two insn caches (insn, and optinsn) each have an own mutex and
> alloc/free functions (get_[opt]insn_slot() / free_[opt]insn_slot()).
>
> Since there is the need for yet another insn cache which satifies
> dma allocations on s390, unify and simplify the current implementation:
>
> - Move the per insn cache mutex into struct kprobe_insn_cache.
> - Move the alloc/free functions to kprobe.h so they are simply
> wrappers for the generic __get_insn_slot/__free_insn_slot functions.
> The implementation is done with a DEFINE_INSN_CACHE_OPS() macro
> which provides the alloc/free functions for each cache if needed.
> - move the struct kprobe_insn_cache to kprobe.h which allows to generate
> architecture specific insn slot caches outside of the core kprobes
> code.
>
Looks Good for me :)
Acked-by: Masami Hiramatsu <[email protected]>
Thank you!
> Signed-off-by: Heiko Carstens <[email protected]>
> ---
> include/linux/kprobes.h | 32 +++++++++++++++++---
> kernel/kprobes.c | 75 +++++++++++++----------------------------------
> 2 files changed, 49 insertions(+), 58 deletions(-)
>
> diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
> index ca1d27a..077f653 100644
> --- a/include/linux/kprobes.h
> +++ b/include/linux/kprobes.h
> @@ -264,10 +264,34 @@ extern void arch_arm_kprobe(struct kprobe *p);
> extern void arch_disarm_kprobe(struct kprobe *p);
> extern int arch_init_kprobes(void);
> extern void show_registers(struct pt_regs *regs);
> -extern kprobe_opcode_t *get_insn_slot(void);
> -extern void free_insn_slot(kprobe_opcode_t *slot, int dirty);
> extern void kprobes_inc_nmissed_count(struct kprobe *p);
>
> +struct kprobe_insn_cache {
> + struct mutex mutex;
> + struct list_head pages; /* list of kprobe_insn_page */
> + size_t insn_size; /* size of instruction slot */
> + int nr_garbage;
> +};
> +
> +extern kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c);
> +extern void __free_insn_slot(struct kprobe_insn_cache *c,
> + kprobe_opcode_t *slot, int dirty);
> +
> +#define DEFINE_INSN_CACHE_OPS(__name) \
> +extern struct kprobe_insn_cache kprobe_##__name##_slots; \
> + \
> +static inline kprobe_opcode_t *get_##__name##_slot(void) \
> +{ \
> + return __get_insn_slot(&kprobe_##__name##_slots); \
> +} \
> + \
> +static inline void free_##__name##_slot(kprobe_opcode_t *slot, int dirty)\
> +{ \
> + __free_insn_slot(&kprobe_##__name##_slots, slot, dirty); \
> +} \
> +
> +DEFINE_INSN_CACHE_OPS(insn);
> +
> #ifdef CONFIG_OPTPROBES
> /*
> * Internal structure for direct jump optimized probe
> @@ -287,13 +311,13 @@ extern void arch_optimize_kprobes(struct list_head *oplist);
> extern void arch_unoptimize_kprobes(struct list_head *oplist,
> struct list_head *done_list);
> extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
> -extern kprobe_opcode_t *get_optinsn_slot(void);
> -extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
> extern int arch_within_optimized_kprobe(struct optimized_kprobe *op,
> unsigned long addr);
>
> extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs);
>
> +DEFINE_INSN_CACHE_OPS(optinsn);
> +
> #ifdef CONFIG_SYSCTL
> extern int sysctl_kprobes_optimization;
> extern int proc_kprobes_optimization_handler(struct ctl_table *table,
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 6e33498..9e4912d 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -121,12 +121,6 @@ struct kprobe_insn_page {
> (offsetof(struct kprobe_insn_page, slot_used) + \
> (sizeof(char) * (slots)))
>
> -struct kprobe_insn_cache {
> - struct list_head pages; /* list of kprobe_insn_page */
> - size_t insn_size; /* size of instruction slot */
> - int nr_garbage;
> -};
> -
> static int slots_per_page(struct kprobe_insn_cache *c)
> {
> return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t));
> @@ -138,8 +132,8 @@ enum kprobe_slot_state {
> SLOT_USED = 2,
> };
>
> -static DEFINE_MUTEX(kprobe_insn_mutex); /* Protects kprobe_insn_slots */
> -static struct kprobe_insn_cache kprobe_insn_slots = {
> +struct kprobe_insn_cache kprobe_insn_slots = {
> + .mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
> .pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
> .insn_size = MAX_INSN_SIZE,
> .nr_garbage = 0,
> @@ -150,10 +144,12 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c);
> * __get_insn_slot() - Find a slot on an executable page for an instruction.
> * We allocate an executable page if there's no room on existing ones.
> */
> -static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
> +kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
> {
> struct kprobe_insn_page *kip;
> + kprobe_opcode_t *slot = NULL;
>
> + mutex_lock(&c->mutex);
> retry:
> list_for_each_entry(kip, &c->pages, list) {
> if (kip->nused < slots_per_page(c)) {
> @@ -162,7 +158,8 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
> if (kip->slot_used[i] == SLOT_CLEAN) {
> kip->slot_used[i] = SLOT_USED;
> kip->nused++;
> - return kip->insns + (i * c->insn_size);
> + slot = kip->insns + (i * c->insn_size);
> + goto out;
> }
> }
> /* kip->nused is broken. Fix it. */
> @@ -178,7 +175,7 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
> /* All out of space. Need to allocate a new page. */
> kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL);
> if (!kip)
> - return NULL;
> + goto out;
>
> /*
> * Use module_alloc so this page is within +/- 2GB of where the
> @@ -188,7 +185,7 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
> kip->insns = module_alloc(PAGE_SIZE);
> if (!kip->insns) {
> kfree(kip);
> - return NULL;
> + goto out;
> }
> INIT_LIST_HEAD(&kip->list);
> memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
> @@ -196,19 +193,10 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
> kip->nused = 1;
> kip->ngarbage = 0;
> list_add(&kip->list, &c->pages);
> - return kip->insns;
> -}
> -
> -
> -kprobe_opcode_t __kprobes *get_insn_slot(void)
> -{
> - kprobe_opcode_t *ret = NULL;
> -
> - mutex_lock(&kprobe_insn_mutex);
> - ret = __get_insn_slot(&kprobe_insn_slots);
> - mutex_unlock(&kprobe_insn_mutex);
> -
> - return ret;
> + slot = kip->insns;
> +out:
> + mutex_unlock(&c->mutex);
> + return slot;
> }
>
> /* Return 1 if all garbages are collected, otherwise 0. */
> @@ -255,11 +243,12 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c)
> return 0;
> }
>
> -static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
> - kprobe_opcode_t *slot, int dirty)
> +void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
> + kprobe_opcode_t *slot, int dirty)
> {
> struct kprobe_insn_page *kip;
>
> + mutex_lock(&c->mutex);
> list_for_each_entry(kip, &c->pages, list) {
> long idx = ((long)slot - (long)kip->insns) /
> (c->insn_size * sizeof(kprobe_opcode_t));
> @@ -272,45 +261,23 @@ static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
> collect_garbage_slots(c);
> } else
> collect_one_slot(kip, idx);
> - return;
> + goto out;
> }
> }
> /* Could not free this slot. */
> WARN_ON(1);
> +out:
> + mutex_unlock(&c->mutex);
> }
>
> -void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
> -{
> - mutex_lock(&kprobe_insn_mutex);
> - __free_insn_slot(&kprobe_insn_slots, slot, dirty);
> - mutex_unlock(&kprobe_insn_mutex);
> -}
> #ifdef CONFIG_OPTPROBES
> /* For optimized_kprobe buffer */
> -static DEFINE_MUTEX(kprobe_optinsn_mutex); /* Protects kprobe_optinsn_slots */
> -static struct kprobe_insn_cache kprobe_optinsn_slots = {
> +struct kprobe_insn_cache kprobe_optinsn_slots = {
> + .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
> .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
> /* .insn_size is initialized later */
> .nr_garbage = 0,
> };
> -/* Get a slot for optimized_kprobe buffer */
> -kprobe_opcode_t __kprobes *get_optinsn_slot(void)
> -{
> - kprobe_opcode_t *ret = NULL;
> -
> - mutex_lock(&kprobe_optinsn_mutex);
> - ret = __get_insn_slot(&kprobe_optinsn_slots);
> - mutex_unlock(&kprobe_optinsn_mutex);
> -
> - return ret;
> -}
> -
> -void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty)
> -{
> - mutex_lock(&kprobe_optinsn_mutex);
> - __free_insn_slot(&kprobe_optinsn_slots, slot, dirty);
> - mutex_unlock(&kprobe_optinsn_mutex);
> -}
> #endif
> #endif
>
>
--
Masami HIRAMATSU
IT Management Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]
(2013/08/23 20:04), Heiko Carstens wrote:
> The current two insn slot caches both use module_alloc/module_free
> to allocate and free insn slot cache pages.
> For s390 this is not sufficient since there is the need to allocate
> insn slots that are either within the vmalloc module area or within
> dma memory.
> Therefore add a mechanism which allows to specify an own allocator
> for an own insn slot cache.
>
Acked-by: Masami Hiramatsu <[email protected]>
Thank you!
> Signed-off-by: Heiko Carstens <[email protected]>
> ---
> include/linux/kprobes.h | 2 ++
> kernel/kprobes.c | 20 ++++++++++++++++++--
> 2 files changed, 20 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
> index 077f653..925eaf2 100644
> --- a/include/linux/kprobes.h
> +++ b/include/linux/kprobes.h
> @@ -268,6 +268,8 @@ extern void kprobes_inc_nmissed_count(struct kprobe *p);
>
> struct kprobe_insn_cache {
> struct mutex mutex;
> + void *(*alloc)(void); /* allocate insn page */
> + void (*free)(void *); /* free insn page */
> struct list_head pages; /* list of kprobe_insn_page */
> size_t insn_size; /* size of instruction slot */
> int nr_garbage;
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 9e4912d..a0d367a 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -112,6 +112,7 @@ static struct kprobe_blackpoint kprobe_blacklist[] = {
> struct kprobe_insn_page {
> struct list_head list;
> kprobe_opcode_t *insns; /* Page of instruction slots */
> + struct kprobe_insn_cache *cache;
> int nused;
> int ngarbage;
> char slot_used[];
> @@ -132,8 +133,20 @@ enum kprobe_slot_state {
> SLOT_USED = 2,
> };
>
> +static void *alloc_insn_page(void)
> +{
> + return module_alloc(PAGE_SIZE);
> +}
> +
> +static void free_insn_page(void *page)
> +{
> + module_free(NULL, page);
> +}
> +
> struct kprobe_insn_cache kprobe_insn_slots = {
> .mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
> + .alloc = alloc_insn_page,
> + .free = free_insn_page,
> .pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
> .insn_size = MAX_INSN_SIZE,
> .nr_garbage = 0,
> @@ -182,7 +195,7 @@ kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
> * kernel image and loaded module images reside. This is required
> * so x86_64 can correctly handle the %rip-relative fixups.
> */
> - kip->insns = module_alloc(PAGE_SIZE);
> + kip->insns = c->alloc();
> if (!kip->insns) {
> kfree(kip);
> goto out;
> @@ -192,6 +205,7 @@ kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
> kip->slot_used[0] = SLOT_USED;
> kip->nused = 1;
> kip->ngarbage = 0;
> + kip->cache = c;
> list_add(&kip->list, &c->pages);
> slot = kip->insns;
> out:
> @@ -213,7 +227,7 @@ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
> */
> if (!list_is_singular(&kip->list)) {
> list_del(&kip->list);
> - module_free(NULL, kip->insns);
> + kip->cache->free(kip->insns);
> kfree(kip);
> }
> return 1;
> @@ -274,6 +288,8 @@ out:
> /* For optimized_kprobe buffer */
> struct kprobe_insn_cache kprobe_optinsn_slots = {
> .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
> + .alloc = alloc_insn_page,
> + .free = free_insn_page,
> .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
> /* .insn_size is initialized later */
> .nr_garbage = 0,
>
--
Masami HIRAMATSU
IT Management Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]
(2013/08/23 20:04), Heiko Carstens wrote:
> With the general-instruction extension facility (z10) a couple of
> instructions with a pc-relative long displacement were introduced.
> The kprobes support for these instructions however was never implemented.
>
> In result, if anybody ever put a probe on any of these instructions the
> result would have been random behaviour after the instruction got executed
> within the insn slot.
>
> So lets add the missing handling for these instructions. Since all of the
> new instructions have 32 bit signed displacement the easiest solution is
> to allocate an insn slot that is within the same 2GB area like the original
> instruction and patch the displacement field.
>
At least generic kprobes flow looks good for me.
Reviewed-by: Masami Hiramatsu <[email protected]>
> Signed-off-by: Heiko Carstens <[email protected]>
> ---
> arch/s390/include/asm/kprobes.h | 4 +-
> arch/s390/kernel/kprobes.c | 144 +++++++++++++++++++++++++++++++++++++--
> 2 files changed, 140 insertions(+), 8 deletions(-)
>
> diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
> index dcf6948..4176dfe 100644
> --- a/arch/s390/include/asm/kprobes.h
> +++ b/arch/s390/include/asm/kprobes.h
> @@ -31,6 +31,8 @@
> #include <linux/ptrace.h>
> #include <linux/percpu.h>
>
> +#define __ARCH_WANT_KPROBES_INSN_SLOT
> +
> struct pt_regs;
> struct kprobe;
>
> @@ -57,7 +59,7 @@ typedef u16 kprobe_opcode_t;
> /* Architecture specific copy of original instruction */
> struct arch_specific_insn {
> /* copy of original instruction */
> - kprobe_opcode_t insn[MAX_INSN_SIZE];
> + kprobe_opcode_t *insn;
> };
>
> struct prev_kprobe {
> diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
> index 3388b2b..cb7ac9e 100644
> --- a/arch/s390/kernel/kprobes.c
> +++ b/arch/s390/kernel/kprobes.c
> @@ -37,6 +37,26 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
>
> struct kretprobe_blackpoint kretprobe_blacklist[] = { };
>
> +DEFINE_INSN_CACHE_OPS(dmainsn);
> +
> +static void *alloc_dmainsn_page(void)
> +{
> + return (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
> +}
> +
> +static void free_dmainsn_page(void *page)
> +{
> + free_page((unsigned long)page);
> +}
> +
> +struct kprobe_insn_cache kprobe_dmainsn_slots = {
> + .mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex),
> + .alloc = alloc_dmainsn_page,
> + .free = free_dmainsn_page,
> + .pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages),
> + .insn_size = MAX_INSN_SIZE,
> +};
> +
> static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn)
> {
> switch (insn[0] >> 8) {
> @@ -100,9 +120,8 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn)
> fixup |= FIXUP_RETURN_REGISTER;
> break;
> case 0xc0:
> - if ((insn[0] & 0x0f) == 0x00 || /* larl */
> - (insn[0] & 0x0f) == 0x05) /* brasl */
> - fixup |= FIXUP_RETURN_REGISTER;
> + if ((insn[0] & 0x0f) == 0x05) /* brasl */
> + fixup |= FIXUP_RETURN_REGISTER;
> break;
> case 0xeb:
> if ((insn[2] & 0xff) == 0x44 || /* bxhg */
> @@ -117,18 +136,128 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn)
> return fixup;
> }
>
> +static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn)
> +{
> + /* Check if we have a RIL-b or RIL-c format instruction which
> + * we need to modify in order to avoid instruction emulation. */
> + switch (insn[0] >> 8) {
> + case 0xc0:
> + if ((insn[0] & 0x0f) == 0x00) /* larl */
> + return true;
> + break;
> + case 0xc4:
> + switch (insn[0] & 0x0f) {
> + case 0x02: /* llhrl */
> + case 0x04: /* lghrl */
> + case 0x05: /* lhrl */
> + case 0x06: /* llghrl */
> + case 0x07: /* sthrl */
> + case 0x08: /* lgrl */
> + case 0x0b: /* stgrl */
> + case 0x0c: /* lgfrl */
> + case 0x0d: /* lrl */
> + case 0x0e: /* llgfrl */
> + case 0x0f: /* strl */
> + return true;
> + }
> + break;
> + case 0xc6:
> + switch (insn[0] & 0x0f) {
> + case 0x00: /* exrl */
> + case 0x02: /* pfdrl */
> + case 0x04: /* cghrl */
> + case 0x05: /* chrl */
> + case 0x06: /* clghrl */
> + case 0x07: /* clhrl */
> + case 0x08: /* cgrl */
> + case 0x0a: /* clgrl */
> + case 0x0c: /* cgfrl */
> + case 0x0d: /* crl */
> + case 0x0e: /* clgfrl */
> + case 0x0f: /* clrl */
> + return true;
> + }
> + break;
> + }
> + return false;
> +}
> +
> +static void __kprobes copy_instruction(struct kprobe *p)
> +{
> + s64 disp, new_disp;
> + u64 addr, new_addr;
> +
> + memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2);
> + if (!is_insn_relative_long(p->ainsn.insn))
> + return;
> + /*
> + * For pc-relative instructions in RIL-b or RIL-c format patch the
> + * RI2 displacement field. We have already made sure that the insn
> + * slot for the patched instruction is within the same 2GB area
> + * as the original instruction (either kernel image or module area).
> + * Therefore the new displacement will always fit.
> + */
> + disp = *(s32 *)&p->ainsn.insn[1];
> + addr = (u64)(unsigned long)p->addr;
> + new_addr = (u64)(unsigned long)p->ainsn.insn;
> + new_disp = ((addr + (disp * 2)) - new_addr) / 2;
> + *(s32 *)&p->ainsn.insn[1] = new_disp;
> +}
> +
> +static inline int is_kernel_addr(void *addr)
> +{
> + return addr < (void *)_end;
> +}
> +
> +static inline int is_module_addr(void *addr)
> +{
> +#ifdef CONFIG_64BIT
> + BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
> + if (addr < (void *)MODULES_VADDR)
> + return 0;
> + if (addr > (void *)MODULES_END)
> + return 0;
> +#endif
> + return 1;
> +}
> +
> +static int __kprobes s390_get_insn_slot(struct kprobe *p)
> +{
> + /*
> + * Get an insn slot that is within the same 2GB area like the original
> + * instruction. That way instructions with a 32bit signed displacement
> + * field can be patched and executed within the insn slot.
> + */
> + p->ainsn.insn = NULL;
> + if (is_kernel_addr(p->addr))
> + p->ainsn.insn = get_dmainsn_slot();
> + if (is_module_addr(p->addr))
> + p->ainsn.insn = get_insn_slot();
> + return p->ainsn.insn ? 0 : -ENOMEM;
> +}
> +
> +static void __kprobes s390_free_insn_slot(struct kprobe *p)
> +{
> + if (!p->ainsn.insn)
> + return;
> + if (is_kernel_addr(p->addr))
> + free_dmainsn_slot(p->ainsn.insn, 0);
> + else
> + free_insn_slot(p->ainsn.insn, 0);
> + p->ainsn.insn = NULL;
> +}
> +
> int __kprobes arch_prepare_kprobe(struct kprobe *p)
> {
> if ((unsigned long) p->addr & 0x01)
> return -EINVAL;
> -
> /* Make sure the probe isn't going on a difficult instruction */
> if (is_prohibited_opcode(p->addr))
> return -EINVAL;
> -
> + if (s390_get_insn_slot(p))
> + return -ENOMEM;
> p->opcode = *p->addr;
> - memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2);
> -
> + copy_instruction(p);
> return 0;
> }
>
> @@ -169,6 +298,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
>
> void __kprobes arch_remove_kprobe(struct kprobe *p)
> {
> + s390_free_insn_slot(p);
> }
>
> static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb,
>
--
Masami HIRAMATSU
IT Management Research Dept. Linux Technology Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: [email protected]