From: Peter Zijlstra <[email protected]>
Because of late module patching, a livepatch module needs to be able to
apply some of its relocations well after it has been loaded. Instead of
playing games with module_{dis,en}able_ro(), use existing text poking
mechanisms to apply relocations after module loading.
So far only x86, s390 and Power have HAVE_LIVEPATCH but only the first
two also have STRICT_MODULE_RWX.
This will allow removal of the last module_disable_ro() usage in
livepatch. The ultimate goal is to completely disallow making
executable mappings writable.
Also, for the late patching case, use text_mutex, which is supposed to
be held for all runtime text patching operations.
[ jpoimboe: Split up patches. Use mod state to determine whether
memcpy() can be used. Add text_mutex. Make it build. ]
Cc: [email protected]
Cc: [email protected]
Suggested-by: Josh Poimboeuf <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Josh Poimboeuf <[email protected]>
---
arch/s390/kernel/module.c | 125 ++++++++++++++++++++++++--------------
1 file changed, 79 insertions(+), 46 deletions(-)
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index ba8f19bb438b..2798329ebb74 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -19,6 +19,7 @@
#include <linux/kasan.h>
#include <linux/moduleloader.h>
#include <linux/bug.h>
+#include <linux/memory.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/facility.h>
@@ -174,10 +175,12 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
}
static int apply_rela_bits(Elf_Addr loc, Elf_Addr val,
- int sign, int bits, int shift)
+ int sign, int bits, int shift,
+ void *(*write)(void *dest, const void *src, size_t len))
{
unsigned long umax;
long min, max;
+ void *dest = (void *)loc;
if (val & ((1UL << shift) - 1))
return -ENOEXEC;
@@ -194,26 +197,33 @@ static int apply_rela_bits(Elf_Addr loc, Elf_Addr val,
return -ENOEXEC;
}
- if (bits == 8)
- *(unsigned char *) loc = val;
- else if (bits == 12)
- *(unsigned short *) loc = (val & 0xfff) |
+ if (bits == 8) {
+ unsigned char tmp = val;
+ write(dest, &tmp, 1);
+ } else if (bits == 12) {
+ unsigned short tmp = (val & 0xfff) |
(*(unsigned short *) loc & 0xf000);
- else if (bits == 16)
- *(unsigned short *) loc = val;
- else if (bits == 20)
- *(unsigned int *) loc = (val & 0xfff) << 16 |
- (val & 0xff000) >> 4 |
- (*(unsigned int *) loc & 0xf00000ff);
- else if (bits == 32)
- *(unsigned int *) loc = val;
- else if (bits == 64)
- *(unsigned long *) loc = val;
+ write(dest, &tmp, 2);
+ } else if (bits == 16) {
+ unsigned short tmp = val;
+ write(dest, &tmp, 2);
+ } else if (bits == 20) {
+ unsigned int tmp = (val & 0xfff) << 16 |
+ (val & 0xff000) >> 4 | (*(unsigned int *) loc & 0xf00000ff);
+ write(dest, &tmp, 4);
+ } else if (bits == 32) {
+ unsigned int tmp = val;
+ write(dest, &tmp, 4);
+ } else if (bits == 64) {
+ unsigned long tmp = val;
+ write(dest, &tmp, 8);
+ }
return 0;
}
static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
- const char *strtab, struct module *me)
+ const char *strtab, struct module *me,
+ void *(*write)(void *dest, const void *src, size_t len))
{
struct mod_arch_syminfo *info;
Elf_Addr loc, val;
@@ -241,17 +251,17 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_64: /* Direct 64 bit. */
val += rela->r_addend;
if (r_type == R_390_8)
- rc = apply_rela_bits(loc, val, 0, 8, 0);
+ rc = apply_rela_bits(loc, val, 0, 8, 0, write);
else if (r_type == R_390_12)
- rc = apply_rela_bits(loc, val, 0, 12, 0);
+ rc = apply_rela_bits(loc, val, 0, 12, 0, write);
else if (r_type == R_390_16)
- rc = apply_rela_bits(loc, val, 0, 16, 0);
+ rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_20)
- rc = apply_rela_bits(loc, val, 1, 20, 0);
+ rc = apply_rela_bits(loc, val, 1, 20, 0, write);
else if (r_type == R_390_32)
- rc = apply_rela_bits(loc, val, 0, 32, 0);
+ rc = apply_rela_bits(loc, val, 0, 32, 0, write);
else if (r_type == R_390_64)
- rc = apply_rela_bits(loc, val, 0, 64, 0);
+ rc = apply_rela_bits(loc, val, 0, 64, 0, write);
break;
case R_390_PC16: /* PC relative 16 bit. */
case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */
@@ -260,15 +270,15 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_PC64: /* PC relative 64 bit. */
val += rela->r_addend - loc;
if (r_type == R_390_PC16)
- rc = apply_rela_bits(loc, val, 1, 16, 0);
+ rc = apply_rela_bits(loc, val, 1, 16, 0, write);
else if (r_type == R_390_PC16DBL)
- rc = apply_rela_bits(loc, val, 1, 16, 1);
+ rc = apply_rela_bits(loc, val, 1, 16, 1, write);
else if (r_type == R_390_PC32DBL)
- rc = apply_rela_bits(loc, val, 1, 32, 1);
+ rc = apply_rela_bits(loc, val, 1, 32, 1, write);
else if (r_type == R_390_PC32)
- rc = apply_rela_bits(loc, val, 1, 32, 0);
+ rc = apply_rela_bits(loc, val, 1, 32, 0, write);
else if (r_type == R_390_PC64)
- rc = apply_rela_bits(loc, val, 1, 64, 0);
+ rc = apply_rela_bits(loc, val, 1, 64, 0, write);
break;
case R_390_GOT12: /* 12 bit GOT offset. */
case R_390_GOT16: /* 16 bit GOT offset. */
@@ -293,23 +303,23 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
val = info->got_offset + rela->r_addend;
if (r_type == R_390_GOT12 ||
r_type == R_390_GOTPLT12)
- rc = apply_rela_bits(loc, val, 0, 12, 0);
+ rc = apply_rela_bits(loc, val, 0, 12, 0, write);
else if (r_type == R_390_GOT16 ||
r_type == R_390_GOTPLT16)
- rc = apply_rela_bits(loc, val, 0, 16, 0);
+ rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_GOT20 ||
r_type == R_390_GOTPLT20)
- rc = apply_rela_bits(loc, val, 1, 20, 0);
+ rc = apply_rela_bits(loc, val, 1, 20, 0, write);
else if (r_type == R_390_GOT32 ||
r_type == R_390_GOTPLT32)
- rc = apply_rela_bits(loc, val, 0, 32, 0);
+ rc = apply_rela_bits(loc, val, 0, 32, 0, write);
else if (r_type == R_390_GOT64 ||
r_type == R_390_GOTPLT64)
- rc = apply_rela_bits(loc, val, 0, 64, 0);
+ rc = apply_rela_bits(loc, val, 0, 64, 0, write);
else if (r_type == R_390_GOTENT ||
r_type == R_390_GOTPLTENT) {
val += (Elf_Addr) me->core_layout.base - loc;
- rc = apply_rela_bits(loc, val, 1, 32, 1);
+ rc = apply_rela_bits(loc, val, 1, 32, 1, write);
}
break;
case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */
@@ -357,17 +367,17 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
val += rela->r_addend - loc;
}
if (r_type == R_390_PLT16DBL)
- rc = apply_rela_bits(loc, val, 1, 16, 1);
+ rc = apply_rela_bits(loc, val, 1, 16, 1, write);
else if (r_type == R_390_PLTOFF16)
- rc = apply_rela_bits(loc, val, 0, 16, 0);
+ rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_PLT32DBL)
- rc = apply_rela_bits(loc, val, 1, 32, 1);
+ rc = apply_rela_bits(loc, val, 1, 32, 1, write);
else if (r_type == R_390_PLT32 ||
r_type == R_390_PLTOFF32)
- rc = apply_rela_bits(loc, val, 0, 32, 0);
+ rc = apply_rela_bits(loc, val, 0, 32, 0, write);
else if (r_type == R_390_PLT64 ||
r_type == R_390_PLTOFF64)
- rc = apply_rela_bits(loc, val, 0, 64, 0);
+ rc = apply_rela_bits(loc, val, 0, 64, 0, write);
break;
case R_390_GOTOFF16: /* 16 bit offset to GOT. */
case R_390_GOTOFF32: /* 32 bit offset to GOT. */
@@ -375,20 +385,20 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
val = val + rela->r_addend -
((Elf_Addr) me->core_layout.base + me->arch.got_offset);
if (r_type == R_390_GOTOFF16)
- rc = apply_rela_bits(loc, val, 0, 16, 0);
+ rc = apply_rela_bits(loc, val, 0, 16, 0, write);
else if (r_type == R_390_GOTOFF32)
- rc = apply_rela_bits(loc, val, 0, 32, 0);
+ rc = apply_rela_bits(loc, val, 0, 32, 0, write);
else if (r_type == R_390_GOTOFF64)
- rc = apply_rela_bits(loc, val, 0, 64, 0);
+ rc = apply_rela_bits(loc, val, 0, 64, 0, write);
break;
case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */
case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */
val = (Elf_Addr) me->core_layout.base + me->arch.got_offset +
rela->r_addend - loc;
if (r_type == R_390_GOTPC)
- rc = apply_rela_bits(loc, val, 1, 32, 0);
+ rc = apply_rela_bits(loc, val, 1, 32, 0, write);
else if (r_type == R_390_GOTPCDBL)
- rc = apply_rela_bits(loc, val, 1, 32, 1);
+ rc = apply_rela_bits(loc, val, 1, 32, 1, write);
break;
case R_390_COPY:
case R_390_GLOB_DAT: /* Create GOT entry. */
@@ -412,9 +422,10 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
return 0;
}
-int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+static int __apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
unsigned int symindex, unsigned int relsec,
- struct module *me)
+ struct module *me,
+ void *(*write)(void *dest, const void *src, size_t len))
{
Elf_Addr base;
Elf_Sym *symtab;
@@ -430,13 +441,35 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
n = sechdrs[relsec].sh_size / sizeof(Elf_Rela);
for (i = 0; i < n; i++, rela++) {
- rc = apply_rela(rela, base, symtab, strtab, me);
+ rc = apply_rela(rela, base, symtab, strtab, me, write);
if (rc)
return rc;
}
return 0;
}
+int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
+ unsigned int symindex, unsigned int relsec,
+ struct module *me)
+{
+ int ret;
+ bool early = me->state == MODULE_STATE_UNFORMED;
+ void *(*write)(void *, const void *, size_t) = memcpy;
+
+ if (!early) {
+ write = s390_kernel_write;
+ mutex_lock(&text_mutex);
+ }
+
+ ret = __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
+ write);
+
+ if (!early)
+ mutex_unlock(&text_mutex);
+
+ return ret;
+}
+
int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
--
2.21.1
> +int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
> + unsigned int symindex, unsigned int relsec,
> + struct module *me)
> +{
> + int ret;
> + bool early = me->state == MODULE_STATE_UNFORMED;
> + void *(*write)(void *, const void *, size_t) = memcpy;
> +
> + if (!early) {
> + write = s390_kernel_write;
> + mutex_lock(&text_mutex);
> + }
> +
> + ret = __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
> + write);
> +
> + if (!early)
> + mutex_unlock(&text_mutex);
> +
> + return ret;
> +}
It means that you can take text_mutex a second time here, because it
is still held in klp_init_object_loaded(). It is removed later in the
series though. The same applies to the x86 patch.
Also, s390_kernel_write() grabs s390_kernel_write_lock spinlock before
writing anything, so maybe text_mutex is not really needed as long as
s390_kernel_write is called everywhere for text patching.
Miroslav
On Fri, 17 Apr 2020 09:04:31 -0500
Josh Poimboeuf <[email protected]> wrote:
> From: Peter Zijlstra <[email protected]>
>
> Because of late module patching, a livepatch module needs to be able to
> apply some of its relocations well after it has been loaded. Instead of
> playing games with module_{dis,en}able_ro(), use existing text poking
> mechanisms to apply relocations after module loading.
>
> So far only x86, s390 and Power have HAVE_LIVEPATCH but only the first
> two also have STRICT_MODULE_RWX.
>
> This will allow removal of the last module_disable_ro() usage in
> livepatch. The ultimate goal is to completely disallow making
> executable mappings writable.
>
> Also, for the late patching case, use text_mutex, which is supposed to
> be held for all runtime text patching operations.
>
> [ jpoimboe: Split up patches. Use mod state to determine whether
> memcpy() can be used. Add text_mutex. Make it build. ]
>
> Cc: [email protected]
> Cc: [email protected]
> Suggested-by: Josh Poimboeuf <[email protected]>
> Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> Signed-off-by: Josh Poimboeuf <[email protected]>
> ---
> arch/s390/kernel/module.c | 125 ++++++++++++++++++++++++--------------
> 1 file changed, 79 insertions(+), 46 deletions(-)
Sorry, just noticed this. Heiko will return next month, and I'm not
really familiar with s390 livepatching. Adding Vasily, he might
have some more insight.
So, I might be completely wrong here, but using s390_kernel_write()
for writing to anything other than 1:1 mapped kernel, should go
horribly wrong, as that runs w/o DAT. It would allow to bypass
DAT write protection, which I assume is why you want to use it,
but it should not work on module text section, as that would be
in vmalloc space and not 1:1 mapped kernel memory.
Not quite sure how to test / trigger this, did this really work for
you on s390?
Regards,
Gerald
On Wed, 22 Apr 2020 16:40:37 +0200
Gerald Schaefer <[email protected]> wrote:
> On Fri, 17 Apr 2020 09:04:31 -0500
> Josh Poimboeuf <[email protected]> wrote:
>
> > From: Peter Zijlstra <[email protected]>
> >
> > Because of late module patching, a livepatch module needs to be able to
> > apply some of its relocations well after it has been loaded. Instead of
> > playing games with module_{dis,en}able_ro(), use existing text poking
> > mechanisms to apply relocations after module loading.
> >
> > So far only x86, s390 and Power have HAVE_LIVEPATCH but only the first
> > two also have STRICT_MODULE_RWX.
> >
> > This will allow removal of the last module_disable_ro() usage in
> > livepatch. The ultimate goal is to completely disallow making
> > executable mappings writable.
> >
> > Also, for the late patching case, use text_mutex, which is supposed to
> > be held for all runtime text patching operations.
> >
> > [ jpoimboe: Split up patches. Use mod state to determine whether
> > memcpy() can be used. Add text_mutex. Make it build. ]
> >
> > Cc: [email protected]
> > Cc: [email protected]
> > Suggested-by: Josh Poimboeuf <[email protected]>
> > Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
> > Signed-off-by: Josh Poimboeuf <[email protected]>
> > ---
> > arch/s390/kernel/module.c | 125 ++++++++++++++++++++++++--------------
> > 1 file changed, 79 insertions(+), 46 deletions(-)
>
> Sorry, just noticed this. Heiko will return next month, and I'm not
> really familiar with s390 livepatching. Adding Vasily, he might
> have some more insight.
>
> So, I might be completely wrong here, but using s390_kernel_write()
> for writing to anything other than 1:1 mapped kernel, should go
> horribly wrong, as that runs w/o DAT. It would allow to bypass
> DAT write protection, which I assume is why you want to use it,
> but it should not work on module text section, as that would be
> in vmalloc space and not 1:1 mapped kernel memory.
>
> Not quite sure how to test / trigger this, did this really work for
> you on s390?
OK, using s390_kernel_write() as default write function for module
relocation seems to work fine for me, so apparently I am missing /
mixing up something. Sorry for the noise, please ignore my concern.
On Wed, Apr 22, 2020 at 05:21:26PM +0200, Gerald Schaefer wrote:
> > Sorry, just noticed this. Heiko will return next month, and I'm not
> > really familiar with s390 livepatching. Adding Vasily, he might
> > have some more insight.
> >
> > So, I might be completely wrong here, but using s390_kernel_write()
> > for writing to anything other than 1:1 mapped kernel, should go
> > horribly wrong, as that runs w/o DAT. It would allow to bypass
> > DAT write protection, which I assume is why you want to use it,
> > but it should not work on module text section, as that would be
> > in vmalloc space and not 1:1 mapped kernel memory.
> >
> > Not quite sure how to test / trigger this, did this really work for
> > you on s390?
>
> OK, using s390_kernel_write() as default write function for module
> relocation seems to work fine for me, so apparently I am missing /
> mixing up something. Sorry for the noise, please ignore my concern.
Hi Gerald,
I think you were right. Joe found the below panic with his klp-convert
tests.
Your test was probably the early module loading case (normal relocations
before write protection), rather than the late case. Not sure why that
would work, but calling s390_kernel_write() late definitely seems to be
broken.
Is there some other way to write vmalloc'ed s390 text without using
module_disable_ro()?
[ 50.294476] Unable to handle kernel pointer dereference in virtual kernel address space
[ 50.294479] Failing address: 000003ff8015b000 TEID: 000003ff8015b407
[ 50.294480] Fault in home space mode while using kernel ASCE.
[ 50.294483] AS:000000006cef0007 R3:000000007e2c4007 S:0000000003ccb800 P:0000 00000257321d
[ 50.294557] Oops: 0004 ilc:3 [#1] SMP
[ 50.294561] Modules linked in: test_klp_convert1(K+) test_klp_convert_mod ghash_s390 prng xts aes_s390 des_s390 libdes sha512_s390 vmur zcrypt_cex4 ip_tables xfs libcrc32c dasd_fba_mod qeth_l2 dasd_eckd_mod dasd_mod qeth lcs ctcm qdio cc
wgroup fsm dm_mirror dm_region_hash dm_log dm_mod pkey zcrypt [last unloaded: test_klp_atomic_replace]
[ 50.294576] CPU: 0 PID: 1743 Comm: modprobe Tainted: G K 5.6.0 + #2
[ 50.294579] Hardware name: IBM 2964 N96 400 (z/VM 6.4.0)
[ 50.294583] Krnl PSW : 0704e00180000000 000000006bf6be0a (apply_rela+0x2ba/0x 4e0)
[ 50.294589] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI: 0 EA:3
[ 50.294684] Krnl GPRS: 000003ff80147010 000003e0001b9588 000003ff8015c168 000 003ff8015b19a
[ 50.294686] 000003ff8015b07c 0d10e310100a0004 000003ff80147010 000 00000000000a0
[ 50.294687] 000003ff8015e588 000003ff8015e5e8 000003ff8015d300 000 0003b00000014
[ 50.294698] 000000007a663000 000000006c6bbb80 000003e0009a7918 000 003e0009a78b8
[ 50.294707] Krnl Code: 000000006bf6bdf8: e350d0080004 lg %r5,8(%r 13)
[ 50.294707] 000000006bf6bdfe: e34010080008 ag %r4,8(%r 1)
[ 50.294707] #000000006bf6be04: e340a2000008 ag %r4,512( %r10)
[ 50.294707] >000000006bf6be0a: e35040000024 stg %r5,0(%r 4)
[ 50.294707] 000000006bf6be10: c050007c6136 larl %r5,0000 00006cef807c
[ 50.294707] 000000006bf6be16: e35050000012 lt %r5,0(%r 5)
[ 50.294707] 000000006bf6be1c: a78400a6 brc 8,000000 006bf6bf68
[ 50.294707] 000000006bf6be20: a55e07f1 llilh %r5,2033
01: HCPGSP2629I The virtual machine is placed in CP mode due to a SIGP stop from CPU 01.
01: HCPGSP2629I The virtual machine is placed in CP mode due to a SIGP stop from CPU 00.
[ 50.295369] Call Trace:
[ 50.295372] [<000000006bf6be0a>] apply_rela+0x2ba/0x4e0
[ 50.295376] [<000000006bf6c5c8>] apply_relocate_add+0xe0/0x138
[ 50.295378] [<000000006c0229a0>] klp_apply_section_relocs+0xe8/0x128
[ 50.295380] [<000000006c022b4c>] klp_apply_object_relocs+0x9c/0xd0
[ 50.295382] [<000000006c022bb0>] klp_init_object_loaded+0x30/0x138
[ 50.295384] [<000000006c023052>] klp_enable_patch+0x39a/0x870
[ 50.295387] [<000003ff8015b0da>] test_klp_convert_init+0x22/0x50 [test_klp_convert1]
[ 50.295389] [<000000006bf54838>] do_one_initcall+0x40/0x1f0
[ 50.295391] [<000000006c04d610>] do_init_module+0x70/0x280
[ 50.295392] [<000000006c05002a>] load_module+0x1aba/0x1d10
[ 50.295394] [<000000006c0504c4>] __do_sys_finit_module+0xa4/0xe8
[ 50.295416] [<000000006c6b5742>] system_call+0x2aa/0x2c8
[ 50.295416] Last Breaking-Event-Address:
[ 50.295418] [<000000006c6b6aa0>] __s390_indirect_jump_r4+0x0/0xc
[ 50.295421] Kernel panic - not syncing: Fatal exception: panic_on_oops
--
Josh
On Wed, 22 Apr 2020 14:46:05 -0500
Josh Poimboeuf <[email protected]> wrote:
> On Wed, Apr 22, 2020 at 05:21:26PM +0200, Gerald Schaefer wrote:
> > > Sorry, just noticed this. Heiko will return next month, and I'm not
> > > really familiar with s390 livepatching. Adding Vasily, he might
> > > have some more insight.
> > >
> > > So, I might be completely wrong here, but using s390_kernel_write()
> > > for writing to anything other than 1:1 mapped kernel, should go
> > > horribly wrong, as that runs w/o DAT. It would allow to bypass
> > > DAT write protection, which I assume is why you want to use it,
> > > but it should not work on module text section, as that would be
> > > in vmalloc space and not 1:1 mapped kernel memory.
> > >
> > > Not quite sure how to test / trigger this, did this really work for
> > > you on s390?
> >
> > OK, using s390_kernel_write() as default write function for module
> > relocation seems to work fine for me, so apparently I am missing /
> > mixing up something. Sorry for the noise, please ignore my concern.
>
> Hi Gerald,
>
> I think you were right. Joe found the below panic with his klp-convert
> tests.
>
> Your test was probably the early module loading case (normal relocations
> before write protection), rather than the late case. Not sure why that
> would work, but calling s390_kernel_write() late definitely seems to be
> broken.
>
> Is there some other way to write vmalloc'ed s390 text without using
> module_disable_ro()?
>
> [ 50.294476] Unable to handle kernel pointer dereference in virtual kernel address space
> [ 50.294479] Failing address: 000003ff8015b000 TEID: 000003ff8015b407
> [ 50.294480] Fault in home space mode while using kernel ASCE.
> [ 50.294483] AS:000000006cef0007 R3:000000007e2c4007 S:0000000003ccb800 P:0000 00000257321d
> [ 50.294557] Oops: 0004 ilc:3 [#1] SMP
> [ 50.294561] Modules linked in: test_klp_convert1(K+) test_klp_convert_mod ghash_s390 prng xts aes_s390 des_s390 libdes sha512_s390 vmur zcrypt_cex4 ip_tables xfs libcrc32c dasd_fba_mod qeth_l2 dasd_eckd_mod dasd_mod qeth lcs ctcm qdio cc
> wgroup fsm dm_mirror dm_region_hash dm_log dm_mod pkey zcrypt [last unloaded: test_klp_atomic_replace]
> [ 50.294576] CPU: 0 PID: 1743 Comm: modprobe Tainted: G K 5.6.0 + #2
> [ 50.294579] Hardware name: IBM 2964 N96 400 (z/VM 6.4.0)
> [ 50.294583] Krnl PSW : 0704e00180000000 000000006bf6be0a (apply_rela+0x2ba/0x 4e0)
> [ 50.294589] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI: 0 EA:3
> [ 50.294684] Krnl GPRS: 000003ff80147010 000003e0001b9588 000003ff8015c168 000 003ff8015b19a
> [ 50.294686] 000003ff8015b07c 0d10e310100a0004 000003ff80147010 000 00000000000a0
> [ 50.294687] 000003ff8015e588 000003ff8015e5e8 000003ff8015d300 000 0003b00000014
> [ 50.294698] 000000007a663000 000000006c6bbb80 000003e0009a7918 000 003e0009a78b8
> [ 50.294707] Krnl Code: 000000006bf6bdf8: e350d0080004 lg %r5,8(%r 13)
> [ 50.294707] 000000006bf6bdfe: e34010080008 ag %r4,8(%r 1)
> [ 50.294707] #000000006bf6be04: e340a2000008 ag %r4,512( %r10)
> [ 50.294707] >000000006bf6be0a: e35040000024 stg %r5,0(%r 4)
> [ 50.294707] 000000006bf6be10: c050007c6136 larl %r5,0000 00006cef807c
> [ 50.294707] 000000006bf6be16: e35050000012 lt %r5,0(%r 5)
> [ 50.294707] 000000006bf6be1c: a78400a6 brc 8,000000 006bf6bf68
> [ 50.294707] 000000006bf6be20: a55e07f1 llilh %r5,2033
> 01: HCPGSP2629I The virtual machine is placed in CP mode due to a SIGP stop from CPU 01.
> 01: HCPGSP2629I The virtual machine is placed in CP mode due to a SIGP stop from CPU 00.
> [ 50.295369] Call Trace:
> [ 50.295372] [<000000006bf6be0a>] apply_rela+0x2ba/0x4e0
> [ 50.295376] [<000000006bf6c5c8>] apply_relocate_add+0xe0/0x138
> [ 50.295378] [<000000006c0229a0>] klp_apply_section_relocs+0xe8/0x128
> [ 50.295380] [<000000006c022b4c>] klp_apply_object_relocs+0x9c/0xd0
> [ 50.295382] [<000000006c022bb0>] klp_init_object_loaded+0x30/0x138
> [ 50.295384] [<000000006c023052>] klp_enable_patch+0x39a/0x870
> [ 50.295387] [<000003ff8015b0da>] test_klp_convert_init+0x22/0x50 [test_klp_convert1]
> [ 50.295389] [<000000006bf54838>] do_one_initcall+0x40/0x1f0
> [ 50.295391] [<000000006c04d610>] do_init_module+0x70/0x280
> [ 50.295392] [<000000006c05002a>] load_module+0x1aba/0x1d10
> [ 50.295394] [<000000006c0504c4>] __do_sys_finit_module+0xa4/0xe8
> [ 50.295416] [<000000006c6b5742>] system_call+0x2aa/0x2c8
> [ 50.295416] Last Breaking-Event-Address:
> [ 50.295418] [<000000006c6b6aa0>] __s390_indirect_jump_r4+0x0/0xc
> [ 50.295421] Kernel panic - not syncing: Fatal exception: panic_on_oops
>
Hi Josh,
this is strange. While I would have expected an exception similar to
this, it really should have happened on the "sturg" instruction which
does the DAT-off store in s390_kernel_write(), and certainly not with
an ID of 0004 (protection). However, in your case, it happens on a
normal store instruction, with 0004 indicating a protection exception.
This is more like what I would expect e.g. in the case where you do
_not_ use the s390_kernel_write() function for RO module text patching,
but rather normal memory access. So I am pretty sure that this is not
related to the s390_kernel_write(), but some other issue, maybe some
place left where you still use normal memory access?
There is also some good news. While thinking about how to use "sturg"
for vmalloc addresses, I came up with the idea to use "lra" (load
real address) before that. Then I found out that we already do exactly
that in the inline assembly, so all should be fine. Well, maybe the
comment for s390_kernel_write() could be improved...
Vasily also found out that we apparently already use s390_kernel_write()
for module text, for alternatives, so I guess we can safely assume that
it should work fine in principle.
Regards,
Gerald
On Thu, Apr 23, 2020 at 03:22:06PM +0200, Miroslav Benes wrote:
> > > [ 50.294476] Unable to handle kernel pointer dereference in virtual kernel address space
> > > [ 50.294479] Failing address: 000003ff8015b000 TEID: 000003ff8015b407
> > > [ 50.294480] Fault in home space mode while using kernel ASCE.
> > > [ 50.294483] AS:000000006cef0007 R3:000000007e2c4007 S:0000000003ccb800 P:0000 00000257321d
> > > [ 50.294557] Oops: 0004 ilc:3 [#1] SMP
> > > [ 50.294561] Modules linked in: test_klp_convert1(K+) test_klp_convert_mod ghash_s390 prng xts aes_s390 des_s390 libdes sha512_s390 vmur zcrypt_cex4 ip_tables xfs libcrc32c dasd_fba_mod qeth_l2 dasd_eckd_mod dasd_mod qeth lcs ctcm qdio cc
> > > wgroup fsm dm_mirror dm_region_hash dm_log dm_mod pkey zcrypt [last unloaded: test_klp_atomic_replace]
> > > [ 50.294576] CPU: 0 PID: 1743 Comm: modprobe Tainted: G K 5.6.0 + #2
> > > [ 50.294579] Hardware name: IBM 2964 N96 400 (z/VM 6.4.0)
> > > [ 50.294583] Krnl PSW : 0704e00180000000 000000006bf6be0a (apply_rela+0x2ba/0x 4e0)
> > > [ 50.294589] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI: 0 EA:3
> > > [ 50.294684] Krnl GPRS: 000003ff80147010 000003e0001b9588 000003ff8015c168 000 003ff8015b19a
> > > [ 50.294686] 000003ff8015b07c 0d10e310100a0004 000003ff80147010 000 00000000000a0
> > > [ 50.294687] 000003ff8015e588 000003ff8015e5e8 000003ff8015d300 000 0003b00000014
> > > [ 50.294698] 000000007a663000 000000006c6bbb80 000003e0009a7918 000 003e0009a78b8
> > > [ 50.294707] Krnl Code: 000000006bf6bdf8: e350d0080004 lg %r5,8(%r 13)
> > > [ 50.294707] 000000006bf6bdfe: e34010080008 ag %r4,8(%r 1)
> > > [ 50.294707] #000000006bf6be04: e340a2000008 ag %r4,512( %r10)
> > > [ 50.294707] >000000006bf6be0a: e35040000024 stg %r5,0(%r 4)
> > > [ 50.294707] 000000006bf6be10: c050007c6136 larl %r5,0000 00006cef807c
> > > [ 50.294707] 000000006bf6be16: e35050000012 lt %r5,0(%r 5)
> > > [ 50.294707] 000000006bf6be1c: a78400a6 brc 8,000000 006bf6bf68
> > > [ 50.294707] 000000006bf6be20: a55e07f1 llilh %r5,2033
> > > 01: HCPGSP2629I The virtual machine is placed in CP mode due to a SIGP stop from CPU 01.
> > > 01: HCPGSP2629I The virtual machine is placed in CP mode due to a SIGP stop from CPU 00.
> > > [ 50.295369] Call Trace:
> > > [ 50.295372] [<000000006bf6be0a>] apply_rela+0x2ba/0x4e0
> > > [ 50.295376] [<000000006bf6c5c8>] apply_relocate_add+0xe0/0x138
> > > [ 50.295378] [<000000006c0229a0>] klp_apply_section_relocs+0xe8/0x128
> > > [ 50.295380] [<000000006c022b4c>] klp_apply_object_relocs+0x9c/0xd0
> > > [ 50.295382] [<000000006c022bb0>] klp_init_object_loaded+0x30/0x138
> > > [ 50.295384] [<000000006c023052>] klp_enable_patch+0x39a/0x870
> > > [ 50.295387] [<000003ff8015b0da>] test_klp_convert_init+0x22/0x50 [test_klp_convert1]
> > > [ 50.295389] [<000000006bf54838>] do_one_initcall+0x40/0x1f0
> > > [ 50.295391] [<000000006c04d610>] do_init_module+0x70/0x280
> > > [ 50.295392] [<000000006c05002a>] load_module+0x1aba/0x1d10
> > > [ 50.295394] [<000000006c0504c4>] __do_sys_finit_module+0xa4/0xe8
> > > [ 50.295416] [<000000006c6b5742>] system_call+0x2aa/0x2c8
> > > [ 50.295416] Last Breaking-Event-Address:
> > > [ 50.295418] [<000000006c6b6aa0>] __s390_indirect_jump_r4+0x0/0xc
> > > [ 50.295421] Kernel panic - not syncing: Fatal exception: panic_on_oops
> >
> > this is strange. While I would have expected an exception similar to
> > this, it really should have happened on the "sturg" instruction which
> > does the DAT-off store in s390_kernel_write(), and certainly not with
> > an ID of 0004 (protection). However, in your case, it happens on a
> > normal store instruction, with 0004 indicating a protection exception.
> >
> > This is more like what I would expect e.g. in the case where you do
> > _not_ use the s390_kernel_write() function for RO module text patching,
> > but rather normal memory access. So I am pretty sure that this is not
> > related to the s390_kernel_write(), but some other issue, maybe some
> > place left where you still use normal memory access?
>
> The call trace above also suggests that it is not a late relocation, no?
> The path is from KLP module init function through klp_enable_patch. It should
> mean that the to-be-patched object is loaded (it must be a module thanks
> to a check klp_init_object_loaded(), vmlinux relocations were processed
> earlier in apply_relocations()).
>
> However, the KLP module state here must be COMING, so s390_kernel_write()
> should be used. What are we missing?
I'm also scratching my head. It _should_ be using s390_kernel_write()
based on the module state, but I don't see that in the stack trace.
This trace (and Gerald's comment) seems to imply it's using
__builtin_memcpy(), which might be expected for the UNFORMED state.
Weird...
--
Josh
On Thu, Apr 23, 2020 at 03:22:06PM +0200, Miroslav Benes wrote:
> On Thu, 23 Apr 2020, Gerald Schaefer wrote:
>
> > On Wed, 22 Apr 2020 14:46:05 -0500
> > Josh Poimboeuf <[email protected]> wrote:
> >
> > > On Wed, Apr 22, 2020 at 05:21:26PM +0200, Gerald Schaefer wrote:
> > > > > Sorry, just noticed this. Heiko will return next month, and I'm not
> > > > > really familiar with s390 livepatching. Adding Vasily, he might
> > > > > have some more insight.
> > > > >
> > > > > So, I might be completely wrong here, but using s390_kernel_write()
> > > > > for writing to anything other than 1:1 mapped kernel, should go
> > > > > horribly wrong, as that runs w/o DAT. It would allow to bypass
> > > > > DAT write protection, which I assume is why you want to use it,
> > > > > but it should not work on module text section, as that would be
> > > > > in vmalloc space and not 1:1 mapped kernel memory.
> > > > >
> > > > > Not quite sure how to test / trigger this, did this really work for
> > > > > you on s390?
> > > >
> > > > OK, using s390_kernel_write() as default write function for module
> > > > relocation seems to work fine for me, so apparently I am missing /
> > > > mixing up something. Sorry for the noise, please ignore my concern.
> > >
> > > Hi Gerald,
> > >
> > > I think you were right. Joe found the below panic with his klp-convert
> > > tests.
> > >
> > > Your test was probably the early module loading case (normal relocations
> > > before write protection), rather than the late case. Not sure why that
> > > would work, but calling s390_kernel_write() late definitely seems to be
> > > broken.
> > >
> > > Is there some other way to write vmalloc'ed s390 text without using
> > > module_disable_ro()?
> > >
> > > [ 50.294476] Unable to handle kernel pointer dereference in virtual kernel address space
> > > [ 50.294479] Failing address: 000003ff8015b000 TEID: 000003ff8015b407
> > > [ 50.294480] Fault in home space mode while using kernel ASCE.
> > > [ 50.294483] AS:000000006cef0007 R3:000000007e2c4007 S:0000000003ccb800 P:0000 00000257321d
> > > [ 50.294557] Oops: 0004 ilc:3 [#1] SMP
> > > [ 50.294561] Modules linked in: test_klp_convert1(K+) test_klp_convert_mod ghash_s390 prng xts aes_s390 des_s390 libdes sha512_s390 vmur zcrypt_cex4 ip_tables xfs libcrc32c dasd_fba_mod qeth_l2 dasd_eckd_mod dasd_mod qeth lcs ctcm qdio cc
> > > wgroup fsm dm_mirror dm_region_hash dm_log dm_mod pkey zcrypt [last unloaded: test_klp_atomic_replace]
> > > [ 50.294576] CPU: 0 PID: 1743 Comm: modprobe Tainted: G K 5.6.0 + #2
> > > [ 50.294579] Hardware name: IBM 2964 N96 400 (z/VM 6.4.0)
> > > [ 50.294583] Krnl PSW : 0704e00180000000 000000006bf6be0a (apply_rela+0x2ba/0x 4e0)
> > > [ 50.294589] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI: 0 EA:3
> > > [ 50.294684] Krnl GPRS: 000003ff80147010 000003e0001b9588 000003ff8015c168 000 003ff8015b19a
> > > [ 50.294686] 000003ff8015b07c 0d10e310100a0004 000003ff80147010 000 00000000000a0
> > > [ 50.294687] 000003ff8015e588 000003ff8015e5e8 000003ff8015d300 000 0003b00000014
> > > [ 50.294698] 000000007a663000 000000006c6bbb80 000003e0009a7918 000 003e0009a78b8
> > > [ 50.294707] Krnl Code: 000000006bf6bdf8: e350d0080004 lg %r5,8(%r 13)
> > > [ 50.294707] 000000006bf6bdfe: e34010080008 ag %r4,8(%r 1)
> > > [ 50.294707] #000000006bf6be04: e340a2000008 ag %r4,512( %r10)
> > > [ 50.294707] >000000006bf6be0a: e35040000024 stg %r5,0(%r 4)
> > > [ 50.294707] 000000006bf6be10: c050007c6136 larl %r5,0000 00006cef807c
> > > [ 50.294707] 000000006bf6be16: e35050000012 lt %r5,0(%r 5)
> > > [ 50.294707] 000000006bf6be1c: a78400a6 brc 8,000000 006bf6bf68
> > > [ 50.294707] 000000006bf6be20: a55e07f1 llilh %r5,2033
> > > 01: HCPGSP2629I The virtual machine is placed in CP mode due to a SIGP stop from CPU 01.
> > > 01: HCPGSP2629I The virtual machine is placed in CP mode due to a SIGP stop from CPU 00.
> > > [ 50.295369] Call Trace:
> > > [ 50.295372] [<000000006bf6be0a>] apply_rela+0x2ba/0x4e0
> > > [ 50.295376] [<000000006bf6c5c8>] apply_relocate_add+0xe0/0x138
> > > [ 50.295378] [<000000006c0229a0>] klp_apply_section_relocs+0xe8/0x128
> > > [ 50.295380] [<000000006c022b4c>] klp_apply_object_relocs+0x9c/0xd0
> > > [ 50.295382] [<000000006c022bb0>] klp_init_object_loaded+0x30/0x138
> > > [ 50.295384] [<000000006c023052>] klp_enable_patch+0x39a/0x870
> > > [ 50.295387] [<000003ff8015b0da>] test_klp_convert_init+0x22/0x50 [test_klp_convert1]
> > > [ 50.295389] [<000000006bf54838>] do_one_initcall+0x40/0x1f0
> > > [ 50.295391] [<000000006c04d610>] do_init_module+0x70/0x280
> > > [ 50.295392] [<000000006c05002a>] load_module+0x1aba/0x1d10
> > > [ 50.295394] [<000000006c0504c4>] __do_sys_finit_module+0xa4/0xe8
> > > [ 50.295416] [<000000006c6b5742>] system_call+0x2aa/0x2c8
> > > [ 50.295416] Last Breaking-Event-Address:
> > > [ 50.295418] [<000000006c6b6aa0>] __s390_indirect_jump_r4+0x0/0xc
> > > [ 50.295421] Kernel panic - not syncing: Fatal exception: panic_on_oops
> > >
> >
> > Hi Josh,
> >
> > this is strange. While I would have expected an exception similar to
> > this, it really should have happened on the "sturg" instruction which
> > does the DAT-off store in s390_kernel_write(), and certainly not with
> > an ID of 0004 (protection). However, in your case, it happens on a
> > normal store instruction, with 0004 indicating a protection exception.
> >
> > This is more like what I would expect e.g. in the case where you do
> > _not_ use the s390_kernel_write() function for RO module text patching,
> > but rather normal memory access. So I am pretty sure that this is not
> > related to the s390_kernel_write(), but some other issue, maybe some
> > place left where you still use normal memory access?
>
> The call trace above also suggests that it is not a late relocation, no?
You are correct, details below...
> The path is from KLP module init function through klp_enable_patch. It should
> mean that the to-be-patched object is loaded (it must be a module thanks
> to a check klp_init_object_loaded(), vmlinux relocations were processed
> earlier in apply_relocations()).
>
> However, the KLP module state here must be COMING, so s390_kernel_write()
> should be used. What are we missing?
>
> Joe, could you debug this a bit, please?
>
Here is the combined branch that I tested yesterday:
https://github.com/joe-lawrence/linux/tree/jp-v2-klp-convert
That combined WIP klp-convert changes merged on top of Josh's v2 patches
(and follow-ups) posted here. Before I merged with Josh's changes, the
klp-convert code + tests ran without incident. There was a slight merge
conflict, but I hope that's not related to this crash. Perhaps the test
case is shaky and Josh's changes expose an underlying issue?
More info on the test case:
# TEST: klp-convert symbols
https://github.com/joe-lawrence/linux/blob/jp-v2-klp-convert/tools/testing/selftests/livepatch/test-livepatch.sh#L172
which loads an ordinary kernel module (test_klp_convert_mod) and then
this livepatch module, which contains references to both vmlinux
symbols and a few found in the first module:
https://github.com/joe-lawrence/linux/blob/jp-v2-klp-convert/lib/livepatch/test_klp_convert1.c
The livepatch's init function calls klp_enable_patch() as usual, and
then for testing purposes calls a few functions that required
klp-relocations. I don't know if real livepatch modules would ever need
to do this from init code, but I did so just to make the test code
simpler.
This is what klp-convert created for klp-relocations:
% readelf --wide --relocs test_klp_convert1.ko
...
Relocation section '.klp.rela.vmlinux..text.unlikely' at offset 0x52000 contains 1 entry:
Offset Info Type Symbol's Value Symbol's Name + Addend
0000000000000002 000000340000001a R_390_GOTENT 0000000000000000 .klp.sym.vmlinux.saved_command_line,0 + 2
Relocation section '.klp.rela.test_klp_convert_mod..text.unlikely' at offset 0x52018 contains 4 entries:
Offset Info Type Symbol's Value Symbol's Name + Addend
000000000000008a 0000003b00000014 R_390_PLT32DBL 0000000000000000 .klp.sym.test_klp_convert_mod.get_homonym_string,1 + 2
000000000000006c 000000350000001a R_390_GOTENT 0000000000000000 .klp.sym.test_klp_convert_mod.homonym_string,1 + 2
0000000000000042 0000003e00000014 R_390_PLT32DBL 0000000000000000 .klp.sym.test_klp_convert_mod.test_klp_get_driver_name,0 + 2
0000000000000024 000000380000001a R_390_GOTENT 0000000000000000 .klp.sym.test_klp_convert_mod.driver_name,0 + 2
I can rework the test case to simplify and debug a few things. Let me
know if you have any specific ideas in mind.
-- Joe
On Thu, Apr 23, 2020 at 09:12:28AM -0500, Josh Poimboeuf wrote:
> > > this is strange. While I would have expected an exception similar to
> > > this, it really should have happened on the "sturg" instruction which
> > > does the DAT-off store in s390_kernel_write(), and certainly not with
> > > an ID of 0004 (protection). However, in your case, it happens on a
> > > normal store instruction, with 0004 indicating a protection exception.
> > >
> > > This is more like what I would expect e.g. in the case where you do
> > > _not_ use the s390_kernel_write() function for RO module text patching,
> > > but rather normal memory access. So I am pretty sure that this is not
> > > related to the s390_kernel_write(), but some other issue, maybe some
> > > place left where you still use normal memory access?
> >
> > The call trace above also suggests that it is not a late relocation, no?
> > The path is from KLP module init function through klp_enable_patch. It should
> > mean that the to-be-patched object is loaded (it must be a module thanks
> > to a check klp_init_object_loaded(), vmlinux relocations were processed
> > earlier in apply_relocations()).
> >
> > However, the KLP module state here must be COMING, so s390_kernel_write()
> > should be used. What are we missing?
>
> I'm also scratching my head. It _should_ be using s390_kernel_write()
> based on the module state, but I don't see that on the stack trace.
>
> This trace (and Gerald's comment) seems to imply it's using
> __builtin_memcpy(), which might be expected for UNFORMED state.
>
> Weird...
Mystery solved:
$ CROSS_COMPILE=s390x-linux-gnu- scripts/faddr2line vmlinux apply_rela+0x16a/0x520
apply_rela+0x16a/0x520:
apply_rela at arch/s390/kernel/module.c:336
which corresponds to the following code in apply_rela():
case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
if (info->plt_initialized == 0) {
unsigned int *ip;
ip = me->core_layout.base + me->arch.plt_offset +
info->plt_offset;
ip[0] = 0x0d10e310; /* basr 1,0 */
ip[1] = 0x100a0004; /* lg 1,10(1) */
Notice how it's writing directly to text... oops.
--
Josh
On Thu, 23 Apr 2020, Gerald Schaefer wrote:
> On Wed, 22 Apr 2020 14:46:05 -0500
> Josh Poimboeuf <[email protected]> wrote:
>
> > On Wed, Apr 22, 2020 at 05:21:26PM +0200, Gerald Schaefer wrote:
> > > > Sorry, just noticed this. Heiko will return next month, and I'm not
> > > > really familiar with s390 livepatching. Adding Vasily, he might
> > > > have some more insight.
> > > >
> > > > So, I might be completely wrong here, but using s390_kernel_write()
> > > > for writing to anything other than 1:1 mapped kernel, should go
> > > > horribly wrong, as that runs w/o DAT. It would allow to bypass
> > > > DAT write protection, which I assume is why you want to use it,
> > > > but it should not work on module text section, as that would be
> > > > in vmalloc space and not 1:1 mapped kernel memory.
> > > >
> > > > Not quite sure how to test / trigger this, did this really work for
> > > > you on s390?
> > >
> > > OK, using s390_kernel_write() as default write function for module
> > > relocation seems to work fine for me, so apparently I am missing /
> > > mixing up something. Sorry for the noise, please ignore my concern.
> >
> > Hi Gerald,
> >
> > I think you were right. Joe found the below panic with his klp-convert
> > tests.
> >
> > Your test was probably the early module loading case (normal relocations
> > before write protection), rather than the late case. Not sure why that
> > would work, but calling s390_kernel_write() late definitely seems to be
> > broken.
> >
> > Is there some other way to write vmalloc'ed s390 text without using
> > module_disable_ro()?
> >
> > [ 50.294476] Unable to handle kernel pointer dereference in virtual kernel address space
> > [ 50.294479] Failing address: 000003ff8015b000 TEID: 000003ff8015b407
> > [ 50.294480] Fault in home space mode while using kernel ASCE.
> > [ 50.294483] AS:000000006cef0007 R3:000000007e2c4007 S:0000000003ccb800 P:0000 00000257321d
> > [ 50.294557] Oops: 0004 ilc:3 [#1] SMP
> > [ 50.294561] Modules linked in: test_klp_convert1(K+) test_klp_convert_mod ghash_s390 prng xts aes_s390 des_s390 libdes sha512_s390 vmur zcrypt_cex4 ip_tables xfs libcrc32c dasd_fba_mod qeth_l2 dasd_eckd_mod dasd_mod qeth lcs ctcm qdio cc
> > wgroup fsm dm_mirror dm_region_hash dm_log dm_mod pkey zcrypt [last unloaded: test_klp_atomic_replace]
> > [ 50.294576] CPU: 0 PID: 1743 Comm: modprobe Tainted: G K 5.6.0 + #2
> > [ 50.294579] Hardware name: IBM 2964 N96 400 (z/VM 6.4.0)
> > [ 50.294583] Krnl PSW : 0704e00180000000 000000006bf6be0a (apply_rela+0x2ba/0x 4e0)
> > [ 50.294589] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI: 0 EA:3
> > [ 50.294684] Krnl GPRS: 000003ff80147010 000003e0001b9588 000003ff8015c168 000 003ff8015b19a
> > [ 50.294686] 000003ff8015b07c 0d10e310100a0004 000003ff80147010 000 00000000000a0
> > [ 50.294687] 000003ff8015e588 000003ff8015e5e8 000003ff8015d300 000 0003b00000014
> > [ 50.294698] 000000007a663000 000000006c6bbb80 000003e0009a7918 000 003e0009a78b8
> > [ 50.294707] Krnl Code: 000000006bf6bdf8: e350d0080004 lg %r5,8(%r 13)
> > [ 50.294707] 000000006bf6bdfe: e34010080008 ag %r4,8(%r 1)
> > [ 50.294707] #000000006bf6be04: e340a2000008 ag %r4,512( %r10)
> > [ 50.294707] >000000006bf6be0a: e35040000024 stg %r5,0(%r 4)
> > [ 50.294707] 000000006bf6be10: c050007c6136 larl %r5,0000 00006cef807c
> > [ 50.294707] 000000006bf6be16: e35050000012 lt %r5,0(%r 5)
> > [ 50.294707] 000000006bf6be1c: a78400a6 brc 8,000000 006bf6bf68
> > [ 50.294707] 000000006bf6be20: a55e07f1 llilh %r5,2033
> > 01: HCPGSP2629I The virtual machine is placed in CP mode due to a SIGP stop from CPU 01.
> > 01: HCPGSP2629I The virtual machine is placed in CP mode due to a SIGP stop from CPU 00.
> > [ 50.295369] Call Trace:
> > [ 50.295372] [<000000006bf6be0a>] apply_rela+0x2ba/0x4e0
> > [ 50.295376] [<000000006bf6c5c8>] apply_relocate_add+0xe0/0x138
> > [ 50.295378] [<000000006c0229a0>] klp_apply_section_relocs+0xe8/0x128
> > [ 50.295380] [<000000006c022b4c>] klp_apply_object_relocs+0x9c/0xd0
> > [ 50.295382] [<000000006c022bb0>] klp_init_object_loaded+0x30/0x138
> > [ 50.295384] [<000000006c023052>] klp_enable_patch+0x39a/0x870
> > [ 50.295387] [<000003ff8015b0da>] test_klp_convert_init+0x22/0x50 [test_klp_convert1]
> > [ 50.295389] [<000000006bf54838>] do_one_initcall+0x40/0x1f0
> > [ 50.295391] [<000000006c04d610>] do_init_module+0x70/0x280
> > [ 50.295392] [<000000006c05002a>] load_module+0x1aba/0x1d10
> > [ 50.295394] [<000000006c0504c4>] __do_sys_finit_module+0xa4/0xe8
> > [ 50.295416] [<000000006c6b5742>] system_call+0x2aa/0x2c8
> > [ 50.295416] Last Breaking-Event-Address:
> > [ 50.295418] [<000000006c6b6aa0>] __s390_indirect_jump_r4+0x0/0xc
> > [ 50.295421] Kernel panic - not syncing: Fatal exception: panic_on_oops
> >
>
> Hi Josh,
>
> this is strange. While I would have expected an exception similar to
> this, it really should have happened on the "sturg" instruction which
> does the DAT-off store in s390_kernel_write(), and certainly not with
> an ID of 0004 (protection). However, in your case, it happens on a
> normal store instruction, with 0004 indicating a protection exception.
>
> This is more like what I would expect e.g. in the case where you do
> _not_ use the s390_kernel_write() function for RO module text patching,
> but rather normal memory access. So I am pretty sure that this is not
> related to the s390_kernel_write(), but some other issue, maybe some
> place left where you still use normal memory access?
The call trace above also suggests that it is not a late relocation, no?
The path is from KLP module init function through klp_enable_patch. It should
mean that the to-be-patched object is loaded (it must be a module thanks
to a check klp_init_object_loaded(), vmlinux relocations were processed
earlier in apply_relocations()).
However, the KLP module state here must be COMING, so s390_kernel_write()
should be used. What are we missing?
Joe, could you debug this a bit, please?
> There is also some good news. While thinking about how to use "sturg"
> for vmalloc addresses, I came up with the idea to use "lra" (load
> real address) before that. Then I found out that we already do exactly
> that in the inline assembly, so all should be fine. Well, maybe the
> comment for s390_kernel_write() could be improved...
>
> Vasily also found out that we apparently already use s390_kernel_write()
> for module text, for alternatives, so I guess we can safely assume that
> it should work fine in principle.
Phew, okay. I noticed that s390_kernel_write() is called in a couple of
places already (kprobes) and I wondered where the trick was.
Thanks
Miroslav
On Thu, Apr 23, 2020 at 01:10:30PM -0500, Josh Poimboeuf wrote:
> On Thu, Apr 23, 2020 at 09:12:28AM -0500, Josh Poimboeuf wrote:
> > > > this is strange. While I would have expected an exception similar to
> > > > this, it really should have happened on the "sturg" instruction which
> > > > does the DAT-off store in s390_kernel_write(), and certainly not with
> > > > an ID of 0004 (protection). However, in your case, it happens on a
> > > > normal store instruction, with 0004 indicating a protection exception.
> > > >
> > > > This is more like what I would expect e.g. in the case where you do
> > > > _not_ use the s390_kernel_write() function for RO module text patching,
> > > > but rather normal memory access. So I am pretty sure that this is not
> > > > related to the s390_kernel_write(), but some other issue, maybe some
> > > > place left where you still use normal memory access?
> > >
> > > The call trace above also suggests that it is not a late relocation, no?
> > > The path is from KLP module init function through klp_enable_patch. It should
> > > mean that the to-be-patched object is loaded (it must be a module thanks
> > > to a check klp_init_object_loaded(), vmlinux relocations were processed
> > > earlier in apply_relocations()).
> > >
> > > However, the KLP module state here must be COMING, so s390_kernel_write()
> > > should be used. What are we missing?
> >
> > I'm also scratching my head. It _should_ be using s390_kernel_write()
> > based on the module state, but I don't see that on the stack trace.
> >
> > This trace (and Gerald's comment) seems to imply it's using
> > __builtin_memcpy(), which might be expected for UNFORMED state.
> >
> > Weird...
>
> Mystery solved:
>
> $ CROSS_COMPILE=s390x-linux-gnu- scripts/faddr2line vmlinux apply_rela+0x16a/0x520
> apply_rela+0x16a/0x520:
> apply_rela at arch/s390/kernel/module.c:336
>
> which corresponds to the following code in apply_rela():
>
>
> case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
> if (info->plt_initialized == 0) {
> unsigned int *ip;
> ip = me->core_layout.base + me->arch.plt_offset +
> info->plt_offset;
> ip[0] = 0x0d10e310; /* basr 1,0 */
> ip[1] = 0x100a0004; /* lg 1,10(1) */
>
>
> Notice how it's writing directly to text... oops.
Here's a fix, using write() for the PLT and the GOT.
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 2798329ebb74..fe446f42818f 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -297,7 +297,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
gotent = me->core_layout.base + me->arch.got_offset +
info->got_offset;
- *gotent = val;
+ write(gotent, &val, sizeof(*gotent));
info->got_initialized = 1;
}
val = info->got_offset + rela->r_addend;
@@ -330,25 +330,29 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */
case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
if (info->plt_initialized == 0) {
- unsigned int *ip;
+ unsigned int *ip, insn[5];
+
ip = me->core_layout.base + me->arch.plt_offset +
info->plt_offset;
- ip[0] = 0x0d10e310; /* basr 1,0 */
- ip[1] = 0x100a0004; /* lg 1,10(1) */
+
+ insn[0] = 0x0d10e310; /* basr 1,0 */
+ insn[1] = 0x100a0004; /* lg 1,10(1) */
if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) {
unsigned int *ij;
ij = me->core_layout.base +
me->arch.plt_offset +
me->arch.plt_size - PLT_ENTRY_SIZE;
- ip[2] = 0xa7f40000 + /* j __jump_r1 */
+ insn[2] = 0xa7f40000 + /* j __jump_r1 */
(unsigned int)(u16)
(((unsigned long) ij - 8 -
(unsigned long) ip) / 2);
} else {
- ip[2] = 0x07f10000; /* br %r1 */
+ insn[2] = 0x07f10000; /* br %r1 */
}
- ip[3] = (unsigned int) (val >> 32);
- ip[4] = (unsigned int) val;
+ insn[3] = (unsigned int) (val >> 32);
+ insn[4] = (unsigned int) val;
+
+ write(ip, insn, sizeof(insn));
info->plt_initialized = 1;
}
if (r_type == R_390_PLTOFF16 ||
On Thu, Apr 23, 2020 at 06:26:57PM -0500, Josh Poimboeuf wrote:
> On Thu, Apr 23, 2020 at 01:10:30PM -0500, Josh Poimboeuf wrote:
> > Mystery solved:
> >
> > $ CROSS_COMPILE=s390x-linux-gnu- scripts/faddr2line vmlinux apply_rela+0x16a/0x520
> > apply_rela+0x16a/0x520:
> > apply_rela at arch/s390/kernel/module.c:336
> >
> > which corresponds to the following code in apply_rela():
> >
> >
> > case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
> > if (info->plt_initialized == 0) {
> > unsigned int *ip;
> > ip = me->core_layout.base + me->arch.plt_offset +
> > info->plt_offset;
> > ip[0] = 0x0d10e310; /* basr 1,0 */
> > ip[1] = 0x100a0004; /* lg 1,10(1) */
> >
> >
> > Notice how it's writing directly to text... oops.
>
> Here's a fix, using write() for the PLT and the GOT.
>
> diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
> index 2798329ebb74..fe446f42818f 100644
> --- a/arch/s390/kernel/module.c
> +++ b/arch/s390/kernel/module.c
> @@ -297,7 +297,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
>
> gotent = me->core_layout.base + me->arch.got_offset +
> info->got_offset;
> - *gotent = val;
> + write(gotent, &val, sizeof(*gotent));
> info->got_initialized = 1;
> }
> val = info->got_offset + rela->r_addend;
> @@ -330,25 +330,29 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
> case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */
> case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
> if (info->plt_initialized == 0) {
> - unsigned int *ip;
> + unsigned int *ip, insn[5];
> +
> ip = me->core_layout.base + me->arch.plt_offset +
> info->plt_offset;
Would it be too paranoid to declare:
const unsigned int *ip = me->core_layout.base +
me->arch.plt_offset +
info->plt_offset;
That would trip an assignment to read-only error if someone were tempted
to write directly through the pointer in the future. Ditto for Elf_Addr
*gotent pointer in the R_390_GOTPLTENT case.
-- Joe
On Thu, Apr 23, 2020 at 10:35:21PM -0400, Joe Lawrence wrote:
> > diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
> > index 2798329ebb74..fe446f42818f 100644
> > --- a/arch/s390/kernel/module.c
> > +++ b/arch/s390/kernel/module.c
> > @@ -297,7 +297,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
> >
> > gotent = me->core_layout.base + me->arch.got_offset +
> > info->got_offset;
> > - *gotent = val;
> > + write(gotent, &val, sizeof(*gotent));
> > info->got_initialized = 1;
> > }
> > val = info->got_offset + rela->r_addend;
> > @@ -330,25 +330,29 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
> > case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */
> > case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
> > if (info->plt_initialized == 0) {
> > - unsigned int *ip;
> > + unsigned int *ip, insn[5];
> > +
> > ip = me->core_layout.base + me->arch.plt_offset +
> > info->plt_offset;
>
> Would it be too paranoid to declare:
>
> const unsigned int *ip = me->core_layout.base +
> me->arch.plt_offset +
> info->plt_offset;
>
> That would trip an assignment to read-only error if someone were tempted
> to write directly through the pointer in the future. Ditto for Elf_Addr
> *gotent pointer in the R_390_GOTPLTENT case.
The only problem is then the write() triggers a warning because then we
*are* trying to write through the pointer :-)
arch/s390/kernel/module.c:300:10: warning: passing argument 1 of ‘write’ discards ‘const’ qualifier from pointer target type [-Wdiscarded-qualifiers]
300 | write(gotent, &val, sizeof(*gotent));
--
Josh
On 24.04.20 01:26, Josh Poimboeuf wrote:
> On Thu, Apr 23, 2020 at 01:10:30PM -0500, Josh Poimboeuf wrote:
>> On Thu, Apr 23, 2020 at 09:12:28AM -0500, Josh Poimboeuf wrote:
>>>>> this is strange. While I would have expected an exception similar to
>>>>> this, it really should have happened on the "sturg" instruction which
>>>>> does the DAT-off store in s390_kernel_write(), and certainly not with
>>>>> an ID of 0004 (protection). However, in your case, it happens on a
>>>>> normal store instruction, with 0004 indicating a protection exception.
>>>>>
>>>>> This is more like what I would expect e.g. in the case where you do
>>>>> _not_ use the s390_kernel_write() function for RO module text patching,
>>>>> but rather normal memory access. So I am pretty sure that this is not
>>>>> related to the s390_kernel_write(), but some other issue, maybe some
>>>>> place left where you still use normal memory access?
>>>>
>>>> The call trace above also suggests that it is not a late relocation, no?
>>>> The path is from KLP module init function through klp_enable_patch. It should
>>>> mean that the to-be-patched object is loaded (it must be a module thanks
>>>> to a check klp_init_object_loaded(), vmlinux relocations were processed
>>>> earlier in apply_relocations()).
>>>>
>>>> However, the KLP module state here must be COMING, so s390_kernel_write()
>>>> should be used. What are we missing?
>>>
>>> I'm also scratching my head. It _should_ be using s390_kernel_write()
>>> based on the module state, but I don't see that on the stack trace.
>>>
>>> This trace (and Gerald's comment) seems to imply it's using
>>> __builtin_memcpy(), which might be expected for UNFORMED state.
>>>
>>> Weird...
>>
>> Mystery solved:
>>
>> $ CROSS_COMPILE=s390x-linux-gnu- scripts/faddr2line vmlinux apply_rela+0x16a/0x520
>> apply_rela+0x16a/0x520:
>> apply_rela at arch/s390/kernel/module.c:336
>>
>> which corresponds to the following code in apply_rela():
>>
>>
>> case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
>> if (info->plt_initialized == 0) {
>> unsigned int *ip;
>> ip = me->core_layout.base + me->arch.plt_offset +
>> info->plt_offset;
>> ip[0] = 0x0d10e310; /* basr 1,0 */
>> ip[1] = 0x100a0004; /* lg 1,10(1) */
>>
>>
>> Notice how it's writing directly to text... oops.
>
> Here's a fix, using write() for the PLT and the GOT.
Are you going to provide a proper patch?
>
> diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
> index 2798329ebb74..fe446f42818f 100644
> --- a/arch/s390/kernel/module.c
> +++ b/arch/s390/kernel/module.c
> @@ -297,7 +297,7 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
>
> gotent = me->core_layout.base + me->arch.got_offset +
> info->got_offset;
> - *gotent = val;
> + write(gotent, &val, sizeof(*gotent));
> info->got_initialized = 1;
> }
> val = info->got_offset + rela->r_addend;
> @@ -330,25 +330,29 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
> case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */
> case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
> if (info->plt_initialized == 0) {
> - unsigned int *ip;
> + unsigned int *ip, insn[5];
> +
> ip = me->core_layout.base + me->arch.plt_offset +
> info->plt_offset;
> - ip[0] = 0x0d10e310; /* basr 1,0 */
> - ip[1] = 0x100a0004; /* lg 1,10(1) */
> +
> + insn[0] = 0x0d10e310; /* basr 1,0 */
> + insn[1] = 0x100a0004; /* lg 1,10(1) */
> if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) {
> unsigned int *ij;
> ij = me->core_layout.base +
> me->arch.plt_offset +
> me->arch.plt_size - PLT_ENTRY_SIZE;
> - ip[2] = 0xa7f40000 + /* j __jump_r1 */
> + insn[2] = 0xa7f40000 + /* j __jump_r1 */
> (unsigned int)(u16)
> (((unsigned long) ij - 8 -
> (unsigned long) ip) / 2);
> } else {
> - ip[2] = 0x07f10000; /* br %r1 */
> + insn[2] = 0x07f10000; /* br %r1 */
> }
> - ip[3] = (unsigned int) (val >> 32);
> - ip[4] = (unsigned int) val;
> + insn[3] = (unsigned int) (val >> 32);
> + insn[4] = (unsigned int) val;
> +
> + write(ip, insn, sizeof(insn));
> info->plt_initialized = 1;
> }
> if (r_type == R_390_PLTOFF16 ||
>
On Fri, Apr 24, 2020 at 09:20:24AM +0200, Christian Borntraeger wrote:
> > Here's a fix, using write() for the PLT and the GOT.
>
> Are you going to provide a proper patch?
Yes. These incremental patches are intended to be part of the
discussion, feel free to ignore them.
--
Josh
On Wed, Apr 22, 2020 at 02:28:26PM +0200, Miroslav Benes wrote:
> > +int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
> > + unsigned int symindex, unsigned int relsec,
> > + struct module *me)
> > +{
> > + int ret;
> > + bool early = me->state == MODULE_STATE_UNFORMED;
> > + void *(*write)(void *, const void *, size_t) = memcpy;
> > +
> > + if (!early) {
> > + write = s390_kernel_write;
> > + mutex_lock(&text_mutex);
> > + }
> > +
> > + ret = __apply_relocate_add(sechdrs, strtab, symindex, relsec, me,
> > + write);
> > +
> > + if (!early)
> > + mutex_unlock(&text_mutex);
> > +
> > + return ret;
> > +}
>
> It means that you can take text_mutex the second time here because it
> is still taken in klp_init_object_loaded(). It is removed later in the
> series though. The same applies for x86 patch.
>
> Also, s390_kernel_write() grabs s390_kernel_write_lock spinlock before
> writing anything, so maybe text_mutex is not really needed as long as
> s390_kernel_write is called everywhere for text patching.
Good catch, maybe I'll just drop the text_mutex here.
--
Josh
On Thu, Apr 23, 2020 at 01:10:30PM -0500, Josh Poimboeuf wrote:
> On Thu, Apr 23, 2020 at 09:12:28AM -0500, Josh Poimboeuf wrote:
> > > > this is strange. While I would have expected an exception similar to
> > > > this, it really should have happened on the "sturg" instruction which
> > > > does the DAT-off store in s390_kernel_write(), and certainly not with
> > > > an ID of 0004 (protection). However, in your case, it happens on a
> > > > normal store instruction, with 0004 indicating a protection exception.
> > > >
> > > > This is more like what I would expect e.g. in the case where you do
> > > > _not_ use the s390_kernel_write() function for RO module text patching,
> > > > but rather normal memory access. So I am pretty sure that this is not
> > > > related to the s390_kernel_write(), but some other issue, maybe some
> > > > place left where you still use normal memory access?
> > >
> > > The call trace above also suggests that it is not a late relocation, no?
> > > The path is from KLP module init function through klp_enable_patch. It should
> > > mean that the to-be-patched object is loaded (it must be a module thanks
> > > to a check klp_init_object_loaded(), vmlinux relocations were processed
> > > earlier in apply_relocations()).
> > >
> > > However, the KLP module state here must be COMING, so s390_kernel_write()
> > > should be used. What are we missing?
> >
> > I'm also scratching my head. It _should_ be using s390_kernel_write()
> > based on the module state, but I don't see that on the stack trace.
> >
> > This trace (and Gerald's comment) seems to imply it's using
> > __builtin_memcpy(), which might be expected for UNFORMED state.
> >
> > Weird...
>
> Mystery solved:
>
> $ CROSS_COMPILE=s390x-linux-gnu- scripts/faddr2line vmlinux apply_rela+0x16a/0x520
> apply_rela+0x16a/0x520:
> apply_rela at arch/s390/kernel/module.c:336
>
> which corresponds to the following code in apply_rela():
>
>
> case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
> if (info->plt_initialized == 0) {
> unsigned int *ip;
> ip = me->core_layout.base + me->arch.plt_offset +
> info->plt_offset;
> ip[0] = 0x0d10e310; /* basr 1,0 */
> ip[1] = 0x100a0004; /* lg 1,10(1) */
>
>
> Notice how it's writing directly to text... oops.
>
This is more of note for the future, but when/if we add livepatch
support on arm64 we'll need to make the very same adjustment there as
well. See the following pattern:
arch/arm64/kernel/module.c:
reloc_insn_movw()
reloc_insn_imm()
reloc_insn_adrp()
*place = cpu_to_le32(insn);
maybe something like aarch64_insn_patch_text_nosync() could be used
there, I dunno. (It looks like ftrace and jump_labels are using that
interface.)
This is outside the scope of the patchset, but I thought I'd mention it
as I was curious to see how other arches were currently handling their
relocation updates.
-- Joe
On Thu, Apr 30, 2020 at 10:38:21AM -0400, Joe Lawrence wrote:
> On Thu, Apr 23, 2020 at 01:10:30PM -0500, Josh Poimboeuf wrote:
> > On Thu, Apr 23, 2020 at 09:12:28AM -0500, Josh Poimboeuf wrote:
> > > > > this is strange. While I would have expected an exception similar to
> > > > > this, it really should have happened on the "sturg" instruction which
> > > > > does the DAT-off store in s390_kernel_write(), and certainly not with
> > > > > an ID of 0004 (protection). However, in your case, it happens on a
> > > > > normal store instruction, with 0004 indicating a protection exception.
> > > > >
> > > > > This is more like what I would expect e.g. in the case where you do
> > > > > _not_ use the s390_kernel_write() function for RO module text patching,
> > > > > but rather normal memory access. So I am pretty sure that this is not
> > > > > related to the s390_kernel_write(), but some other issue, maybe some
> > > > > place left where you still use normal memory access?
> > > >
> > > > The call trace above also suggests that it is not a late relocation, no?
> > > > The path is from KLP module init function through klp_enable_patch. It should
> > > > mean that the to-be-patched object is loaded (it must be a module thanks
> > > > to a check klp_init_object_loaded(), vmlinux relocations were processed
> > > > earlier in apply_relocations()).
> > > >
> > > > However, the KLP module state here must be COMING, so s390_kernel_write()
> > > > should be used. What are we missing?
> > >
> > > I'm also scratching my head. It _should_ be using s390_kernel_write()
> > > based on the module state, but I don't see that on the stack trace.
> > >
> > > This trace (and Gerald's comment) seem to imply it's using
> > > __builtin_memcpy(), which might expected for UNFORMED state.
> > >
> > > Weird...
> >
> > Mystery solved:
> >
> > $ CROSS_COMPILE=s390x-linux-gnu- scripts/faddr2line vmlinux apply_rela+0x16a/0x520
> > apply_rela+0x16a/0x520:
> > apply_rela at arch/s390/kernel/module.c:336
> >
> > which corresponds to the following code in apply_rela():
> >
> >
> > case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */
> > if (info->plt_initialized == 0) {
> > unsigned int *ip;
> > ip = me->core_layout.base + me->arch.plt_offset +
> > info->plt_offset;
> > ip[0] = 0x0d10e310; /* basr 1,0 */
> > ip[1] = 0x100a0004; /* lg 1,10(1) */
> >
> >
> > Notice how it's writing directly to text... oops.
> >
>
> This is more of note for the future, but when/if we add livepatch
> support on arm64 we'll need to make the very same adjustment there as
> well. See the following pattern:
>
> arch/arm64/kernel/module.c:
>
> reloc_insn_movw()
> reloc_insn_imm()
> reloc_insn_adrp()
>
> *place = cpu_to_le32(insn);
>
> maybe something like aarch64_insn_patch_text_nosync() could be used
> there, I dunno. (It looks like ftrace and jump_labels are using that
> interface.)
>
> This is outside the scope of the patchset, but I thought I'd mention it
> as I was curious to see how other arches were currently handling their
> relocation updates.
True... I suspect your klp-convert selftests will catch that?
--
Josh
On 4/30/20 12:48 PM, Josh Poimboeuf wrote:
> On Thu, Apr 30, 2020 at 10:38:21AM -0400, Joe Lawrence wrote:
>> On Thu, Apr 23, 2020 at 01:10:30PM -0500, Josh Poimboeuf wrote:
>> This is more of note for the future, but when/if we add livepatch
>> support on arm64 we'll need to make the very same adjustment there as
>> well. See the following pattern:
>>
>> arch/arm64/kernel/module.c:
>>
>> reloc_insn_movw()
>> reloc_insn_imm()
>> reloc_insn_adrp()
>>
>> *place = cpu_to_le32(insn);
>>
>> maybe something like aarch64_insn_patch_text_nosync() could be used
>> there, I dunno. (It looks like ftrace and jump_labels are using that
>> interface.)
>>
>> This is outside the scope of the patchset, but I thought I'd mention it
>> as I was curious to see how other arches were currently handling their
>> relocation updates.
>
> True... I suspect your klp-convert selftests will catch that?
>
Indeed. Actually I had hacked together enough livepatch support on ARM to
see what happened when converting and loading the test patches :)
-- Joe