2021-12-10 06:09:24

by Dmitry Torokhov

[permalink] [raw]
Subject: [PATCH v3] module: add in-kernel support for decompressing

Current scheme of having userspace decompress kernel modules before
loading them into the kernel runs afoul of LoadPin security policy, as
it loses link between the source of kernel module on the disk and binary
blob that is being loaded into the kernel. To solve this issue let's
implement decompression in kernel, so that we can pass a file descriptor
of compressed module file into finit_module() which will keep LoadPin
happy.

To let userspace know what compression/decompression scheme kernel
supports it will create /sys/module/compression attribute. kmod can read
this attribute and decide if it can pass compressed file to
finit_module(). New MODULE_INIT_COMPRESSED_DATA flag indicates that the
kernel should attempt to decompress the data read from file descriptor
prior to trying load the module.

To simplify things kernel will only implement single decompression
method matching compression method selected when generating modules.
This patch implements gzip and xz; more can be added later,

Signed-off-by: Dmitry Torokhov <[email protected]>
---

v3:
- fix missing Kconfig dependency

v2:
- changed flag MODULE_INIT_COMPRESSED_DATA ->
MODULE_INIT_COMPRESSED_FLAG per Kees' request

include/uapi/linux/module.h | 1 +
init/Kconfig | 13 ++
kernel/Makefile | 1 +
kernel/module-internal.h | 18 +++
kernel/module.c | 35 +++--
kernel/module_decompress.c | 271 ++++++++++++++++++++++++++++++++++++
6 files changed, 328 insertions(+), 11 deletions(-)

diff --git a/include/uapi/linux/module.h b/include/uapi/linux/module.h
index 50d98ec5e866..03a33ffffcba 100644
--- a/include/uapi/linux/module.h
+++ b/include/uapi/linux/module.h
@@ -5,5 +5,6 @@
/* Flags for sys_finit_module: */
#define MODULE_INIT_IGNORE_MODVERSIONS 1
#define MODULE_INIT_IGNORE_VERMAGIC 2
+#define MODULE_INIT_COMPRESSED_FILE 4

#endif /* _UAPI_LINUX_MODULE_H */
diff --git a/init/Kconfig b/init/Kconfig
index cd23faa163d1..d90774ff7610 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -2305,6 +2305,19 @@ config MODULE_COMPRESS_ZSTD

endchoice

+config MODULE_DECOMPRESS
+ bool "Support in-kernel module decompression"
+ depends on MODULE_COMPRESS_GZIP || MODULE_COMPRESS_XZ
+ select ZLIB_INFLATE if MODULE_COMPRESS_GZIP
+ select XZ_DEC if MODULE_COMPRESS_XZ
+ help
+
+ Support for decompressing kernel modules by the kernel itself
+ instead of relying on userspace to perform this task. Useful when
+ load pinning security policy is enabled.
+
+ If unsure, say N.
+
config MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS
bool "Allow loading of modules with missing namespace imports"
help
diff --git a/kernel/Makefile b/kernel/Makefile
index 186c49582f45..56f4ee97f328 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -67,6 +67,7 @@ obj-y += up.o
endif
obj-$(CONFIG_UID16) += uid16.o
obj-$(CONFIG_MODULES) += module.o
+obj-$(CONFIG_MODULE_DECOMPRESS) += module_decompress.o
obj-$(CONFIG_MODULE_SIG) += module_signing.o
obj-$(CONFIG_MODULE_SIG_FORMAT) += module_signature.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
diff --git a/kernel/module-internal.h b/kernel/module-internal.h
index 33783abc377b..3c1143d2c8c7 100644
--- a/kernel/module-internal.h
+++ b/kernel/module-internal.h
@@ -22,6 +22,11 @@ struct load_info {
bool sig_ok;
#ifdef CONFIG_KALLSYMS
unsigned long mod_kallsyms_init_off;
+#endif
+#ifdef CONFIG_MODULE_DECOMPRESS
+ struct page **pages;
+ unsigned int max_pages;
+ unsigned int used_pages;
#endif
struct {
unsigned int sym, str, mod, vers, info, pcpu;
@@ -29,3 +34,16 @@ struct load_info {
};

extern int mod_verify_sig(const void *mod, struct load_info *info);
+
+#ifdef CONFIG_MODULE_DECOMPRESS
+int module_decompress(struct load_info *info, const void *buf, size_t size);
+void module_decompress_cleanup(struct load_info *info);
+#else
+int module_decompress(struct load_info *info, const void *buf, size_t size)
+{
+ return -EOPNOTSUPP;
+}
+void module_decompress_cleanup(struct load_info *info)
+{
+}
+#endif
diff --git a/kernel/module.c b/kernel/module.c
index 84a9141a5e15..a7c5490b41e9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3174,9 +3174,12 @@ static int copy_module_from_user(const void __user *umod, unsigned long len,
return err;
}

-static void free_copy(struct load_info *info)
+static void free_copy(struct load_info *info, int flags)
{
- vfree(info->hdr);
+ if (flags & MODULE_INIT_COMPRESSED_FILE)
+ module_decompress_cleanup(info);
+ else
+ vfree(info->hdr);
}

static int rewrite_section_headers(struct load_info *info, int flags)
@@ -4125,7 +4128,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
}

/* Get rid of temporary copy. */
- free_copy(info);
+ free_copy(info, flags);

/* Done! */
trace_module_load(mod);
@@ -4174,7 +4177,7 @@ static int load_module(struct load_info *info, const char __user *uargs,

module_deallocate(mod, info);
free_copy:
- free_copy(info);
+ free_copy(info, flags);
return err;
}

@@ -4201,7 +4204,8 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
{
struct load_info info = { };
- void *hdr = NULL;
+ void *buf = NULL;
+ int len;
int err;

err = may_init_module();
@@ -4211,15 +4215,24 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags);

if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS
- |MODULE_INIT_IGNORE_VERMAGIC))
+ |MODULE_INIT_IGNORE_VERMAGIC
+ |MODULE_INIT_COMPRESSED_FILE))
return -EINVAL;

- err = kernel_read_file_from_fd(fd, 0, &hdr, INT_MAX, NULL,
+ len = kernel_read_file_from_fd(fd, 0, &buf, INT_MAX, NULL,
READING_MODULE);
- if (err < 0)
- return err;
- info.hdr = hdr;
- info.len = err;
+ if (len < 0)
+ return len;
+
+ if (flags & MODULE_INIT_COMPRESSED_FILE) {
+ err = module_decompress(&info, buf, len);
+ vfree(buf); /* compressed data is no longer needed */
+ if (err)
+ return err;
+ } else {
+ info.hdr = buf;
+ info.len = len;
+ }

return load_module(&info, uargs, flags);
}
diff --git a/kernel/module_decompress.c b/kernel/module_decompress.c
new file mode 100644
index 000000000000..590ca00aa098
--- /dev/null
+++ b/kernel/module_decompress.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/kobject.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/vmalloc.h>
+
+#include "module-internal.h"
+
+static int module_extend_max_pages(struct load_info *info, unsigned int extent)
+{
+ struct page **new_pages;
+
+ new_pages = kvmalloc_array(info->max_pages + extent,
+ sizeof(info->pages), GFP_KERNEL);
+ if (!new_pages)
+ return -ENOMEM;
+
+ memcpy(new_pages, info->pages, info->max_pages * sizeof(info->pages));
+ kvfree(info->pages);
+ info->pages = new_pages;
+ info->max_pages += extent;
+
+ return 0;
+}
+
+static struct page *module_get_next_page(struct load_info *info)
+{
+ struct page *page;
+ int error;
+
+ if (info->max_pages == info->used_pages) {
+ error = module_extend_max_pages(info, info->used_pages);
+ if (error)
+ return ERR_PTR(error);
+ }
+
+ page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
+
+ info->pages[info->used_pages++] = page;
+ return page;
+}
+
+#ifdef CONFIG_MODULE_COMPRESS_GZIP
+#include <linux/zlib.h>
+#define MODULE_COMPRESSION gzip
+#define MODULE_DECOMPRESS_FN module_gzip_decompress
+
+/*
+ * Calculate length of the header which consists of signature, header
+ * flags, time stamp and operating system ID (10 bytes total), plus
+ * an optional filename.
+ */
+static size_t module_gzip_header_len(const u8 *buf, size_t size)
+{
+ const u8 signature[] = { 0x1f, 0x8b, 0x08 };
+ size_t len = 10;
+
+ if (size < len || memcmp(buf, signature, sizeof(signature)))
+ return 0;
+
+ if (buf[3] & 0x08) {
+ do {
+ /*
+ * If we can't find the end of the file name we must
+ * be dealing with a corrupted file.
+ */
+ if (len == size)
+ return 0;
+ } while (buf[len++] != '\0');
+ }
+
+ return len;
+}
+
+static ssize_t module_gzip_decompress(struct load_info *info,
+ const void *buf, size_t size)
+{
+ struct z_stream_s s = { 0 };
+ size_t new_size = 0;
+ size_t gzip_hdr_len;
+ ssize_t retval;
+ int rc;
+
+ gzip_hdr_len = module_gzip_header_len(buf, size);
+ if (!gzip_hdr_len) {
+ pr_err("not a gzip compressed module\n");
+ return -EINVAL;
+ }
+
+ s.next_in = buf + gzip_hdr_len;
+ s.avail_in = size - gzip_hdr_len;
+
+ s.workspace = kmalloc(zlib_inflate_workspacesize(), GFP_KERNEL);
+ if (!s.workspace)
+ return -ENOMEM;
+
+ rc = zlib_inflateInit2(&s, -MAX_WBITS);
+ if (rc != Z_OK) {
+ pr_err("failed to initialize decompresser: %d\n", rc);
+ retval = -EINVAL;
+ goto out;
+ }
+
+ do {
+ struct page *page = module_get_next_page(info);
+ if (!page) {
+ retval = -ENOMEM;
+ goto out_inflate_end;
+ }
+
+ s.next_out = kmap(page);
+ s.avail_out = PAGE_SIZE;
+ rc = zlib_inflate(&s, 0);
+ kunmap(page);
+
+ new_size += PAGE_SIZE - s.avail_out;
+ } while (rc == Z_OK);
+
+ if (rc != Z_STREAM_END) {
+ pr_err("decompression failed with status %d\n", rc);
+ retval = -EINVAL;
+ goto out_inflate_end;
+ }
+
+ retval = new_size;
+
+out_inflate_end:
+ zlib_inflateEnd(&s);
+out:
+ kfree(s.workspace);
+ return retval;
+}
+#elif CONFIG_MODULE_COMPRESS_XZ
+#include <linux/xz.h>
+#define MODULE_COMPRESSION xz
+#define MODULE_DECOMPRESS_FN module_xz_decompress
+
+static ssize_t module_xz_decompress(struct load_info *info,
+ const void *buf, size_t size)
+{
+ static const u8 signature[] = { 0xfd, '7', 'z', 'X', 'Z', 0 };
+ struct xz_dec *xz_dec;
+ struct xz_buf xz_buf;
+ enum xz_ret xz_ret;
+ size_t new_size = 0;
+ ssize_t retval;
+
+ if (size < sizeof(signature) ||
+ memcmp(buf, signature, sizeof(signature))) {
+ pr_err("not an xz compressed module\n");
+ return -EINVAL;
+ }
+
+ xz_dec = xz_dec_init(XZ_DYNALLOC, (u32)-1);
+ if (!xz_dec)
+ return -ENOMEM;
+
+ xz_buf.in_size = size;
+ xz_buf.in = buf;
+ xz_buf.in_pos = 0;
+
+ do {
+ struct page *page = module_get_next_page(info);
+ if (!page) {
+ retval = -ENOMEM;
+ goto out;
+ }
+
+ xz_buf.out = kmap(page);
+ xz_buf.out_pos = 0;
+ xz_buf.out_size = PAGE_SIZE;
+ xz_ret = xz_dec_run(xz_dec, &xz_buf);
+ kunmap(page);
+
+ new_size += xz_buf.out_pos;
+ } while (xz_buf.out_pos == PAGE_SIZE && xz_ret == XZ_OK);
+
+ if (xz_ret != XZ_STREAM_END) {
+ pr_err("decompression failed with status %d\n", xz_ret);
+ retval = -EINVAL;
+ goto out;
+ }
+
+ retval = new_size;
+
+ out:
+ xz_dec_end(xz_dec);
+ return retval;
+}
+#else
+#error "Unexpected configuration for CONFIG_MODULE_DECOMPRESS"
+#endif
+
+int module_decompress(struct load_info *info, const void *buf, size_t size)
+{
+ unsigned int n_pages;
+ ssize_t data_size;
+ int error;
+
+ /*
+ * Start with number of pages twice as big as needed for
+ * compressed data.
+ */
+ n_pages = DIV_ROUND_UP(size, PAGE_SIZE) * 2;
+ error = module_extend_max_pages(info, n_pages);
+
+ data_size = MODULE_DECOMPRESS_FN(info, buf, size);
+ if (data_size < 0) {
+ error = data_size;
+ goto err;
+ }
+
+ info->hdr = vmap(info->pages, info->used_pages, VM_MAP, PAGE_KERNEL);
+ if (!info->hdr) {
+ error = -ENOMEM;
+ goto err;
+ }
+
+ info->len = data_size;
+ return 0;
+
+err:
+ module_decompress_cleanup(info);
+ return error;
+}
+
+void module_decompress_cleanup(struct load_info *info)
+{
+ int i;
+
+ if (info->hdr)
+ vunmap(info->hdr);
+
+ for (i = 0; i < info->used_pages; i++)
+ __free_page(info->pages[i]);
+
+ kvfree(info->pages);
+
+ info->pages = NULL;
+ info->max_pages = info->used_pages = 0;
+}
+
+static ssize_t compression_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%s\n", __stringify(MODULE_COMPRESSION));
+}
+static struct kobj_attribute module_compression_attr = __ATTR_RO(compression);
+
+static int __init module_decompress_sysfs_init(void)
+{
+ int error;
+
+ error = sysfs_create_file(&module_kset->kobj,
+ &module_compression_attr.attr);
+ if (error)
+ pr_warn("Failed to create 'compression' attribute");
+
+ return 0;
+}
+module_init(module_decompress_sysfs_init);
--
2.34.1.173.g76aa8bc2d0-goog


--
Dmitry


2021-12-10 22:05:21

by Luis Chamberlain

[permalink] [raw]
Subject: Re: [PATCH v3] module: add in-kernel support for decompressing

On Thu, Dec 09, 2021 at 10:09:17PM -0800, Dmitry Torokhov wrote:
> +static ssize_t module_xz_decompress(struct load_info *info,
> + const void *buf, size_t size)
> +{
> + static const u8 signature[] = { 0xfd, '7', 'z', 'X', 'Z', 0 };
> + struct xz_dec *xz_dec;
> + struct xz_buf xz_buf;
> + enum xz_ret xz_ret;
> + size_t new_size = 0;
> + ssize_t retval;
> +
> + if (size < sizeof(signature) ||
> + memcmp(buf, signature, sizeof(signature))) {
> + pr_err("not an xz compressed module\n");
> + return -EINVAL;
> + }
> +
> + xz_dec = xz_dec_init(XZ_DYNALLOC, (u32)-1);
> + if (!xz_dec)
> + return -ENOMEM;
> +
> + xz_buf.in_size = size;
> + xz_buf.in = buf;
> + xz_buf.in_pos = 0;
> +
> + do {
> + struct page *page = module_get_next_page(info);
> + if (!page) {
> + retval = -ENOMEM;
> + goto out;
> + }

This looks very similar to fw_decompress_xz_pages() on
drivers/base/firmware_loader/main.c

Is there any sharing possible with decompression between the two?

Luis

2021-12-10 23:21:14

by Dmitry Torokhov

[permalink] [raw]
Subject: Re: [PATCH v3] module: add in-kernel support for decompressing

On Fri, Dec 10, 2021 at 02:05:17PM -0800, Luis Chamberlain wrote:
> On Thu, Dec 09, 2021 at 10:09:17PM -0800, Dmitry Torokhov wrote:
> > +static ssize_t module_xz_decompress(struct load_info *info,
> > + const void *buf, size_t size)
> > +{
> > + static const u8 signature[] = { 0xfd, '7', 'z', 'X', 'Z', 0 };
> > + struct xz_dec *xz_dec;
> > + struct xz_buf xz_buf;
> > + enum xz_ret xz_ret;
> > + size_t new_size = 0;
> > + ssize_t retval;
> > +
> > + if (size < sizeof(signature) ||
> > + memcmp(buf, signature, sizeof(signature))) {
> > + pr_err("not an xz compressed module\n");
> > + return -EINVAL;
> > + }
> > +
> > + xz_dec = xz_dec_init(XZ_DYNALLOC, (u32)-1);
> > + if (!xz_dec)
> > + return -ENOMEM;
> > +
> > + xz_buf.in_size = size;
> > + xz_buf.in = buf;
> > + xz_buf.in_pos = 0;
> > +
> > + do {
> > + struct page *page = module_get_next_page(info);
> > + if (!page) {
> > + retval = -ENOMEM;
> > + goto out;
> > + }
>
> This looks very similar to fw_decompress_xz_pages() on
> drivers/base/firmware_loader/main.c
>
> Is there any sharing possible with decompression between the two?

Ideally we'd get proper crypto API allowing this (page by page)
decompression and then we can switch to it in both places and stop
calling into particular algorithms directly. Unfortunately this is not
something I can sign up for at this time.

As it is I am not sure if it worth to try to split out XZ code and share
it between module and firmware loader.

Thanks.

--
Dmitry

2021-12-10 23:35:45

by Luis Chamberlain

[permalink] [raw]
Subject: Re: [PATCH v3] module: add in-kernel support for decompressing

On Fri, Dec 10, 2021 at 03:21:03PM -0800, Dmitry Torokhov wrote:
> On Fri, Dec 10, 2021 at 02:05:17PM -0800, Luis Chamberlain wrote:
> > On Thu, Dec 09, 2021 at 10:09:17PM -0800, Dmitry Torokhov wrote:
> > > +static ssize_t module_xz_decompress(struct load_info *info,
> > > + const void *buf, size_t size)
> > > +{
> > > + static const u8 signature[] = { 0xfd, '7', 'z', 'X', 'Z', 0 };
> > > + struct xz_dec *xz_dec;
> > > + struct xz_buf xz_buf;
> > > + enum xz_ret xz_ret;
> > > + size_t new_size = 0;
> > > + ssize_t retval;
> > > +
> > > + if (size < sizeof(signature) ||
> > > + memcmp(buf, signature, sizeof(signature))) {
> > > + pr_err("not an xz compressed module\n");
> > > + return -EINVAL;
> > > + }
> > > +
> > > + xz_dec = xz_dec_init(XZ_DYNALLOC, (u32)-1);
> > > + if (!xz_dec)
> > > + return -ENOMEM;
> > > +
> > > + xz_buf.in_size = size;
> > > + xz_buf.in = buf;
> > > + xz_buf.in_pos = 0;
> > > +
> > > + do {
> > > + struct page *page = module_get_next_page(info);
> > > + if (!page) {
> > > + retval = -ENOMEM;
> > > + goto out;
> > > + }
> >
> > This looks very similar to fw_decompress_xz_pages() on
> > drivers/base/firmware_loader/main.c
> >
> > Is there any sharing possible with decompression between the two?
>
> Ideally we'd get proper crypto API allowing this (page by page)
> decompression and then we can switch to it in both places and stop
> calling into particular algorithms directly. Unfortunately this is not
> something I can sign up for at this time.
>
> As it is I am not sure if it worth to try to split out XZ code and share
> it between module and firmware loader.

Got it, makes sense.

What type of tests have you implemented for this ? kmod has quite a bit
of tests. Can you add some for this?

Luis

2021-12-10 23:47:30

by Dmitry Torokhov

[permalink] [raw]
Subject: Re: [PATCH v3] module: add in-kernel support for decompressing

On Fri, Dec 10, 2021 at 03:35:30PM -0800, Luis Chamberlain wrote:
> On Fri, Dec 10, 2021 at 03:21:03PM -0800, Dmitry Torokhov wrote:
> > On Fri, Dec 10, 2021 at 02:05:17PM -0800, Luis Chamberlain wrote:
> > > On Thu, Dec 09, 2021 at 10:09:17PM -0800, Dmitry Torokhov wrote:
> > > > +static ssize_t module_xz_decompress(struct load_info *info,
> > > > + const void *buf, size_t size)
> > > > +{
> > > > + static const u8 signature[] = { 0xfd, '7', 'z', 'X', 'Z', 0 };
> > > > + struct xz_dec *xz_dec;
> > > > + struct xz_buf xz_buf;
> > > > + enum xz_ret xz_ret;
> > > > + size_t new_size = 0;
> > > > + ssize_t retval;
> > > > +
> > > > + if (size < sizeof(signature) ||
> > > > + memcmp(buf, signature, sizeof(signature))) {
> > > > + pr_err("not an xz compressed module\n");
> > > > + return -EINVAL;
> > > > + }
> > > > +
> > > > + xz_dec = xz_dec_init(XZ_DYNALLOC, (u32)-1);
> > > > + if (!xz_dec)
> > > > + return -ENOMEM;
> > > > +
> > > > + xz_buf.in_size = size;
> > > > + xz_buf.in = buf;
> > > > + xz_buf.in_pos = 0;
> > > > +
> > > > + do {
> > > > + struct page *page = module_get_next_page(info);
> > > > + if (!page) {
> > > > + retval = -ENOMEM;
> > > > + goto out;
> > > > + }
> > >
> > > This looks very similar to fw_decompress_xz_pages() on
> > > drivers/base/firmware_loader/main.c
> > >
> > > Is there any sharing possible with decompression between the two?
> >
> > Ideally we'd get proper crypto API allowing this (page by page)
> > decompression and then we can switch to it in both places and stop
> > calling into particular algorithms directly. Unfortunately this is not
> > something I can sign up for at this time.
> >
> > As it is I am not sure if it worth to try to split out XZ code and share
> > it between module and firmware loader.
>
> Got it, makes sense.
>
> What type of tests have you implemented for this ? kmod has quite a bit
> of tests. Can you add some for this?

OK, will do.

Thanks.

--
Dmitry

2021-12-11 00:11:28

by Luis Chamberlain

[permalink] [raw]
Subject: Re: [PATCH v3] module: add in-kernel support for decompressing

On Thu, Dec 09, 2021 at 10:09:17PM -0800, Dmitry Torokhov wrote:
> diff --git a/init/Kconfig b/init/Kconfig
> index cd23faa163d1..d90774ff7610 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -2305,6 +2305,19 @@ config MODULE_COMPRESS_ZSTD
>
> endchoice
>
> +config MODULE_DECOMPRESS
> + bool "Support in-kernel module decompression"
> + depends on MODULE_COMPRESS_GZIP || MODULE_COMPRESS_XZ
> + select ZLIB_INFLATE if MODULE_COMPRESS_GZIP
> + select XZ_DEC if MODULE_COMPRESS_XZ

What if MODULE_COMPRESS_GZIP and MODULE_COMPRESS_XZ are enabled?
These are not mutually exclusive.

> + help
> +
> + Support for decompressing kernel modules by the kernel itself
> + instead of relying on userspace to perform this task. Useful when
> + load pinning security policy is enabled.

Shouldn't kernel decompression be faster too? If so, what's the
point of doing it in userspace?

> diff --git a/kernel/module_decompress.c b/kernel/module_decompress.c
> new file mode 100644
> index 000000000000..590ca00aa098
> --- /dev/null
> +++ b/kernel/module_decompress.c
> @@ -0,0 +1,271 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright 2021 Google LLC.
> + */
> +
> +#include <linux/init.h>
> +#include <linux/highmem.h>
> +#include <linux/kobject.h>
> +#include <linux/mm.h>
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +#include <linux/sysfs.h>
> +#include <linux/vmalloc.h>
> +
> +#include "module-internal.h"
> +
> +static int module_extend_max_pages(struct load_info *info, unsigned int extent)
> +{
> + struct page **new_pages;
> +
> + new_pages = kvmalloc_array(info->max_pages + extent,
> + sizeof(info->pages), GFP_KERNEL);
> + if (!new_pages)
> + return -ENOMEM;
> +
> + memcpy(new_pages, info->pages, info->max_pages * sizeof(info->pages));
> + kvfree(info->pages);
> + info->pages = new_pages;
> + info->max_pages += extent;
> +
> + return 0;
> +}
> +
> +static struct page *module_get_next_page(struct load_info *info)
> +{
> + struct page *page;
> + int error;
> +
> + if (info->max_pages == info->used_pages) {
> + error = module_extend_max_pages(info, info->used_pages);
> + if (error)
> + return ERR_PTR(error);
> + }
> +
> + page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
> + if (!page)
> + return ERR_PTR(-ENOMEM);
> +
> + info->pages[info->used_pages++] = page;
> + return page;
> +}
> +
> +#ifdef CONFIG_MODULE_COMPRESS_GZIP
> +#include <linux/zlib.h>
> +#define MODULE_COMPRESSION gzip
> +#define MODULE_DECOMPRESS_FN module_gzip_decompress

So gzip is assumed if your kernel has both gzip and xz. That seems odd.

<-- snip -->

> +#elif CONFIG_MODULE_COMPRESS_XZ
> +#include <linux/xz.h>
> +#define MODULE_COMPRESSION xz
> +#define MODULE_DECOMPRESS_FN module_xz_decompress
> +#else

<-- snip -->

> +#error "Unexpected configuration for CONFIG_MODULE_DECOMPRESS"

Using "depends on" logic on the kconfig symbol would resolve this and
make this not needed.

Why can't we just inspect the module and determine? Or, why not just add
a new kconfig symbol under MODULE_DECOMPRESS which lets you specify the
MODULE_COMPRESSION_TYPE. This way this is explicit.

> +module_init(module_decompress_sysfs_init);

This seems odd, altough it works, can you use late_initcall instead()?

Luis

2021-12-11 01:09:30

by Dmitry Torokhov

[permalink] [raw]
Subject: Re: [PATCH v3] module: add in-kernel support for decompressing

On Fri, Dec 10, 2021 at 04:11:21PM -0800, Luis Chamberlain wrote:
> On Thu, Dec 09, 2021 at 10:09:17PM -0800, Dmitry Torokhov wrote:
> > diff --git a/init/Kconfig b/init/Kconfig
> > index cd23faa163d1..d90774ff7610 100644
> > --- a/init/Kconfig
> > +++ b/init/Kconfig
> > @@ -2305,6 +2305,19 @@ config MODULE_COMPRESS_ZSTD
> >
> > endchoice
> >
> > +config MODULE_DECOMPRESS
> > + bool "Support in-kernel module decompression"
> > + depends on MODULE_COMPRESS_GZIP || MODULE_COMPRESS_XZ
> > + select ZLIB_INFLATE if MODULE_COMPRESS_GZIP
> > + select XZ_DEC if MODULE_COMPRESS_XZ
>
> What if MODULE_COMPRESS_GZIP and MODULE_COMPRESS_XZ are enabled?
> These are not mutually exclusive.

They are mutually exclusive, the kernel uses the same (one) compression
method for all kernel modules that it generates (i.e we do not compress
drivers/usb/... with gzip while drivers/net/... with xz).

The idea here is to allow the kernel consume the same format that was
used when generating modules. Supporting multiple formats at once is
overkill IMO.

>
> > + help
> > +
> > + Support for decompressing kernel modules by the kernel itself
> > + instead of relying on userspace to perform this task. Useful when
> > + load pinning security policy is enabled.
>
> Shouldn't kernel decompression be faster too? If so, what's the
> point of doing it in userspace?

Make the kernel smaller? Have more flexibility with exotic compression
formats?

>
> > diff --git a/kernel/module_decompress.c b/kernel/module_decompress.c
> > new file mode 100644
> > index 000000000000..590ca00aa098
> > --- /dev/null
> > +++ b/kernel/module_decompress.c
> > @@ -0,0 +1,271 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + * Copyright 2021 Google LLC.
> > + */
> > +
> > +#include <linux/init.h>
> > +#include <linux/highmem.h>
> > +#include <linux/kobject.h>
> > +#include <linux/mm.h>
> > +#include <linux/module.h>
> > +#include <linux/slab.h>
> > +#include <linux/sysfs.h>
> > +#include <linux/vmalloc.h>
> > +
> > +#include "module-internal.h"
> > +
> > +static int module_extend_max_pages(struct load_info *info, unsigned int extent)
> > +{
> > + struct page **new_pages;
> > +
> > + new_pages = kvmalloc_array(info->max_pages + extent,
> > + sizeof(info->pages), GFP_KERNEL);
> > + if (!new_pages)
> > + return -ENOMEM;
> > +
> > + memcpy(new_pages, info->pages, info->max_pages * sizeof(info->pages));
> > + kvfree(info->pages);
> > + info->pages = new_pages;
> > + info->max_pages += extent;
> > +
> > + return 0;
> > +}
> > +
> > +static struct page *module_get_next_page(struct load_info *info)
> > +{
> > + struct page *page;
> > + int error;
> > +
> > + if (info->max_pages == info->used_pages) {
> > + error = module_extend_max_pages(info, info->used_pages);
> > + if (error)
> > + return ERR_PTR(error);
> > + }
> > +
> > + page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
> > + if (!page)
> > + return ERR_PTR(-ENOMEM);
> > +
> > + info->pages[info->used_pages++] = page;
> > + return page;
> > +}
> > +
> > +#ifdef CONFIG_MODULE_COMPRESS_GZIP
> > +#include <linux/zlib.h>
> > +#define MODULE_COMPRESSION gzip
> > +#define MODULE_DECOMPRESS_FN module_gzip_decompress
>
> So gzip is assumed if your kernel has both gzip and xz. That seems odd.

No, gzuo is used when CONFIG_MODULE_COMPRESS_GZIP is enabled. That means
that CONFIG_MODULE_COMPRESS_XZ is not enabled.

>
> <-- snip -->
>
> > +#elif CONFIG_MODULE_COMPRESS_XZ
> > +#include <linux/xz.h>
> > +#define MODULE_COMPRESSION xz
> > +#define MODULE_DECOMPRESS_FN module_xz_decompress
> > +#else
>
> <-- snip -->
>
> > +#error "Unexpected configuration for CONFIG_MODULE_DECOMPRESS"
>
> Using "depends on" logic on the kconfig symbol would resolve this and
> make this not needed.
>
> Why can't we just inspect the module and determine? Or, why not just add
> a new kconfig symbol under MODULE_DECOMPRESS which lets you specify the
> MODULE_COMPRESSION_TYPE. This way this is explicit.

It is a possibility, I just wanted to decompression scheme match
compression one so there is less potential for misconfiguration.

>
> > +module_init(module_decompress_sysfs_init);
>
> This seems odd, altough it works, can you use late_initcall instead()?

Yes, I can change this.

Thanks.

--
Dmitry

2021-12-20 16:52:37

by Luis Chamberlain

[permalink] [raw]
Subject: Re: [PATCH v3] module: add in-kernel support for decompressing

On Fri, Dec 10, 2021 at 05:09:23PM -0800, Dmitry Torokhov wrote:
> On Fri, Dec 10, 2021 at 04:11:21PM -0800, Luis Chamberlain wrote:
> > On Thu, Dec 09, 2021 at 10:09:17PM -0800, Dmitry Torokhov wrote:
> > > diff --git a/init/Kconfig b/init/Kconfig
> > > index cd23faa163d1..d90774ff7610 100644
> > > --- a/init/Kconfig
> > > +++ b/init/Kconfig
> > > @@ -2305,6 +2305,19 @@ config MODULE_COMPRESS_ZSTD
> > >
> > > endchoice
> > >
> > > +config MODULE_DECOMPRESS
> > > + bool "Support in-kernel module decompression"
> > > + depends on MODULE_COMPRESS_GZIP || MODULE_COMPRESS_XZ
> > > + select ZLIB_INFLATE if MODULE_COMPRESS_GZIP
> > > + select XZ_DEC if MODULE_COMPRESS_XZ
> >
> > What if MODULE_COMPRESS_GZIP and MODULE_COMPRESS_XZ are enabled?
> > These are not mutually exclusive.
>
> They are mutually exclusive, the kernel uses the same (one) compression
> method for all kernel modules that it generates (i.e we do not compress
> drivers/usb/... with gzip while drivers/net/... with xz).

Ah yes I failed to see the choice/prompt for it.

> The idea here is to allow the kernel consume the same format that was
> used when generating modules. Supporting multiple formats at once is
> overkill IMO.

Indeed.

> > > + help
> > > +
> > > + Support for decompressing kernel modules by the kernel itself
> > > + instead of relying on userspace to perform this task. Useful when
> > > + load pinning security policy is enabled.
> >
> > Shouldn't kernel decompression be faster too? If so, what's the
> > point of doing it in userspace?
>
> Make the kernel smaller?

Yes this I buy.

> Have more flexibility with exotic compression
> formats?

I just have a hunch that doing module decompression in the kernel will
speed things quite a bit... any chance you can provide some before and
after systemd-analyze ?

Luis

2021-12-21 03:18:29

by Dmitry Torokhov

[permalink] [raw]
Subject: Re: [PATCH v3] module: add in-kernel support for decompressing

On Mon, Dec 20, 2021 at 08:52:30AM -0800, Luis Chamberlain wrote:
> On Fri, Dec 10, 2021 at 05:09:23PM -0800, Dmitry Torokhov wrote:
> > On Fri, Dec 10, 2021 at 04:11:21PM -0800, Luis Chamberlain wrote:
> > > On Thu, Dec 09, 2021 at 10:09:17PM -0800, Dmitry Torokhov wrote:
> > > > diff --git a/init/Kconfig b/init/Kconfig
> > > > index cd23faa163d1..d90774ff7610 100644
> > > > --- a/init/Kconfig
> > > > +++ b/init/Kconfig
> > > > @@ -2305,6 +2305,19 @@ config MODULE_COMPRESS_ZSTD
> > > >
> > > > endchoice
> > > >
> > > > +config MODULE_DECOMPRESS
> > > > + bool "Support in-kernel module decompression"
> > > > + depends on MODULE_COMPRESS_GZIP || MODULE_COMPRESS_XZ
> > > > + select ZLIB_INFLATE if MODULE_COMPRESS_GZIP
> > > > + select XZ_DEC if MODULE_COMPRESS_XZ
> > >
> > > What if MODULE_COMPRESS_GZIP and MODULE_COMPRESS_XZ are enabled?
> > > These are not mutually exclusive.
> >
> > They are mutually exclusive, the kernel uses the same (one) compression
> > method for all kernel modules that it generates (i.e we do not compress
> > drivers/usb/... with gzip while drivers/net/... with xz).
>
> Ah yes I failed to see the choice/prompt for it.
>
> > The idea here is to allow the kernel consume the same format that was
> > used when generating modules. Supporting multiple formats at once is
> > overkill IMO.
>
> Indeed.
>
> > > > + help
> > > > +
> > > > + Support for decompressing kernel modules by the kernel itself
> > > > + instead of relying on userspace to perform this task. Useful when
> > > > + load pinning security policy is enabled.
> > >
> > > Shouldn't kernel decompression be faster too? If so, what's the
> > > point of doing it in userspace?
> >
> > Make the kernel smaller?
>
> Yes this I buy.
>
> > Have more flexibility with exotic compression
> > formats?
>
> I just have a hunch that doing module decompression in the kernel will
> speed things quite a bit... any chance you can provide some before and
> after systemd-analyze ?

If you insist I can try running it, but it should be slower unless your
memory controller is so slow that reading file from disk and dealing
with page by page decompression is quicker than copying already
decompressed data from userspace. We still reading and uncompressing
file in kmod (to make sure the format is valid) and we can uncompress
using large buffers (we are not concerned with using unswappable kernel
memory).

Maybe in the future when we have streaming and accelerated in-kernel
decompression API we could optimize for that in kmod and see some
savings on very large modules.

Thanks.

--
Dmitry

2021-12-21 21:59:52

by Luis Chamberlain

[permalink] [raw]
Subject: Re: [PATCH v3] module: add in-kernel support for decompressing

On Mon, Dec 20, 2021 at 07:17:57PM -0800, Dmitry Torokhov wrote:
> On Mon, Dec 20, 2021 at 08:52:30AM -0800, Luis Chamberlain wrote:
> > On Fri, Dec 10, 2021 at 05:09:23PM -0800, Dmitry Torokhov wrote:
> > > On Fri, Dec 10, 2021 at 04:11:21PM -0800, Luis Chamberlain wrote:
> > > > On Thu, Dec 09, 2021 at 10:09:17PM -0800, Dmitry Torokhov wrote:
> > > > > diff --git a/init/Kconfig b/init/Kconfig
> > > > > index cd23faa163d1..d90774ff7610 100644
> > > > > --- a/init/Kconfig
> > > > > +++ b/init/Kconfig
> > > > > @@ -2305,6 +2305,19 @@ config MODULE_COMPRESS_ZSTD
> > > > >
> > > > > endchoice
> > > > >
> > > > > +config MODULE_DECOMPRESS
> > > > > + bool "Support in-kernel module decompression"
> > > > > + depends on MODULE_COMPRESS_GZIP || MODULE_COMPRESS_XZ
> > > > > + select ZLIB_INFLATE if MODULE_COMPRESS_GZIP
> > > > > + select XZ_DEC if MODULE_COMPRESS_XZ
> > > >
> > > > What if MODULE_COMPRESS_GZIP and MODULE_COMPRESS_XZ are enabled?
> > > > These are not mutually exclusive.
> > >
> > > They are mutually exclusive, the kernel uses the same (one) compression
> > > method for all kernel modules that it generates (i.e we do not compress
> > > drivers/usb/... with gzip while drivers/net/... with xz).
> >
> > Ah yes I failed to see the choice/prompt for it.
> >
> > > The idea here is to allow the kernel consume the same format that was
> > > used when generating modules. Supporting multiple formats at once is
> > > overkill IMO.
> >
> > Indeed.
> >
> > > > > + help
> > > > > +
> > > > > + Support for decompressing kernel modules by the kernel itself
> > > > > + instead of relying on userspace to perform this task. Useful when
> > > > > + load pinning security policy is enabled.
> > > >
> > > > Shouldn't kernel decompression be faster too? If so, what's the
> > > > point of doing it in userspace?
> > >
> > > Make the kernel smaller?
> >
> > Yes this I buy.
> >
> > > Have more flexibility with exotic compression
> > > formats?
> >
> > I just have a hunch that doing module decompression in the kernel will
> > speed things quite a bit... any chance you can provide some before and
> > after systemd-analyze ?
>
> If you insist I can try running it,

If you can run the test, yes it would be appreciated.

> but it should be slower unless your
> memory controller is so slow that reading file from disk and dealing
> with page by page decompression is quicker than copying already
> decompressed data from userspace.

With userspace decompression I'd imagine we also have more context switches.

> We still reading and uncompressing
> file in kmod (to make sure the format is valid)

I don't understand, that seems wasteful.

> and we can uncompress
> using large buffers (we are not concerned with using unswappable kernel
> memory).
>
> Maybe in the future when we have streaming and accelerated in-kernel
> decompression API we could optimize for that in kmod and see some
> savings on very large modules.

That would be very nice.

Luis

2022-01-03 03:01:11

by Dmitry Torokhov

[permalink] [raw]
Subject: Re: [PATCH v3] module: add in-kernel support for decompressing

Hi Luis,

Happy New Year!

On Tue, Dec 21, 2021 at 01:59:46PM -0800, Luis Chamberlain wrote:
> On Mon, Dec 20, 2021 at 07:17:57PM -0800, Dmitry Torokhov wrote:
> > On Mon, Dec 20, 2021 at 08:52:30AM -0800, Luis Chamberlain wrote:
> > > On Fri, Dec 10, 2021 at 05:09:23PM -0800, Dmitry Torokhov wrote:
> > > > On Fri, Dec 10, 2021 at 04:11:21PM -0800, Luis Chamberlain wrote:
> > > > > On Thu, Dec 09, 2021 at 10:09:17PM -0800, Dmitry Torokhov wrote:
> > > > > > diff --git a/init/Kconfig b/init/Kconfig
> > > > > > index cd23faa163d1..d90774ff7610 100644
> > > > > > --- a/init/Kconfig
> > > > > > +++ b/init/Kconfig
> > > > > > @@ -2305,6 +2305,19 @@ config MODULE_COMPRESS_ZSTD
> > > > > >
> > > > > > endchoice
> > > > > >
> > > > > > +config MODULE_DECOMPRESS
> > > > > > + bool "Support in-kernel module decompression"
> > > > > > + depends on MODULE_COMPRESS_GZIP || MODULE_COMPRESS_XZ
> > > > > > + select ZLIB_INFLATE if MODULE_COMPRESS_GZIP
> > > > > > + select XZ_DEC if MODULE_COMPRESS_XZ
> > > > >
> > > > > What if MODULE_COMPRESS_GZIP and MODULE_COMPRESS_XZ are enabled?
> > > > > These are not mutually exclusive.
> > > >
> > > > They are mutually exclusive, the kernel uses the same (one) compression
> > > > method for all kernel modules that it generates (i.e we do not compress
> > > > drivers/usb/... with gzip while drivers/net/... with xz).
> > >
> > > Ah yes I failed to see the choice/prompt for it.
> > >
> > > > The idea here is to allow the kernel consume the same format that was
> > > > used when generating modules. Supporting multiple formats at once is
> > > > overkill IMO.
> > >
> > > Indeed.
> > >
> > > > > > + help
> > > > > > +
> > > > > > + Support for decompressing kernel modules by the kernel itself
> > > > > > + instead of relying on userspace to perform this task. Useful when
> > > > > > + load pinning security policy is enabled.
> > > > >
> > > > > Shouldn't kernel decompression be faster too? If so, what's the
> > > > > point of doing it in userspace?
> > > >
> > > > Make the kernel smaller?
> > >
> > > Yes this I buy.
> > >
> > > > Have more flexibility with exotic compression
> > > > formats?
> > >
> > > I just have a hunch that doing module decompression in the kernel will
> > > speed things quite a bit... any chance you can provide some before and
> > > after systemd-analyze ?
> >
> > If you insist I can try running it,
>
> If you can run the test, yes it would be appreciated.

OK, so I finally got around to doing it and the differences are pretty
much noise, as I expected:

5.16.0-rc7: Startup finished in 5.022s (firmware) + 6.106s (loader) + 1.370s (kernel) + 5.685s (initrd) + 10.842s (userspace) = 29.026s
5.16.0-rc7-patched: Startup finished in 4.958s (firmware) + 6.701s (loader) + 1.382s (kernel) + 5.278s (initrd) + 10.822s (userspace) = 29.145s
5.16.0-rc7-patched: Startup finished in 4.953s (firmware) + 5.912s (loader) + 1.385s (kernel) + 5.327s (initrd) + 10.457s (userspace) = 28.036s

Also see attached.

>
> > but it should be slower unless your
> > memory controller is so slow that reading file from disk and dealing
> > with page by page decompression is quicker than copying already
> > decompressed data from userspace.
>
> With userspace decompression I'd imagine we also have more context switches.

We have plenty of rescheduling points when we allocate new pages, so I
expect we'll be getting preempted while in kernel as well if needed.

>
> > We still reading and uncompressing
> > file in kmod (to make sure the format is valid)
>
> I don't understand, that seems wasteful.

This way we can make sure we are not feeding kernel garbage and abort
early. Yes, we could just check signature and hope that the data is good
(and if it is not the kernel will reject it) but this is not a hot path
at all and amount of data we decompress is relatively small, so I do not
think trying to optimize this makes much sense (as shown by the numbers
above).

>
> > and we can uncompress
> > using large buffers (we are not concerned with using unswappable kernel
> > memory).
> >
> > Maybe in the future when we have streaming and accelerated in-kernel
> > decompression API we could optimize for that in kmod and see some
> > savings on very large modules.
>
> That would be very nice.

Again, practical benefit of doing this is pretty much close to 0 in this
particular case.

Thanks.

--
Dmitry

2022-01-11 15:42:14

by Luis Chamberlain

[permalink] [raw]
Subject: Re: [PATCH v3] module: add in-kernel support for decompressing

On Sun, Jan 02, 2022 at 06:58:15PM -0800, Dmitry Torokhov wrote:
> OK, so I finally got around to doing it and the differences are pretty
> much noise, as I expected:
>
> 5.16.0-rc7: Startup finished in 5.022s (firmware) + 6.106s (loader) + 1.370s (kernel) + 5.685s (initrd) + 10.842s (userspace) = 29.026s
> 5.16.0-rc7-patched: Startup finished in 4.958s (firmware) + 6.701s (loader) + 1.382s (kernel) + 5.278s (initrd) + 10.822s (userspace) = 29.145s
> 5.16.0-rc7-patched: Startup finished in 4.953s (firmware) + 5.912s (loader) + 1.385s (kernel) + 5.327s (initrd) + 10.457s (userspace) = 28.036s
>
> Also see attached.

If kmod didn't do the decompression I suspect things might be slightly
different, but I agree that given the different with kernel compression
now being done, removing userespace compression might just be noise as
well.

> > > We still reading and uncompressing
> > > file in kmod (to make sure the format is valid)
> >
> > I don't understand, that seems wasteful.
>
> This way we can make sure we are not feeding kernel garbage and abort
> early. Yes, we could just check signature and hope that the data is good
> (and if it is not the kernel will reject it) but this is not a hot path
> at all and amount of data we decompress is relatively small, so I do not
> think trying to optimize this makes much sense (as shown by the numbers
> above).

Sure. And if an LSM is used, one would assume the LSM does its own finit
module checks.

> > > and we can uncompress
> > > using large buffers (we are not concerned with using unswappable kernel
> > > memory).
> > >
> > > Maybe in the future when we have streaming and accelerated in-kernel
> > > decompression API we could optimize for that in kmod and see some
> > > savings on very large modules.
> >
> > That would be very nice.
>
> Again, practical benefit of doing this is pretty much close to 0 in this
> particular case.

Based on what is observed so far, I agree.

Luis