2023-06-27 21:09:10

by Yuxiao Zhang

[permalink] [raw]
Subject: [PATCH] pstore: ramoops: support pmsg size larger than kmalloc limitation

Current pmsg implementation is using kmalloc for pmsg record buffer,
which has max size limits based on page size. Currently even we
allocate enough space with pmsg-size, pmsg will still fail if the
file size is larger than what kmalloc allowed.

Since we don't need physical contiguous memory for pmsg buffer
, we can use kvmalloc to avoid such limitation.

Signed-off-by: Yuxiao Zhang <[email protected]>
---
fs/pstore/inode.c | 2 +-
fs/pstore/platform.c | 9 +++++----
fs/pstore/ram.c | 5 +++--
fs/pstore/ram_core.c | 3 ++-
4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index ffbadb8b3032..df7fb2ad4599 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -54,7 +54,7 @@ static void free_pstore_private(struct pstore_private *private)
if (!private)
return;
if (private->record) {
- kfree(private->record->buf);
+ kvfree(private->record->buf);
kfree(private->record->priv);
kfree(private->record);
}
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index cbc0b468c1ab..f51e9460ac9d 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -32,6 +32,7 @@
#include <linux/uaccess.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>
+#include <linux/mm.h>

#include "internal.h"

@@ -549,7 +550,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
if (record->buf)
return -EINVAL;

- record->buf = memdup_user(buf, record->size);
+ record->buf = vmemdup_user(buf, record->size);
if (IS_ERR(record->buf)) {
ret = PTR_ERR(record->buf);
goto out;
@@ -557,7 +558,7 @@ static int pstore_write_user_compat(struct pstore_record *record,

ret = record->psi->write(record);

- kfree(record->buf);
+ kvfree(record->buf);
out:
record->buf = NULL;

@@ -730,7 +731,7 @@ static void decompress_record(struct pstore_record *record)
return;

/* Swap out compressed contents with decompressed contents. */
- kfree(record->buf);
+ kvfree(record->buf);
record->buf = unzipped;
record->size = unzipped_len;
record->compressed = false;
@@ -783,7 +784,7 @@ void pstore_get_backend_records(struct pstore_info *psi,
rc = pstore_mkfile(root, record);
if (rc) {
/* pstore_mkfile() did not take record, so free it. */
- kfree(record->buf);
+ kvfree(record->buf);
kfree(record->priv);
kfree(record);
if (rc != -EEXIST || !quiet)
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index ade66dbe5f39..296465b14fa9 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -20,6 +20,7 @@
#include <linux/compiler.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/mm.h>

#include "internal.h"
#include "ram_internal.h"
@@ -268,7 +269,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
/* ECC correction notice */
record->ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0);

- record->buf = kmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
+ record->buf = kvmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
if (record->buf == NULL) {
size = -ENOMEM;
goto out;
@@ -282,7 +283,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)

out:
if (free_prz) {
- kfree(prz->old_log);
+ kvfree(prz->old_log);
kfree(prz);
}

diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 966191d3a5ba..3453d493ec27 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <asm/page.h>

#include "ram_internal.h"
@@ -385,7 +386,7 @@ void *persistent_ram_old(struct persistent_ram_zone *prz)

void persistent_ram_free_old(struct persistent_ram_zone *prz)
{
- kfree(prz->old_log);
+ kvfree(prz->old_log);
prz->old_log = NULL;
prz->old_log_size = 0;
}
--
2.41.0.162.gfafddb0af9-goog


2023-06-28 08:56:47

by Greg KH

[permalink] [raw]
Subject: Re: [PATCH] pstore: ramoops: support pmsg size larger than kmalloc limitation

On Tue, Jun 27, 2023 at 01:25:41PM -0700, Yuxiao Zhang wrote:
> Current pmsg implementation is using kmalloc for pmsg record buffer,
> which has max size limits based on page size.

What is that max size?

> Currently even we
> allocate enough space with pmsg-size, pmsg will still fail if the
> file size is larger than what kmalloc allowed.
>
> Since we don't need physical contiguous memory for pmsg buffer
> , we can use kvmalloc to avoid such limitation.

Odd placement of the ',' character :)

Anyway, thanks for getting this sent out.

But, what in-kernel user is hitting this in the pstore implementation?
How big of a buffer is it trying to create? Is this a bug in older
kernels with the in-kernel drivers as well? If so, should it go to
stable releases and how far back?

thanks,

greg k-h

2023-06-28 17:27:00

by Yuxiao Zhang

[permalink] [raw]
Subject: Re: Re: [PATCH] pstore: ramoops: support pmsg size larger than kmalloc limitation

Thanks for reviewing the patch.

On 28 Jun 2023 07:30:16 +0200, Greg KH wrote:
>What is that max size?

The max size is arch dependent, it should be 2^(PAGE_SIZE+MAX_ORDER). In our environment it is 4M.

>what in-kernel user is hitting this in the pstore implementation?

We are trying to use pmsg to hold a core dump file, so we have pmsg-size=32M and thus hit this issue.

Other than us, here is one I found that trying to save dmesg beyond kmalloc limitaton:
https://lore.kernel.org/lkml/[email protected]/T/

Thanks,
-Yuxiao Zhang

2023-06-28 18:04:09

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH] pstore: ramoops: support pmsg size larger than kmalloc limitation

On Tue, Jun 27, 2023 at 01:25:41PM -0700, Yuxiao Zhang wrote:
> Current pmsg implementation is using kmalloc for pmsg record buffer,
> which has max size limits based on page size. Currently even we
> allocate enough space with pmsg-size, pmsg will still fail if the
> file size is larger than what kmalloc allowed.
>
> Since we don't need physical contiguous memory for pmsg buffer
> , we can use kvmalloc to avoid such limitation.

Conceptually, I am fine with this change. I need a little time to trace
down the allocations. At first glance, I thought this patch only needed
to cover pstore_write_user_compat(), but I guess the read side needs to
be adjusted as well?

I'll double-check.

And yes, Greg's questions are all good -- fixing syntax and adding size
details in the commit log would be appreciated.

-Kees

--
Kees Cook

2023-06-28 18:40:22

by Guilherme G. Piccoli

[permalink] [raw]
Subject: Re: [PATCH] pstore: ramoops: support pmsg size larger than kmalloc limitation

On 28/06/2023 19:10, Yuxiao Zhang wrote:
> Thanks for reviewing the patch.
>
> On 28 Jun 2023 07:30:16 +0200, Greg KH wrote:
>> What is that max size?
>
> The max size is arch dependent, it should be 2^(PAGE_SIZE+MAX_ORDER). In our environment it is 4M.
>
>> what in-kernel user is hitting this in the pstore implementation?
>
> We are trying to use pmsg to hold a core dump file, so we have pmsg-size=32M and thus hit this issue.
>
> Other than us, here is one I found that trying to save dmesg beyond kmalloc limitaton:
> https://lore.kernel.org/lkml/[email protected]/T/

Hi Yuxiao Zhang, thanks for the improvement! And also, thanks for
mentioning the thread above - I tested your patch today and was soon to
write you this message heh

So, first of all, the patch works for the Steam Deck case - kernel 6.4
with or without your patch behaved the same, i.e., pstore/ramoops worked
and it was possible to collect the dmesg.

But when I tried to increase the record size in ramoops, I got this
error: https://termbin.com/b12e

This is the same error as mentioned in the thread above. And it happens
when I try to bump the record size to 4MB, the biggest working value is
still 2MB. Maybe a missing spot, which remained using kmalloc() instead
of the virtual variant?

Also - Kees certainly knows that way better, but - are we 100% sure that
the region for pstore can be non-contiguous? For some reason, I always
thought this was a requirement - grepping the code, I found this
(wrong?) comment:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/pstore/zone.c#n3

Cheers,


Guilherme

2023-06-28 23:51:15

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH] pstore: ramoops: support pmsg size larger than kmalloc limitation

On Wed, Jun 28, 2023 at 03:12:10PM -0300, Guilherme G. Piccoli wrote:
> Also - Kees certainly knows that way better, but - are we 100% sure that
> the region for pstore can be non-contiguous? For some reason, I always
> thought this was a requirement - grepping the code, I found this
> (wrong?) comment:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/pstore/zone.c#n3

The contiguous requirements are entirely for the RAM backend's storage,
so these intermediate memory regions can use non-contiguous physical
backing memory (i.e. vmalloc).

Even the special case of crash-dumping should be fine for the large
buffer used to hold the crash before doing compression.

There are effectively 4 types of allocations in pstore:

1- a physical -> virtual mapping for the RAM backend
2- the allocations (if any) for non-RAM backends to hold a copy of pstore
records when extracted from the backend storage (e.g NVRAM, EFI vars,
etc).
3- incoming allocations that are used temporarily to hand data to the
backend (e.g. the write_compat used in this patch)
4- the allocation used for holding the pstorefs data contents (which I
need to double-check, but is entirely defined by the backends)

Changes aren't needed for (1), it's fine on its own. This patch changes
allocations for (2) and (3) for pmsg and the RAM backend, if I'm reading
it correctly. I think (4) is included as part of (2), but I need to
check. As long as all paths use kvfree() for the record buffers,
everything should Just Work for RAM. Switching the other backends to
also use kvalloc() would allow for greater flexibility, though.

Anyway, it's on my list to review and test. I'm still working through
other things related to the merge window opening, so I may be a bit slow
for the next week. :)

-Kees

--
Kees Cook

2023-06-29 19:27:07

by Guilherme G. Piccoli

[permalink] [raw]
Subject: Re: [PATCH] pstore: ramoops: support pmsg size larger than kmalloc limitation

On 28/06/2023 20:24, Kees Cook wrote:
> On Wed, Jun 28, 2023 at 03:12:10PM -0300, Guilherme G. Piccoli wrote:
>> Also - Kees certainly knows that way better, but - are we 100% sure that
>> the region for pstore can be non-contiguous? For some reason, I always
>> thought this was a requirement - grepping the code, I found this
>> (wrong?) comment:
>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/pstore/zone.c#n3
>
> The contiguous requirements are entirely for the RAM backend's storage,
> so these intermediate memory regions can use non-contiguous physical
> backing memory (i.e. vmalloc).
>
> Even the special case of crash-dumping should be fine for the large
> buffer used to hold the crash before doing compression.
>
> There are effectively 4 types of allocations in pstore:
>
> 1- a physical -> virtual mapping for the RAM backend
> 2- the allocations (if any) for non-RAM backends to hold a copy of pstore
> records when extracted from the backend storage (e.g NVRAM, EFI vars,
> etc).
> 3- incoming allocations that are used temporarily to hand data to the
> backend (e.g. the write_compat used in this patch)
> 4- the allocation used for holding the pstorefs data contents (which I
> need to double-check, but is entirely defined by the backends)
>
> Changes aren't needed for (1), it's fine on its own. This patch changes
> allocations for (2) and (3) for pmsg and the RAM backend, if I'm reading
> it correctly. I think (4) is included as part of (2), but I need to
> check. As long as all paths use kvfree() for the record buffers,
> everything should Just Work for RAM. Switching the other backends to
> also use kvalloc() would allow for greater flexibility, though.
>
> Anyway, it's on my list to review and test. I'm still working through
> other things related to the merge window opening, so I may be a bit slow
> for the next week. :)
>
> -Kees
>

Thanks a bunch for the clarification Kees, much appreciated!
Now I understand it way better =)

2023-06-30 21:03:49

by Yuxiao Zhang

[permalink] [raw]
Subject: Re: [PATCH] pstore: ramoops: support pmsg size larger than kmalloc limitation

Sorry forgot to add subject header in msg which messed up email client,
resending it again

Added size details to commit message and fixed the format. See the new
patch below.

Thanks,
-Yuxiao


From cd3ec6155a3cf0e198afdf2d040c73ee146b696f Mon Sep 17 00:00:00 2001
From: Yuxiao Zhang <[email protected]>
Date: Fri, 30 Jun 2023 10:45:21 -0700
Subject: [PATCH] pstore: ramoops: support pmsg size larger than kmalloc
limitation

Current pmsg implementation is using kmalloc for pmsg record buffer,
which has max size limits of 2^(MAX_ORDER + PAGE_SHIFT). Currently even
we allocate enough space with pmsg-size, pmsg will still fail if the
file size is larger than what kmalloc allowed.

Since we don't need physical contiguous memory for pmsg buffer,
we can use kvmalloc to avoid such limitation.

Signed-off-by: Yuxiao Zhang <[email protected]>
---
fs/pstore/inode.c | 2 +-
fs/pstore/platform.c | 9 +++++----
fs/pstore/ram.c | 5 +++--
fs/pstore/ram_core.c | 3 ++-
4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index ffbadb8b3032..df7fb2ad4599 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -54,7 +54,7 @@ static void free_pstore_private(struct pstore_private *private)
if (!private)
return;
if (private->record) {
- kfree(private->record->buf);
+ kvfree(private->record->buf);
kfree(private->record->priv);
kfree(private->record);
}
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index cbc0b468c1ab..f51e9460ac9d 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -32,6 +32,7 @@
#include <linux/uaccess.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>
+#include <linux/mm.h>

#include "internal.h"

@@ -549,7 +550,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
if (record->buf)
return -EINVAL;

- record->buf = memdup_user(buf, record->size);
+ record->buf = vmemdup_user(buf, record->size);
if (IS_ERR(record->buf)) {
ret = PTR_ERR(record->buf);
goto out;
@@ -557,7 +558,7 @@ static int pstore_write_user_compat(struct pstore_record *record,

ret = record->psi->write(record);

- kfree(record->buf);
+ kvfree(record->buf);
out:
record->buf = NULL;

@@ -730,7 +731,7 @@ static void decompress_record(struct pstore_record *record)
return;

/* Swap out compressed contents with decompressed contents. */
- kfree(record->buf);
+ kvfree(record->buf);
record->buf = unzipped;
record->size = unzipped_len;
record->compressed = false;
@@ -783,7 +784,7 @@ void pstore_get_backend_records(struct pstore_info *psi,
rc = pstore_mkfile(root, record);
if (rc) {
/* pstore_mkfile() did not take record, so free it. */
- kfree(record->buf);
+ kvfree(record->buf);
kfree(record->priv);
kfree(record);
if (rc != -EEXIST || !quiet)
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index ade66dbe5f39..296465b14fa9 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -20,6 +20,7 @@
#include <linux/compiler.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/mm.h>

#include "internal.h"
#include "ram_internal.h"
@@ -268,7 +269,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
/* ECC correction notice */
record->ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0);

- record->buf = kmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
+ record->buf = kvmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
if (record->buf == NULL) {
size = -ENOMEM;
goto out;
@@ -282,7 +283,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)

out:
if (free_prz) {
- kfree(prz->old_log);
+ kvfree(prz->old_log);
kfree(prz);
}

diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 966191d3a5ba..3453d493ec27 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <asm/page.h>

#include "ram_internal.h"
@@ -385,7 +386,7 @@ void *persistent_ram_old(struct persistent_ram_zone *prz)

void persistent_ram_free_old(struct persistent_ram_zone *prz)
{
- kfree(prz->old_log);
+ kvfree(prz->old_log);
prz->old_log = NULL;
prz->old_log_size = 0;
}
--
2.41.0.255.g8b1d071c50-goog


2023-06-30 21:21:29

by Yuxiao Zhang

[permalink] [raw]
Subject: Re: [PATCH] pstore: ramoops: support pmsg size larger than kmalloc limitation

Thanks,
-Yuxiao


From cd3ec6155a3cf0e198afdf2d040c73ee146b696f Mon Sep 17 00:00:00 2001
From: Yuxiao Zhang <[email protected]>
Date: Fri, 30 Jun 2023 10:45:21 -0700
Subject: [PATCH] pstore: ramoops: support pmsg size larger than kmalloc
limitation

Current pmsg implementation is using kmalloc for pmsg record buffer,
which has max size limits of 2^(MAX_ORDER + PAGE_SHIFT). Currently even
we allocate enough space with pmsg-size, pmsg will still fail if the
file size is larger than what kmalloc allowed.

Since we don't need physical contiguous memory for pmsg buffer,
we can use kvmalloc to avoid such limitation.

Signed-off-by: Yuxiao Zhang <[email protected]>
---
fs/pstore/inode.c | 2 +-
fs/pstore/platform.c | 9 +++++----
fs/pstore/ram.c | 5 +++--
fs/pstore/ram_core.c | 3 ++-
4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index ffbadb8b3032..df7fb2ad4599 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -54,7 +54,7 @@ static void free_pstore_private(struct pstore_private *private)
if (!private)
return;
if (private->record) {
- kfree(private->record->buf);
+ kvfree(private->record->buf);
kfree(private->record->priv);
kfree(private->record);
}
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index cbc0b468c1ab..f51e9460ac9d 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -32,6 +32,7 @@
#include <linux/uaccess.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>
+#include <linux/mm.h>

#include "internal.h"

@@ -549,7 +550,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
if (record->buf)
return -EINVAL;

- record->buf = memdup_user(buf, record->size);
+ record->buf = vmemdup_user(buf, record->size);
if (IS_ERR(record->buf)) {
ret = PTR_ERR(record->buf);
goto out;
@@ -557,7 +558,7 @@ static int pstore_write_user_compat(struct pstore_record *record,

ret = record->psi->write(record);

- kfree(record->buf);
+ kvfree(record->buf);
out:
record->buf = NULL;

@@ -730,7 +731,7 @@ static void decompress_record(struct pstore_record *record)
return;

/* Swap out compressed contents with decompressed contents. */
- kfree(record->buf);
+ kvfree(record->buf);
record->buf = unzipped;
record->size = unzipped_len;
record->compressed = false;
@@ -783,7 +784,7 @@ void pstore_get_backend_records(struct pstore_info *psi,
rc = pstore_mkfile(root, record);
if (rc) {
/* pstore_mkfile() did not take record, so free it. */
- kfree(record->buf);
+ kvfree(record->buf);
kfree(record->priv);
kfree(record);
if (rc != -EEXIST || !quiet)
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index ade66dbe5f39..296465b14fa9 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -20,6 +20,7 @@
#include <linux/compiler.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/mm.h>

#include "internal.h"
#include "ram_internal.h"
@@ -268,7 +269,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
/* ECC correction notice */
record->ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0);

- record->buf = kmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
+ record->buf = kvmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
if (record->buf == NULL) {
size = -ENOMEM;
goto out;
@@ -282,7 +283,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)

out:
if (free_prz) {
- kfree(prz->old_log);
+ kvfree(prz->old_log);
kfree(prz);
}

diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 966191d3a5ba..3453d493ec27 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <asm/page.h>

#include "ram_internal.h"
@@ -385,7 +386,7 @@ void *persistent_ram_old(struct persistent_ram_zone *prz)

void persistent_ram_free_old(struct persistent_ram_zone *prz)
{
- kfree(prz->old_log);
+ kvfree(prz->old_log);
prz->old_log = NULL;
prz->old_log_size = 0;
}
--
2.41.0.255.g8b1d071c50-goog


2023-07-18 20:43:27

by Yuxiao Zhang

[permalink] [raw]