The current coredump implementation uses vmalloc area to copy
all the segments. But this might put strain on low memory targets
as the firmware size sometimes is in tens of MBs. The situation
becomes worse if there are multiple remote processors undergoing
recovery at the same time. This patch adds inline coredump
functionality that avoids extra memory usage. This requires
recovery to be halted until data is read by userspace and free
function is called.
Signed-off-by: Rishabh Bhatnagar <[email protected]>
---
drivers/remoteproc/remoteproc_coredump.c | 129 +++++++++++++++++++++++++++++--
include/linux/remoteproc.h | 15 ++++
2 files changed, 139 insertions(+), 5 deletions(-)
diff --git a/drivers/remoteproc/remoteproc_coredump.c b/drivers/remoteproc/remoteproc_coredump.c
index ded0244..de75e89 100644
--- a/drivers/remoteproc/remoteproc_coredump.c
+++ b/drivers/remoteproc/remoteproc_coredump.c
@@ -12,6 +12,12 @@
#include "remoteproc_internal.h"
#include "remoteproc_elf_helpers.h"
+struct rproc_coredump_state {
+ struct rproc *rproc;
+ void *header;
+ struct completion dump_done;
+};
+
/**
* rproc_coredump_cleanup() - clean up dump_segments list
* @rproc: the remote processor handle
@@ -114,12 +120,96 @@ int rproc_coredump_set_elf_info(struct rproc *rproc, u8 class, u16 machine)
}
EXPORT_SYMBOL(rproc_coredump_set_elf_info);
+static void rproc_coredump_free(void *data)
+{
+ struct rproc_coredump_state *dump_state = data;
+
+ complete(&dump_state->dump_done);
+ vfree(dump_state->header);
+}
+
+static unsigned long rproc_coredump_find_segment(loff_t user_offset,
+ struct list_head *segments,
+ size_t *data_left)
+{
+ struct rproc_dump_segment *segment;
+
+ list_for_each_entry(segment, segments, node) {
+ if (user_offset < segment->size) {
+ *data_left = segment->size - user_offset;
+ return segment->da + user_offset;
+ }
+ user_offset -= segment->size;
+ }
+
+ *data_left = 0;
+ return 0;
+}
+
+static ssize_t rproc_coredump_read(char *buffer, loff_t offset, size_t count,
+ void *data, size_t header_sz)
+{
+ void *device_mem;
+ size_t seg_data;
+ size_t copy_sz, bytes_left = count;
+ unsigned long addr;
+ struct rproc_coredump_state *dump_state = data;
+ struct rproc *rproc = dump_state->rproc;
+ void *elfcore = dump_state->header;
+
+ /* Copy the vmalloc'ed header first. */
+ if (offset < header_sz) {
+ copy_sz = memory_read_from_buffer(buffer, count, &offset,
+ elfcore, header_sz);
+ if (copy_sz < 0)
+ return -EINVAL;
+
+ return copy_sz;
+ }
+
+ /* Find out the segment memory chunk to be copied based on offset.
+ * Keep copying data until count bytes are read.
+ */
+ while (bytes_left) {
+ addr = rproc_coredump_find_segment(offset - header_sz,
+ &rproc->dump_segments,
+ &seg_data);
+ /* EOF check */
+ if (seg_data == 0) {
+ dev_info(&rproc->dev, "Ramdump done, %lld bytes read",
+ offset);
+ break;
+ }
+
+ copy_sz = min_t(size_t, bytes_left, seg_data);
+
+ device_mem = rproc_da_to_va(rproc, addr, copy_sz);
+ if (!device_mem) {
+ dev_err(&rproc->dev, "Coredump: %lx with size %zd out of remoteproc carveout\n",
+ addr, copy_sz);
+ return -ENOMEM;
+ }
+ memcpy(buffer, device_mem, copy_sz);
+
+ offset += copy_sz;
+ buffer += copy_sz;
+ bytes_left -= copy_sz;
+ }
+
+ return count - bytes_left;
+}
+
/**
* rproc_coredump() - perform coredump
* @rproc: rproc handle
*
* This function will generate an ELF header for the registered segments
- * and create a devcoredump device associated with rproc.
+ * and create a devcoredump device associated with rproc. Based on the
+ * coredump configuration this function will directly copy the segments
+ * from device memory to userspace or copy segments from device memory to
+ * a separate buffer, which can then be read by userspace.
+ * The first approach avoids using extra vmalloc memory. But it will stall
+ * recovery flow until dump is read by userspace.
*/
void rproc_coredump(struct rproc *rproc)
{
@@ -132,8 +222,10 @@ void rproc_coredump(struct rproc *rproc)
void *ptr;
u8 class = rproc->elf_class;
int phnum = 0;
+ struct rproc_coredump_state dump_state;
+ enum rproc_dump_mechanism dump_conf = rproc->dump_conf;
- if (list_empty(&rproc->dump_segments))
+ if (list_empty(&rproc->dump_segments) || dump_conf == COREDUMP_DISABLED)
return;
if (class == ELFCLASSNONE) {
@@ -143,7 +235,14 @@ void rproc_coredump(struct rproc *rproc)
data_size = elf_size_of_hdr(class);
list_for_each_entry(segment, &rproc->dump_segments, node) {
- data_size += elf_size_of_phdr(class) + segment->size;
+
+ /* For default configuration buffer includes headers & segments.
+ * For inline dump buffer just includes headers as segments are
+ * directly read from device memory.
+ */
+ data_size += elf_size_of_phdr(class);
+ if (dump_conf == COREDUMP_DEFAULT)
+ data_size += segment->size;
phnum++;
}
@@ -182,6 +281,10 @@ void rproc_coredump(struct rproc *rproc)
elf_phdr_set_p_flags(class, phdr, PF_R | PF_W | PF_X);
elf_phdr_set_p_align(class, phdr, 0);
+ /* No need to copy segments for inline dumps */
+ if (dump_conf == COREDUMP_INLINE)
+ goto next_seg;
+
if (segment->dump) {
segment->dump(rproc, segment, data + offset);
} else {
@@ -195,10 +298,26 @@ void rproc_coredump(struct rproc *rproc)
memcpy(data + offset, ptr, segment->size);
}
}
-
+next_seg:
offset += elf_phdr_get_p_filesz(class, phdr);
phdr += elf_size_of_phdr(class);
}
- dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
+ if (dump_conf == COREDUMP_DEFAULT) {
+ dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
+ return;
+ }
+
+ /* Initialize the dump state struct to be used by rproc_coredump_read */
+ dump_state.rproc = rproc;
+ dump_state.header = data;
+ init_completion(&dump_state.dump_done);
+
+ dev_coredumpm(&rproc->dev, NULL, &dump_state, data_size, GFP_KERNEL,
+ rproc_coredump_read, rproc_coredump_free);
+
+ /* Wait until the dump is read and free is called. Data is freed
+ * by devcoredump framework automatically after 5 minutes.
+ */
+ wait_for_completion(&dump_state.dump_done);
}
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 0468be4..ab2b9b7 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -435,6 +435,19 @@ enum rproc_crash_type {
};
/**
+ * enum rproc_dump_mechanism - Coredump options for core
+ * @COREDUMP_DEFAULT: Copy dump to separate buffer and carry on with recovery
+ * @COREDUMP_INLINE: Read segments directly from device memory. Stall
+ recovery until all segments are read
+ * @COREDUMP_DISABLED: Don't perform any dump
+ */
+enum rproc_dump_mechanism {
+ COREDUMP_DEFAULT,
+ COREDUMP_INLINE,
+ COREDUMP_DISABLED,
+};
+
+/**
* struct rproc_dump_segment - segment info from ELF header
* @node: list node related to the rproc segment list
* @da: device address of the segment
@@ -466,6 +479,7 @@ struct rproc_dump_segment {
* @dev: virtual device for refcounting and common remoteproc behavior
* @power: refcount of users who need this rproc powered up
* @state: state of the device
+ * @dump_conf: Currenlty selected coredump configuration
* @lock: lock which protects concurrent manipulations of the rproc
* @dbg_dir: debugfs directory of this rproc device
* @traces: list of trace buffers
@@ -499,6 +513,7 @@ struct rproc {
struct device dev;
atomic_t power;
unsigned int state;
+ enum rproc_dump_mechanism dump_conf;
struct mutex lock;
struct dentry *dbg_dir;
struct list_head traces;
--
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
On Thu 14 May 13:07 PDT 2020, Rishabh Bhatnagar wrote:
> The current coredump implementation uses vmalloc area to copy
> all the segments. But this might put strain on low memory targets
> as the firmware size sometimes is in tens of MBs. The situation
> becomes worse if there are multiple remote processors undergoing
> recovery at the same time. This patch adds inline coredump
> functionality that avoids extra memory usage. This requires
> recovery to be halted until data is read by userspace and free
> function is called.
>
Overall I think this looks really good now, but I spotted an issue with
INLINE dumps not using segment->dump().
Also there's 3 checkpatch --strict warnings, please fix those.
> Signed-off-by: Rishabh Bhatnagar <[email protected]>
> ---
> drivers/remoteproc/remoteproc_coredump.c | 129 +++++++++++++++++++++++++++++--
> include/linux/remoteproc.h | 15 ++++
> 2 files changed, 139 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/remoteproc/remoteproc_coredump.c b/drivers/remoteproc/remoteproc_coredump.c
[..]
> +static ssize_t rproc_coredump_read(char *buffer, loff_t offset, size_t count,
> + void *data, size_t header_sz)
> +{
> + void *device_mem;
> + size_t seg_data;
> + size_t copy_sz, bytes_left = count;
> + unsigned long addr;
> + struct rproc_coredump_state *dump_state = data;
> + struct rproc *rproc = dump_state->rproc;
> + void *elfcore = dump_state->header;
> +
> + /* Copy the vmalloc'ed header first. */
> + if (offset < header_sz) {
> + copy_sz = memory_read_from_buffer(buffer, count, &offset,
> + elfcore, header_sz);
> + if (copy_sz < 0)
> + return -EINVAL;
> +
> + return copy_sz;
> + }
> +
> + /* Find out the segment memory chunk to be copied based on offset.
> + * Keep copying data until count bytes are read.
> + */
/*
* Multiline comments start on the second line throughout
* remoteproc, please follow this.
*/
> + while (bytes_left) {
> + addr = rproc_coredump_find_segment(offset - header_sz,
> + &rproc->dump_segments,
> + &seg_data);
> + /* EOF check */
> + if (seg_data == 0) {
> + dev_info(&rproc->dev, "Ramdump done, %lld bytes read",
> + offset);
> + break;
> + }
> +
> + copy_sz = min_t(size_t, bytes_left, seg_data);
> +
> + device_mem = rproc_da_to_va(rproc, addr, copy_sz);
> + if (!device_mem) {
> + dev_err(&rproc->dev, "Coredump: %lx with size %zd out of remoteproc carveout\n",
> + addr, copy_sz);
> + return -ENOMEM;
I think it would be best to maintain the same behavior between INLINE
and DEFAULT here.
> + }
> + memcpy(buffer, device_mem, copy_sz);
This won't work for modem on e.g. SDM845, because we need to do some
special tricks to make the memory readable, that's why we invoke
segment->dump() in the DEFAULT scenario. Doing a memcpy here instead
will result in a security violation.
Perhaps this snippet can be extracted to a separate helper function,
which would allow you to avoid the next_seg goto label below.
> +
> + offset += copy_sz;
> + buffer += copy_sz;
> + bytes_left -= copy_sz;
> + }
> +
> + return count - bytes_left;
> +}
[..]
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index 0468be4..ab2b9b7 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -435,6 +435,19 @@ enum rproc_crash_type {
> };
>
> /**
> + * enum rproc_dump_mechanism - Coredump options for core
> + * @COREDUMP_DEFAULT: Copy dump to separate buffer and carry on with recovery
> + * @COREDUMP_INLINE: Read segments directly from device memory. Stall
> + recovery until all segments are read
> + * @COREDUMP_DISABLED: Don't perform any dump
> + */
> +enum rproc_dump_mechanism {
> + COREDUMP_DEFAULT,
> + COREDUMP_INLINE,
> + COREDUMP_DISABLED,
Please prefix these with RPROC_, as "coredump" has a meaning outside
remoteproc as well.
Regards,
Bjorn
Hi Rishab,
On Thu, May 14, 2020 at 01:07:35PM -0700, Rishabh Bhatnagar wrote:
> The current coredump implementation uses vmalloc area to copy
> all the segments. But this might put strain on low memory targets
> as the firmware size sometimes is in tens of MBs. The situation
> becomes worse if there are multiple remote processors undergoing
> recovery at the same time. This patch adds inline coredump
> functionality that avoids extra memory usage. This requires
> recovery to be halted until data is read by userspace and free
> function is called.
>
> Signed-off-by: Rishabh Bhatnagar <[email protected]>
> ---
> drivers/remoteproc/remoteproc_coredump.c | 129 +++++++++++++++++++++++++++++--
> include/linux/remoteproc.h | 15 ++++
> 2 files changed, 139 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/remoteproc/remoteproc_coredump.c b/drivers/remoteproc/remoteproc_coredump.c
> index ded0244..de75e89 100644
> --- a/drivers/remoteproc/remoteproc_coredump.c
> +++ b/drivers/remoteproc/remoteproc_coredump.c
> @@ -12,6 +12,12 @@
> #include "remoteproc_internal.h"
> #include "remoteproc_elf_helpers.h"
>
> +struct rproc_coredump_state {
> + struct rproc *rproc;
> + void *header;
> + struct completion dump_done;
> +};
> +
To use struct completion and related function linux/completion.h needs to be
included.
> /**
> * rproc_coredump_cleanup() - clean up dump_segments list
> * @rproc: the remote processor handle
> @@ -114,12 +120,96 @@ int rproc_coredump_set_elf_info(struct rproc *rproc, u8 class, u16 machine)
> }
> EXPORT_SYMBOL(rproc_coredump_set_elf_info);
>
> +static void rproc_coredump_free(void *data)
> +{
> + struct rproc_coredump_state *dump_state = data;
> +
> + complete(&dump_state->dump_done);
> + vfree(dump_state->header);
> +}
> +
> +static unsigned long rproc_coredump_find_segment(loff_t user_offset,
> + struct list_head *segments,
> + size_t *data_left)
> +{
> + struct rproc_dump_segment *segment;
> +
> + list_for_each_entry(segment, segments, node) {
> + if (user_offset < segment->size) {
> + *data_left = segment->size - user_offset;
> + return segment->da + user_offset;
> + }
> + user_offset -= segment->size;
> + }
> +
> + *data_left = 0;
> + return 0;
> +}
> +
> +static ssize_t rproc_coredump_read(char *buffer, loff_t offset, size_t count,
> + void *data, size_t header_sz)
> +{
> + void *device_mem;
> + size_t seg_data;
> + size_t copy_sz, bytes_left = count;
> + unsigned long addr;
> + struct rproc_coredump_state *dump_state = data;
> + struct rproc *rproc = dump_state->rproc;
> + void *elfcore = dump_state->header;
> +
> + /* Copy the vmalloc'ed header first. */
> + if (offset < header_sz) {
> + copy_sz = memory_read_from_buffer(buffer, count, &offset,
> + elfcore, header_sz);
> + if (copy_sz < 0)
> + return -EINVAL;
> +
> + return copy_sz;
> + }
> +
> + /* Find out the segment memory chunk to be copied based on offset.
> + * Keep copying data until count bytes are read.
> + */
Wrong format for multi-line comment, the following is the standard format:
/*
* Find out the segment memory chunk to be copied based on offset.
* Keep copying data until count bytes are read.
*/
> + while (bytes_left) {
> + addr = rproc_coredump_find_segment(offset - header_sz,
> + &rproc->dump_segments,
> + &seg_data);
> + /* EOF check */
> + if (seg_data == 0) {
> + dev_info(&rproc->dev, "Ramdump done, %lld bytes read",
> + offset);
> + break;
> + }
> +
> + copy_sz = min_t(size_t, bytes_left, seg_data);
> +
> + device_mem = rproc_da_to_va(rproc, addr, copy_sz);
> + if (!device_mem) {
> + dev_err(&rproc->dev, "Coredump: %lx with size %zd out of remoteproc carveout\n",
> + addr, copy_sz);
> + return -ENOMEM;
> + }
> + memcpy(buffer, device_mem, copy_sz);
> +
> + offset += copy_sz;
> + buffer += copy_sz;
> + bytes_left -= copy_sz;
> + }
> +
> + return count - bytes_left;
> +}
> +
> /**
> * rproc_coredump() - perform coredump
> * @rproc: rproc handle
> *
> * This function will generate an ELF header for the registered segments
> - * and create a devcoredump device associated with rproc.
> + * and create a devcoredump device associated with rproc. Based on the
> + * coredump configuration this function will directly copy the segments
> + * from device memory to userspace or copy segments from device memory to
> + * a separate buffer, which can then be read by userspace.
> + * The first approach avoids using extra vmalloc memory. But it will stall
> + * recovery flow until dump is read by userspace.
> */
> void rproc_coredump(struct rproc *rproc)
> {
> @@ -132,8 +222,10 @@ void rproc_coredump(struct rproc *rproc)
> void *ptr;
> u8 class = rproc->elf_class;
> int phnum = 0;
> + struct rproc_coredump_state dump_state;
> + enum rproc_dump_mechanism dump_conf = rproc->dump_conf;
>
> - if (list_empty(&rproc->dump_segments))
> + if (list_empty(&rproc->dump_segments) || dump_conf == COREDUMP_DISABLED)
> return;
>
> if (class == ELFCLASSNONE) {
> @@ -143,7 +235,14 @@ void rproc_coredump(struct rproc *rproc)
>
> data_size = elf_size_of_hdr(class);
> list_for_each_entry(segment, &rproc->dump_segments, node) {
> - data_size += elf_size_of_phdr(class) + segment->size;
> +
> + /* For default configuration buffer includes headers & segments.
> + * For inline dump buffer just includes headers as segments are
> + * directly read from device memory.
> + */
Same
> + data_size += elf_size_of_phdr(class);
> + if (dump_conf == COREDUMP_DEFAULT)
> + data_size += segment->size;
>
> phnum++;
> }
> @@ -182,6 +281,10 @@ void rproc_coredump(struct rproc *rproc)
> elf_phdr_set_p_flags(class, phdr, PF_R | PF_W | PF_X);
> elf_phdr_set_p_align(class, phdr, 0);
>
> + /* No need to copy segments for inline dumps */
> + if (dump_conf == COREDUMP_INLINE)
> + goto next_seg;
> +
> if (segment->dump) {
> segment->dump(rproc, segment, data + offset);
> } else {
> @@ -195,10 +298,26 @@ void rproc_coredump(struct rproc *rproc)
> memcpy(data + offset, ptr, segment->size);
> }
> }
> -
> +next_seg:
> offset += elf_phdr_get_p_filesz(class, phdr);
> phdr += elf_size_of_phdr(class);
> }
>
> - dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
> + if (dump_conf == COREDUMP_DEFAULT) {
> + dev_coredumpv(&rproc->dev, data, data_size, GFP_KERNEL);
> + return;
> + }
> +
> + /* Initialize the dump state struct to be used by rproc_coredump_read */
> + dump_state.rproc = rproc;
> + dump_state.header = data;
> + init_completion(&dump_state.dump_done);
> +
> + dev_coredumpm(&rproc->dev, NULL, &dump_state, data_size, GFP_KERNEL,
> + rproc_coredump_read, rproc_coredump_free);
> +
> + /* Wait until the dump is read and free is called. Data is freed
> + * by devcoredump framework automatically after 5 minutes.
> + */
Same
With the above:
Reviewed-by: Mathieu Poirier <[email protected]>
> + wait_for_completion(&dump_state.dump_done);
> }
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index 0468be4..ab2b9b7 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -435,6 +435,19 @@ enum rproc_crash_type {
> };
>
> /**
> + * enum rproc_dump_mechanism - Coredump options for core
> + * @COREDUMP_DEFAULT: Copy dump to separate buffer and carry on with recovery
> + * @COREDUMP_INLINE: Read segments directly from device memory. Stall
> + recovery until all segments are read
> + * @COREDUMP_DISABLED: Don't perform any dump
> + */
> +enum rproc_dump_mechanism {
> + COREDUMP_DEFAULT,
> + COREDUMP_INLINE,
> + COREDUMP_DISABLED,
> +};
> +
> +/**
> * struct rproc_dump_segment - segment info from ELF header
> * @node: list node related to the rproc segment list
> * @da: device address of the segment
> @@ -466,6 +479,7 @@ struct rproc_dump_segment {
> * @dev: virtual device for refcounting and common remoteproc behavior
> * @power: refcount of users who need this rproc powered up
> * @state: state of the device
> + * @dump_conf: Currenlty selected coredump configuration
> * @lock: lock which protects concurrent manipulations of the rproc
> * @dbg_dir: debugfs directory of this rproc device
> * @traces: list of trace buffers
> @@ -499,6 +513,7 @@ struct rproc {
> struct device dev;
> atomic_t power;
> unsigned int state;
> + enum rproc_dump_mechanism dump_conf;
> struct mutex lock;
> struct dentry *dbg_dir;
> struct list_head traces;
> --
> The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
> a Linux Foundation Collaborative Project