From: "Steven Rostedt (Google)" <[email protected]>
Add an option to the trace_instance kernel command line parameter that
allows it to use the reserved memory from memmap boot parameter.
memmap=12M$0x284500000 trace_instance=boot_mapped@0x284500000:12M
The above will reserve 12 megs at the physical address 0x284500000.
The second parameter will create a "boot_mapped" instance and use the
memory reserved as the memory for the ring buffer.
That will create an instance called "boot_mapped":
/sys/kernel/tracing/instances/boot_mapped
Note, because the ring buffer is using a defined memory range, it will
act just like a memory mapped ring buffer. It will not have a snapshot
buffer, as it can't swap out the buffer. The snapshot files as well as any
tracers that use a snapshot will not be present in the boot_mapped
instance.
Cc: [email protected]
Signed-off-by: Steven Rostedt (Google) <[email protected]>
---
.../admin-guide/kernel-parameters.txt | 9 +++
kernel/trace/trace.c | 75 +++++++++++++++++--
2 files changed, 78 insertions(+), 6 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b600df82669d..ff26b6094e79 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6754,6 +6754,15 @@
the same thing would happen if it was left off). The irq_handler_entry
event, and all events under the "initcall" system.
+ If memory has been reserved (see memmap for x86), the instance
+ can use that memory:
+
+ memmap=12M$0x284500000 trace_instance=boot_map@0x284500000:12M
+
+ The above will create a "boot_map" instance that uses the physical
+ memory at 0x284500000 that is 12Megs. The per CPU buffers of that
+ instance will be split up accordingly.
+
trace_options=[option-list]
[FTRACE] Enable or disable tracer options at boot.
The option-list is a comma delimited list of options
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 622fe670949d..13e89023f33b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9504,6 +9504,31 @@ static int instance_mkdir(const char *name)
return ret;
}
+/*
+ * map_pages - map a contiguous physical range into kernel virtual space
+ * @start: physical address of the first byte; must be page aligned
+ * @size: length of the range in bytes; rounded up to whole pages
+ *
+ * Collects the struct page pointer for every page frame in
+ * [@start, @start + @size) into a temporary array and passes that array
+ * to vmap(). The temporary array is freed before returning.
+ *
+ * NOTE(review): nothing here verifies that the range was actually
+ * reserved (for example by memmap=) or that it is not in use by the
+ * kernel or user space; the caller is trusted to pass a safe range.
+ *
+ * Returns the virtual address of the mapping, or 0 on failure.
+ */
+static u64 map_pages(u64 start, u64 size)
+{
+	struct page **pages;
+	phys_addr_t page_start = start;
+	unsigned int page_count;
+	unsigned int i;
+	u64 nr_pages;
+	void *vaddr;
+
+	/* Nothing to map, or the range wraps around the address space. */
+	if (!size || start + size < start)
+		return 0;
+
+	/* phys_addr_t may be narrower than u64; do not map a truncated address. */
+	if (page_start != start)
+		return 0;
+
+	/*
+	 * The pfn lookup below rounds down, so an unaligned start would map
+	 * memory in front of the requested range.
+	 */
+	if (start & (PAGE_SIZE - 1))
+		return 0;
+
+	/*
+	 * Round up to whole pages with shifts: this cannot overflow and
+	 * needs no 64-bit division.
+	 */
+	nr_pages = (size >> PAGE_SHIFT) + !!(size & (PAGE_SIZE - 1));
+
+	/* vmap() takes an unsigned int count; refuse rather than truncate. */
+	page_count = nr_pages;
+	if (page_count != nr_pages)
+		return 0;
+
+	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		return 0;
+
+	for (i = 0; i < page_count; i++) {
+		/* Widen before multiplying so the offset cannot wrap. */
+		phys_addr_t addr = page_start + (phys_addr_t)i * PAGE_SIZE;
+
+		pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
+	}
+	vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
+	kfree(pages);
+
+	/* vmap() returns NULL on failure, which becomes 0 here. */
+	return (u64)(unsigned long)vaddr;
+}
+
/**
* trace_array_get_by_name - Create/Lookup a trace array, given its name.
* @name: The name of the trace array to be looked up/created.
@@ -10350,6 +10375,7 @@ __init static void enable_instances(void)
{
struct trace_array *tr;
char *curr_str;
+ char *name;
char *str;
char *tok;
@@ -10358,19 +10384,56 @@ __init static void enable_instances(void)
str = boot_instance_info;
while ((curr_str = strsep(&str, "\t"))) {
+ unsigned long start = 0;
+ unsigned long size = 0;
+ unsigned long addr = 0;
tok = strsep(&curr_str, ",");
+ name = strsep(&tok, "@");
+ if (tok) {
+ start = memparse(tok, &tok);
+ if (!start) {
+ pr_warn("Tracing: Invalid boot instance address for %s\n",
+ name);
+ continue;
+ }
+ }
- if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
- do_allocate_snapshot(tok);
+ if (start) {
+ if (*tok != ':') {
+ pr_warn("Tracing: No size specified for instance %s\n", name);
+ continue;
+ }
+ tok++;
+ size = memparse(tok, &tok);
+ if (!size) {
+ pr_warn("Tracing: Invalid boot instance size for %s\n",
+ name);
+ continue;
+ }
+ addr = map_pages(start, size);
+ if (addr) {
+ pr_info("Tracing: mapped boot instance %s at physical memory 0x%lx of size 0x%lx\n",
+ name, start, size);
+ } else {
+ pr_warn("Tracing: Failed to map boot instance %s\n", name);
+ continue;
+ }
+ } else {
+ /* Only non mapped buffers have snapshot buffers */
+ if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
+ do_allocate_snapshot(tok);
+ }
- tr = trace_array_get_by_name(tok, NULL);
+ tr = trace_array_create_systems(name, NULL, addr, size);
if (!tr) {
- pr_warn("Failed to create instance buffer %s\n", curr_str);
+ pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
continue;
}
- /* Allow user space to delete it */
- trace_array_put(tr);
+
+ /* Only allow non mapped buffers to be deleted */
+ if (!start)
+ trace_array_put(tr);
while ((tok = strsep(&curr_str, ","))) {
early_enable_events(tr, tok, true);
--
2.43.0
Memory management folks. Please review this patch.
Specifically the "map_pages()" function below.
On Thu, 06 Jun 2024 17:17:43 -0400
Steven Rostedt <[email protected]> wrote:
> From: "Steven Rostedt (Google)" <[email protected]>
>
> Add an option to the trace_instance kernel command line parameter that
> allows it to use the reserved memory from memmap boot parameter.
>
> memmap=12M$0x284500000 trace_instance=boot_mapped@0x284500000:12M
>
> The above will reserves 12 megs at the physical address 0x284500000.
> The second parameter will create a "boot_mapped" instance and use the
> memory reserved as the memory for the ring buffer.
>
> That will create an instance called "boot_mapped":
>
> /sys/kernel/tracing/instances/boot_mapped
>
> Note, because the ring buffer is using a defined memory ranged, it will
> act just like a memory mapped ring buffer. It will not have a snapshot
> buffer, as it can't swap out the buffer. The snapshot files as well as any
> tracers that uses a snapshot will not be present in the boot_mapped
> instance.
>
> Cc: [email protected]
> Signed-off-by: Steven Rostedt (Google) <[email protected]>
> ---
> .../admin-guide/kernel-parameters.txt | 9 +++
> kernel/trace/trace.c | 75 +++++++++++++++++--
> 2 files changed, 78 insertions(+), 6 deletions(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index b600df82669d..ff26b6094e79 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -6754,6 +6754,15 @@
> the same thing would happen if it was left off). The irq_handler_entry
> event, and all events under the "initcall" system.
>
> + If memory has been reserved (see memmap for x86), the instance
> + can use that memory:
> +
> + memmap=12M$0x284500000 trace_instance=boot_map@0x284500000:12M
> +
> + The above will create a "boot_map" instance that uses the physical
> + memory at 0x284500000 that is 12Megs. The per CPU buffers of that
> + instance will be split up accordingly.
> +
> trace_options=[option-list]
> [FTRACE] Enable or disable tracer options at boot.
> The option-list is a comma delimited list of options
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 622fe670949d..13e89023f33b 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -9504,6 +9504,31 @@ static int instance_mkdir(const char *name)
> return ret;
> }
>
> +static u64 map_pages(u64 start, u64 size)
> +{
> + struct page **pages;
> + phys_addr_t page_start;
> + unsigned int page_count;
> + unsigned int i;
> + void *vaddr;
> +
> + page_count = DIV_ROUND_UP(size, PAGE_SIZE);
> +
> + page_start = start;
> + pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
> + if (!pages)
> + return 0;
> +
> + for (i = 0; i < page_count; i++) {
> + phys_addr_t addr = page_start + i * PAGE_SIZE;
> + pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
> + }
> + vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
> + kfree(pages);
> +
> + return (u64)(unsigned long)vaddr;
> +}
If for some reason the memmap=nn$ss fails, but this still gets called,
will the above just map over any memory? That is, is it possible that
the kernel could have used this memory?
Is there a way to detect this? That is, I don't want this to succeed if
the memory location it's about to map to is used by the kernel, or will
be used by user space.
-- Steve
> +
> /**
> * trace_array_get_by_name - Create/Lookup a trace array, given its name.
> * @name: The name of the trace array to be looked up/created.
> @@ -10350,6 +10375,7 @@ __init static void enable_instances(void)
> {
> struct trace_array *tr;
> char *curr_str;
> + char *name;
> char *str;
> char *tok;
>
> @@ -10358,19 +10384,56 @@ __init static void enable_instances(void)
> str = boot_instance_info;
>
> while ((curr_str = strsep(&str, "\t"))) {
> + unsigned long start = 0;
> + unsigned long size = 0;
> + unsigned long addr = 0;
>
> tok = strsep(&curr_str, ",");
> + name = strsep(&tok, "@");
> + if (tok) {
> + start = memparse(tok, &tok);
> + if (!start) {
> + pr_warn("Tracing: Invalid boot instance address for %s\n",
> + name);
> + continue;
> + }
> + }
>
> - if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
> - do_allocate_snapshot(tok);
> + if (start) {
> + if (*tok != ':') {
> + pr_warn("Tracing: No size specified for instance %s\n", name);
> + continue;
> + }
> + tok++;
> + size = memparse(tok, &tok);
> + if (!size) {
> + pr_warn("Tracing: Invalid boot instance size for %s\n",
> + name);
> + continue;
> + }
> + addr = map_pages(start, size);
> + if (addr) {
> + pr_info("Tracing: mapped boot instance %s at physical memory 0x%lx of size 0x%lx\n",
> + name, start, size);
> + } else {
> + pr_warn("Tracing: Failed to map boot instance %s\n", name);
> + continue;
> + }
> + } else {
> + /* Only non mapped buffers have snapshot buffers */
> + if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
> + do_allocate_snapshot(tok);
> + }
>
> - tr = trace_array_get_by_name(tok, NULL);
> + tr = trace_array_create_systems(name, NULL, addr, size);
> if (!tr) {
> - pr_warn("Failed to create instance buffer %s\n", curr_str);
> + pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
> continue;
> }
> - /* Allow user space to delete it */
> - trace_array_put(tr);
> +
> + /* Only allow non mapped buffers to be deleted */
> + if (!start)
> + trace_array_put(tr);
>
> while ((tok = strsep(&curr_str, ","))) {
> early_enable_events(tr, tok, true);