Here's an early RFC for a patch series on improving ftrace throughput with
ramoops. I'm hoping to get some early comments, so I'm releasing it in
advance. It is functional and tested.
Currently ramoops uses a single zone to store function traces. To make this
work, it has to use locking to synchronize accesses to the buffers. Recently
the synchronization was moved entirely from a cmpxchg mechanism to raw
spinlocks due to difficulties in using cmpxchg on uncached memory and on RAM
behind PCIe. [1] This change further dropped the performance of the ramoops
pstore backend by more than half in my tests.
This patch series improves the situation dramatically, by around 280% over
what it is now, by creating a ramoops persistent zone for each CPU and
avoiding locking altogether for ftrace. At init time, the persistent zones
are then merged together.
Here are some tests to show the improvements. Tested using a qemu quad-core
x86_64 instance with -mem-path to persist the guest RAM to a file. I measured
the average throughput of dd over 30 seconds:
dd if=/dev/zero | pv | dd of=/dev/null
Without this patch series: 24 MB/s
With per-CPU buffers and counter increment: 91.5 MB/s (~281% improvement)
With per-CPU buffers and trace_clock: 51.9 MB/s
Some more considerations:
1. In order to do the merge of the individual buffers, I am using racy
counters, since I didn't want to sacrifice throughput for perfect time stamps
(see the sketch after this list). trace_clock() did the job for timestamps,
but at almost half the throughput of the counter-based timestamp.
2. Since the patches divide the available ftrace persistent space by the
number of CPUs, less space is now available per CPU (for example, a 128 KB
ftrace area on a quad-core system leaves 32 KB per zone). The user is free to
revert to the old single-zone behavior by omitting the FTRACE_PER_CPU flag.
It's a space-vs-performance trade-off: if the user has enough space and not a
lot of CPUs, per-CPU persistent buffers make sense for better performance.
3. Without using any counters or timestamps, the improvement is even greater
(~140 MB/s), but then the buffers cannot be merged.
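As a minimal sketch of the racy counter idea from point 1 (patch 6 in the
series has the real code; the increment is deliberately unsynchronized, so
concurrent CPUs may occasionally lose updates or record equal stamps):

    /* One global counter, bumped with no locking and no atomics. */
    static u64 pstore_ftrace_stamp;

    /* In the ftrace callback, each record is stamped before it is written: */
    pstore_ftrace_write_timestamp(&rec, pstore_ftrace_stamp++);
    pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id());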
[1] https://lkml.org/lkml/2016/9/8/375
Joel Fernandes (7):
pstore: Make spinlock per zone instead of global
pstore: locking: don't lock unless caller asks to
pstore: Remove case of PSTORE_TYPE_PMSG write using deprecated
function
pstore: Make ramoops_init_przs generic for other prz arrays
ramoops: Split ftrace buffer space into per-CPU zones
pstore: Add support to store timestamp counter in ftrace records
pstore: Merge per-CPU ftrace zones into one zone for output
fs/pstore/ftrace.c | 3 +
fs/pstore/inode.c | 7 +-
fs/pstore/internal.h | 34 -------
fs/pstore/ram.c | 234 +++++++++++++++++++++++++++++++++++----------
fs/pstore/ram_core.c | 30 +++---
include/linux/pstore.h | 69 +++++++++++++
include/linux/pstore_ram.h | 6 +-
7 files changed, 280 insertions(+), 103 deletions(-)
--
2.7.4
Currently pstore has a global spinlock for all zones. Since the zones are
independent and modify different areas of memory, there's no need for a
global lock; make the lock per-zone to protect the respective zone.
Signed-off-by: Joel Fernandes <[email protected]>
---
fs/pstore/ram_core.c | 11 +++++------
include/linux/pstore_ram.h | 1 +
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 3975dee..cb92055 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -48,8 +48,6 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz)
return atomic_read(&prz->buffer->start);
}
-static DEFINE_RAW_SPINLOCK(buffer_lock);
-
/* increase and wrap the start pointer, returning the old value */
static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
{
@@ -57,7 +55,7 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
int new;
unsigned long flags;
- raw_spin_lock_irqsave(&buffer_lock, flags);
+ raw_spin_lock_irqsave(&prz->buffer_lock, flags);
old = atomic_read(&prz->buffer->start);
new = old + a;
@@ -65,7 +63,7 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
new -= prz->buffer_size;
atomic_set(&prz->buffer->start, new);
- raw_spin_unlock_irqrestore(&buffer_lock, flags);
+ raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
return old;
}
@@ -77,7 +75,7 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
size_t new;
unsigned long flags;
- raw_spin_lock_irqsave(&buffer_lock, flags);
+ raw_spin_lock_irqsave(&prz->buffer_lock, flags);
old = atomic_read(&prz->buffer->size);
if (old == prz->buffer_size)
@@ -89,7 +87,7 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
atomic_set(&prz->buffer->size, new);
exit:
- raw_spin_unlock_irqrestore(&buffer_lock, flags);
+ raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
}
static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz,
@@ -493,6 +491,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
prz->buffer->sig = sig;
persistent_ram_zap(prz);
+ prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock);
return 0;
}
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index c668c86..244d242 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -40,6 +40,7 @@ struct persistent_ram_zone {
void *vaddr;
struct persistent_ram_buffer *buffer;
size_t buffer_size;
+ raw_spinlock_t buffer_lock;
/* ECC correction */
char *par_buffer;
--
2.7.4
In preparation for not locking at all for certain buffers where there is no
contention, make locking optional, depending on whether the caller requests
it.
Signed-off-by: Joel Fernandes <[email protected]>
---
fs/pstore/ram.c | 10 +++++-----
fs/pstore/ram_core.c | 27 ++++++++++++++++-----------
include/linux/pstore_ram.h | 2 +-
3 files changed, 22 insertions(+), 17 deletions(-)
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index f1219af..751197d 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -260,7 +260,7 @@ static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz,
compressed ? 'C' : 'D');
WARN_ON_ONCE(!hdr);
len = hdr ? strlen(hdr) : 0;
- persistent_ram_write(prz, hdr, len);
+ persistent_ram_write(prz, hdr, len, 1);
kfree(hdr);
return len;
@@ -280,17 +280,17 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
if (type == PSTORE_TYPE_CONSOLE) {
if (!cxt->cprz)
return -ENOMEM;
- persistent_ram_write(cxt->cprz, buf, size);
+ persistent_ram_write(cxt->cprz, buf, size, 1);
return 0;
} else if (type == PSTORE_TYPE_FTRACE) {
if (!cxt->fprz)
return -ENOMEM;
- persistent_ram_write(cxt->fprz, buf, size);
+ persistent_ram_write(cxt->fprz, buf, size, 1);
return 0;
} else if (type == PSTORE_TYPE_PMSG) {
if (!cxt->mprz)
return -ENOMEM;
- persistent_ram_write(cxt->mprz, buf, size);
+ persistent_ram_write(cxt->mprz, buf, size, 1);
return 0;
}
@@ -324,7 +324,7 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
hlen = ramoops_write_kmsg_hdr(prz, compressed);
if (size + hlen > prz->buffer_size)
size = prz->buffer_size - hlen;
- persistent_ram_write(prz, buf, size);
+ persistent_ram_write(prz, buf, size, 1);
cxt->dump_write_cnt = (cxt->dump_write_cnt + 1) % cxt->max_dump_cnt;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index cb92055..c4722f0 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -49,13 +49,15 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz)
}
/* increase and wrap the start pointer, returning the old value */
-static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
+static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a,
+ int lock)
{
int old;
int new;
unsigned long flags;
- raw_spin_lock_irqsave(&prz->buffer_lock, flags);
+ if (lock)
+ raw_spin_lock_irqsave(&prz->buffer_lock, flags);
old = atomic_read(&prz->buffer->start);
new = old + a;
@@ -63,19 +65,21 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
new -= prz->buffer_size;
atomic_set(&prz->buffer->start, new);
- raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
+ if (lock)
+ raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
return old;
}
/* increase the size counter until it hits the max size */
-static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
+static void buffer_size_add(struct persistent_ram_zone *prz, size_t a, int lock)
{
size_t old;
size_t new;
unsigned long flags;
- raw_spin_lock_irqsave(&prz->buffer_lock, flags);
+ if (lock)
+ raw_spin_lock_irqsave(&prz->buffer_lock, flags);
old = atomic_read(&prz->buffer->size);
if (old == prz->buffer_size)
@@ -87,7 +91,8 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
atomic_set(&prz->buffer->size, new);
exit:
- raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
+ if (lock)
+ raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
}
static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz,
@@ -300,7 +305,7 @@ void persistent_ram_save_old(struct persistent_ram_zone *prz)
}
int notrace persistent_ram_write(struct persistent_ram_zone *prz,
- const void *s, unsigned int count)
+ const void *s, unsigned int count, int lock)
{
int rem;
int c = count;
@@ -311,9 +316,9 @@ int notrace persistent_ram_write(struct persistent_ram_zone *prz,
c = prz->buffer_size;
}
- buffer_size_add(prz, c);
+ buffer_size_add(prz, c, lock);
- start = buffer_start_add(prz, c);
+ start = buffer_start_add(prz, c, lock);
rem = prz->buffer_size - start;
if (unlikely(rem < c)) {
@@ -342,9 +347,9 @@ int notrace persistent_ram_write_user(struct persistent_ram_zone *prz,
c = prz->buffer_size;
}
- buffer_size_add(prz, c);
+ buffer_size_add(prz, c, 1);
- start = buffer_start_add(prz, c);
+ start = buffer_start_add(prz, c, 1);
rem = prz->buffer_size - start;
if (unlikely(rem < c)) {
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 244d242..782af68 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -61,7 +61,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz);
void persistent_ram_zap(struct persistent_ram_zone *prz);
int persistent_ram_write(struct persistent_ram_zone *prz, const void *s,
- unsigned int count);
+ unsigned int count, int lock);
int persistent_ram_write_user(struct persistent_ram_zone *prz,
const void __user *s, unsigned int count);
--
2.7.4
PMSG now uses ramoops_pstore_write_buf_user instead of
ramoops_pstore_write_buf, so remove the PSTORE_TYPE_PMSG case from the
latter.
Signed-off-by: Joel Fernandes <[email protected]>
---
fs/pstore/ram.c | 5 -----
1 file changed, 5 deletions(-)
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 751197d..519404c 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -287,11 +287,6 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
return -ENOMEM;
persistent_ram_write(cxt->fprz, buf, size, 1);
return 0;
- } else if (type == PSTORE_TYPE_PMSG) {
- if (!cxt->mprz)
- return -ENOMEM;
- persistent_ram_write(cxt->mprz, buf, size, 1);
- return 0;
}
if (type != PSTORE_TYPE_DMESG)
--
2.7.4
Up until this patch, each of the per-CPU buffers appears as a separate
ftrace-ramoops-N file. In this patch we merge all the zones into one,
combining the records in timestamp order, and populate a single
ftrace-ramoops-0 file.
Signed-off-by: Joel Fernandes <[email protected]>
---
fs/pstore/ram.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 85 insertions(+), 6 deletions(-)
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 2e29d6b..cc37ec4 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -181,6 +181,53 @@ static bool prz_ok(struct persistent_ram_zone *prz)
persistent_ram_ecc_string(prz, NULL, 0));
}
+static void ftrace_old_log_combine(struct persistent_ram_zone *dest,
+ struct persistent_ram_zone *src)
+{
+ int dest_size, src_size, total, destoff, srcoff, i = 0, j = 0, k = 0;
+ void *mbuf;
+ struct pstore_ftrace_record *drec, *srec, *mrec;
+ int record_size = sizeof(struct pstore_ftrace_record);
+
+ destoff = dest->old_log_size % record_size;
+ dest_size = dest->old_log_size - destoff;
+
+ srcoff = src->old_log_size % record_size;
+ src_size = src->old_log_size - srcoff;
+
+ total = dest_size + src_size;
+ mbuf = kmalloc(total, GFP_KERNEL);
+
+ drec = (struct pstore_ftrace_record *)(dest->old_log + destoff);
+ srec = (struct pstore_ftrace_record *)(src->old_log + srcoff);
+ mrec = (struct pstore_ftrace_record *)(mbuf);
+
+ while (dest_size > 0 && src_size > 0) {
+ if (pstore_ftrace_read_timestamp(&drec[i]) <
+ pstore_ftrace_read_timestamp(&srec[j])) {
+ mrec[k++] = drec[i++];
+ dest_size -= record_size;
+ } else {
+ mrec[k++] = srec[j++];
+ src_size -= record_size;
+ }
+ }
+
+ while (dest_size > 0) {
+ mrec[k++] = drec[i++];
+ dest_size -= record_size;
+ }
+
+ while (src_size > 0) {
+ mrec[k++] = srec[j++];
+ src_size -= record_size;
+ }
+
+ kfree(dest->old_log);
+ dest->old_log = mbuf;
+ dest->old_log_size = total;
+}
+
static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
int *count, struct timespec *time,
char **buf, bool *compressed,
@@ -190,7 +237,7 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
ssize_t size;
struct ramoops_context *cxt = psi->data;
struct persistent_ram_zone *prz = NULL;
- int header_length = 0;
+ int header_length = 0, free_prz = 0;
/* Ramoops headers provide time stamps for PSTORE_TYPE_DMESG, but
* PSTORE_TYPE_CONSOLE and PSTORE_TYPE_FTRACE don't currently have
@@ -221,13 +268,39 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt,
1, id, type, PSTORE_TYPE_CONSOLE, 0);
if (!prz_ok(prz)) {
- int max = (cxt->flags & FTRACE_PER_CPU) ? nr_cpu_ids : 1;
- while (cxt->ftrace_read_cnt < max && !prz) {
+ if (!(cxt->flags & FTRACE_PER_CPU)) {
prz = ramoops_get_next_prz(cxt->fprzs,
- &cxt->ftrace_read_cnt, max, id, type,
+ &cxt->ftrace_read_cnt, 1, id, type,
PSTORE_TYPE_FTRACE, 0);
- if (!prz_ok(prz))
- continue;
+ } else {
+ /*
+ * Build a new dummy zone which combines all the
+ * per-cpu zones and accumulate the zone data and ecc
+ * info.
+ */
+ int max;
+ struct persistent_ram_zone *tmp_prz, *prz_next;
+
+ tmp_prz = kzalloc(sizeof(struct persistent_ram_zone),
+ GFP_KERNEL);
+ max = nr_cpu_ids;
+ while (cxt->ftrace_read_cnt < max) {
+ prz_next = ramoops_get_next_prz(cxt->fprzs,
+ &cxt->ftrace_read_cnt, max, id,
+ type, PSTORE_TYPE_FTRACE, 0);
+
+ if (!prz_ok(prz_next))
+ continue;
+
+ tmp_prz->ecc_info = prz_next->ecc_info;
+ tmp_prz->corrected_bytes +=
+ prz_next->corrected_bytes;
+ tmp_prz->bad_blocks += prz_next->bad_blocks;
+ ftrace_old_log_combine(tmp_prz, prz_next);
+ }
+ *id = 0;
+ prz = tmp_prz;
+ free_prz = 1;
}
}
@@ -247,6 +320,12 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
return -ENOMEM;
memcpy(*buf, (char *)persistent_ram_old(prz) + header_length, size);
+
+ if (free_prz) {
+ kfree(prz->old_log);
+ kfree(prz);
+ }
+
persistent_ram_ecc_string(prz, *buf + size, *ecc_notice_size + 1);
return size;
--
2.7.4
In preparation for merging the per-CPU buffers into one buffer when we
retrieve the pstore ftrace data, store the timestamp as a counter in the
ftrace pstore record. If !PSTORE_CPU_IN_IP, we store the CPU number in the
timestamp field as well; in that case we shift the counter, and may lose some
ordering there, but we preserve the record size. The timestamp counter is
also racy: doing no locking or synchronization here buys lower overhead.
Since we don't care much about exact ordering of function traces across CPUs,
we don't synchronize and may lose some counter updates, but I'm OK with that.
Using trace_clock() results in much lower performance, so avoid it: we don't
need accurate timestamps, only a rough ordering to perform the merge.
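For illustration, with TS_CPU_SHIFT = 8 in the !PSTORE_CPU_IN_IP case below,
a counter value of 5 on CPU 2 round-trips like this:

    rec.ts = (5ULL << TS_CPU_SHIFT) | 2;    /* ts = 0x502 */
    pstore_ftrace_decode_cpu(&rec);         /* ts & TS_CPU_MASK   -> 2 */
    pstore_ftrace_read_timestamp(&rec);     /* ts >> TS_CPU_SHIFT -> 5 */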
Signed-off-by: Joel Fernandes <[email protected]>
---
fs/pstore/ftrace.c | 3 +++
fs/pstore/inode.c | 7 ++---
fs/pstore/internal.h | 34 -------------------------
include/linux/pstore.h | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 76 insertions(+), 37 deletions(-)
diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c
index d488770..1c6cf86 100644
--- a/fs/pstore/ftrace.c
+++ b/fs/pstore/ftrace.c
@@ -27,6 +27,8 @@
#include <asm/barrier.h>
#include "internal.h"
+static u64 pstore_ftrace_stamp;
+
static void notrace pstore_ftrace_call(unsigned long ip,
unsigned long parent_ip,
struct ftrace_ops *op,
@@ -42,6 +44,7 @@ static void notrace pstore_ftrace_call(unsigned long ip,
rec.ip = ip;
rec.parent_ip = parent_ip;
+ pstore_ftrace_write_timestamp(&rec, pstore_ftrace_stamp++);
pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id());
psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec,
0, sizeof(rec), psinfo);
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index ec9ddef..fcc5d1a 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -107,9 +107,10 @@ static int pstore_ftrace_seq_show(struct seq_file *s, void *v)
struct pstore_ftrace_seq_data *data = v;
struct pstore_ftrace_record *rec = (void *)(ps->data + data->off);
- seq_printf(s, "%d %08lx %08lx %pf <- %pF\n",
- pstore_ftrace_decode_cpu(rec), rec->ip, rec->parent_ip,
- (void *)rec->ip, (void *)rec->parent_ip);
+ seq_printf(s, "CPU:%d ts:%llu %08lx %08lx %pf <- %pF\n",
+ pstore_ftrace_decode_cpu(rec), pstore_ftrace_read_timestamp(rec),
+ rec->ip, rec->parent_ip, (void *)rec->ip,
+ (void *)rec->parent_ip);
return 0;
}
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index e38a22b..da416e6 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -5,40 +5,6 @@
#include <linux/time.h>
#include <linux/pstore.h>
-#if NR_CPUS <= 2 && defined(CONFIG_ARM_THUMB)
-#define PSTORE_CPU_IN_IP 0x1
-#elif NR_CPUS <= 4 && defined(CONFIG_ARM)
-#define PSTORE_CPU_IN_IP 0x3
-#endif
-
-struct pstore_ftrace_record {
- unsigned long ip;
- unsigned long parent_ip;
-#ifndef PSTORE_CPU_IN_IP
- unsigned int cpu;
-#endif
-};
-
-static inline void
-pstore_ftrace_encode_cpu(struct pstore_ftrace_record *rec, unsigned int cpu)
-{
-#ifndef PSTORE_CPU_IN_IP
- rec->cpu = cpu;
-#else
- rec->ip |= cpu;
-#endif
-}
-
-static inline unsigned int
-pstore_ftrace_decode_cpu(struct pstore_ftrace_record *rec)
-{
-#ifndef PSTORE_CPU_IN_IP
- return rec->cpu;
-#else
- return rec->ip & PSTORE_CPU_IN_IP;
-#endif
-}
-
#ifdef CONFIG_PSTORE_FTRACE
extern void pstore_register_ftrace(void);
extern void pstore_unregister_ftrace(void);
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index 92013cc..4e7fc3c 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -89,4 +89,73 @@ extern int pstore_register(struct pstore_info *);
extern void pstore_unregister(struct pstore_info *);
extern bool pstore_cannot_block_path(enum kmsg_dump_reason reason);
+struct pstore_ftrace_record {
+ unsigned long ip;
+ unsigned long parent_ip;
+ u64 ts;
+};
+
+/*
+ * ftrace related stuff: Both backends and frontends need these so expose them
+ */
+
+#if NR_CPUS <= 2 && defined(CONFIG_ARM_THUMB)
+#define PSTORE_CPU_IN_IP 0x1
+#elif NR_CPUS <= 4 && defined(CONFIG_ARM)
+#define PSTORE_CPU_IN_IP 0x3
+#endif
+
+#define TS_CPU_SHIFT 8
+#define TS_CPU_MASK (BIT(TS_CPU_SHIFT) - 1)
+
+/*
+ * If CPU number can be stored in IP, store it there
+ * else store it in the time stamp.
+ */
+#ifdef PSTORE_CPU_IN_IP
+static inline void
+pstore_ftrace_encode_cpu(struct pstore_ftrace_record *rec, unsigned int cpu)
+{
+ rec->ip |= cpu;
+}
+
+static inline unsigned int
+pstore_ftrace_decode_cpu(struct pstore_ftrace_record *rec)
+{
+ return rec->ip & PSTORE_CPU_IN_IP;
+}
+
+static inline u64 pstore_ftrace_read_timestamp(struct pstore_ftrace_record *rec)
+{
+ return rec->ts;
+}
+
+static inline void pstore_ftrace_write_timestamp(struct pstore_ftrace_record *rec, u64 val)
+{
+ rec->ts = val;
+}
+#else
+static inline void
+pstore_ftrace_encode_cpu(struct pstore_ftrace_record *rec, unsigned int cpu)
+{
+ rec->ts &= ~(TS_CPU_MASK);
+ rec->ts |= cpu;
+}
+
+static inline unsigned int
+pstore_ftrace_decode_cpu(struct pstore_ftrace_record *rec)
+{
+ return rec->ts & TS_CPU_MASK;
+}
+
+static inline u64 pstore_ftrace_read_timestamp(struct pstore_ftrace_record *rec)
+{
+ return rec->ts >> TS_CPU_SHIFT;
+}
+
+static inline void pstore_ftrace_write_timestamp(struct pstore_ftrace_record *rec, u64 val)
+{
+ rec->ts = (rec->ts & TS_CPU_MASK) | (val << TS_CPU_SHIFT);
+}
+#endif
#endif /*_LINUX_PSTORE_H*/
--
2.7.4
If the FTRACE_PER_CPU flag is passed in the ramoops pdata, split the ftrace
space into multiple zones depending on the number of CPUs.
This speeds up function tracing by about 280% in my tests, as we avoid the
locking. The trade-off is less space available per CPU. Let the ramoops user
decide which option they want based on the pdata flag.
Signed-off-by: Joel Fernandes <[email protected]>
---
fs/pstore/ram.c | 70 +++++++++++++++++++++++++++++++++++-----------
include/linux/pstore_ram.h | 3 ++
2 files changed, 56 insertions(+), 17 deletions(-)
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index a796f49..2e29d6b 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -87,7 +87,7 @@ MODULE_PARM_DESC(ramoops_ecc,
struct ramoops_context {
struct persistent_ram_zone **przs;
struct persistent_ram_zone *cprz;
- struct persistent_ram_zone *fprz;
+ struct persistent_ram_zone **fprzs;
struct persistent_ram_zone *mprz;
phys_addr_t phys_addr;
unsigned long size;
@@ -97,6 +97,7 @@ struct ramoops_context {
size_t ftrace_size;
size_t pmsg_size;
int dump_oops;
+ int flags;
struct persistent_ram_ecc_info ecc_info;
unsigned int max_dump_cnt;
unsigned int dump_write_cnt;
@@ -219,9 +220,17 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
if (!prz_ok(prz))
prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt,
1, id, type, PSTORE_TYPE_CONSOLE, 0);
- if (!prz_ok(prz))
- prz = ramoops_get_next_prz(&cxt->fprz, &cxt->ftrace_read_cnt,
- 1, id, type, PSTORE_TYPE_FTRACE, 0);
+ if (!prz_ok(prz)) {
+ int max = (cxt->flags & FTRACE_PER_CPU) ? nr_cpu_ids : 1;
+ while (cxt->ftrace_read_cnt < max && !prz) {
+ prz = ramoops_get_next_prz(cxt->fprzs,
+ &cxt->ftrace_read_cnt, max, id, type,
+ PSTORE_TYPE_FTRACE, 0);
+ if (!prz_ok(prz))
+ continue;
+ }
+ }
+
if (!prz_ok(prz))
prz = ramoops_get_next_prz(&cxt->mprz, &cxt->pmsg_read_cnt,
1, id, type, PSTORE_TYPE_PMSG, 0);
@@ -283,9 +292,23 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
persistent_ram_write(cxt->cprz, buf, size, 1);
return 0;
} else if (type == PSTORE_TYPE_FTRACE) {
- if (!cxt->fprz)
+ int zonenum, lock;
+
+ if (!cxt->fprzs)
return -ENOMEM;
- persistent_ram_write(cxt->fprz, buf, size, 1);
+ /*
+ * If per-cpu buffers, don't lock. Otherwise there's only
+ * 1 zone for ftrace (zone 0) and all CPUs share it, so lock.
+ */
+ if (cxt->flags & FTRACE_PER_CPU) {
+ zonenum = smp_processor_id();
+ lock = 0;
+ } else {
+ zonenum = 0;
+ lock = 1;
+ }
+
+ persistent_ram_write(cxt->fprzs[zonenum], buf, size, lock);
return 0;
}
@@ -349,6 +372,7 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count,
{
struct ramoops_context *cxt = psi->data;
struct persistent_ram_zone *prz;
+ int max;
switch (type) {
case PSTORE_TYPE_DMESG:
@@ -360,7 +384,10 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count,
prz = cxt->cprz;
break;
case PSTORE_TYPE_FTRACE:
- prz = cxt->fprz;
+ max = (cxt->flags & FTRACE_PER_CPU) ? nr_cpu_ids : 1;
+ if (id >= max)
+ return -EINVAL;
+ prz = cxt->fprzs[id];
break;
case PSTORE_TYPE_PMSG:
prz = cxt->mprz;
@@ -391,14 +418,21 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
{
int i;
- if (!cxt->przs)
- return;
+ /* Free dump PRZs */
+ if (cxt->przs) {
+ for (i = 0; i < cxt->max_dump_cnt; i++)
+ persistent_ram_free(cxt->przs[i]);
- for (i = 0; i < cxt->max_dump_cnt; i++)
- persistent_ram_free(cxt->przs[i]);
+ kfree(cxt->przs);
+ cxt->max_dump_cnt = 0;
+ }
- kfree(cxt->przs);
- cxt->max_dump_cnt = 0;
+ /* Free ftrace PRZs */
+ if (cxt->fprzs) {
+ for (i = 0; i < nr_cpu_ids; i++)
+ persistent_ram_free(cxt->przs[i]);
+ kfree(cxt->fprzs);
+ }
}
static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
@@ -615,6 +649,7 @@ static int ramoops_probe(struct platform_device *pdev)
cxt->ftrace_size = pdata->ftrace_size;
cxt->pmsg_size = pdata->pmsg_size;
cxt->dump_oops = pdata->dump_oops;
+ cxt->flags = pdata->flags;
cxt->ecc_info = pdata->ecc_info;
paddr = cxt->phys_addr;
@@ -630,8 +665,9 @@ static int ramoops_probe(struct platform_device *pdev)
if (err)
goto fail_init_cprz;
- err = ramoops_init_prz(dev, cxt, &cxt->fprz, &paddr, cxt->ftrace_size,
- LINUX_VERSION_CODE);
+ err = ramoops_init_przs(dev, cxt, &cxt->fprzs, &paddr, cxt->ftrace_size,
+ (cxt->flags & FTRACE_PER_CPU) ? nr_cpu_ids : 1,
+ LINUX_VERSION_CODE);
if (err)
goto fail_init_fprz;
@@ -695,7 +731,6 @@ fail_clear:
cxt->pstore.bufsize = 0;
persistent_ram_free(cxt->mprz);
fail_init_mprz:
- persistent_ram_free(cxt->fprz);
fail_init_fprz:
persistent_ram_free(cxt->cprz);
fail_init_cprz:
@@ -714,7 +749,6 @@ static int ramoops_remove(struct platform_device *pdev)
cxt->pstore.bufsize = 0;
persistent_ram_free(cxt->mprz);
- persistent_ram_free(cxt->fprz);
persistent_ram_free(cxt->cprz);
ramoops_free_przs(cxt);
@@ -756,6 +790,8 @@ static void ramoops_register_dummy(void)
dummy_data->ftrace_size = ramoops_ftrace_size;
dummy_data->pmsg_size = ramoops_pmsg_size;
dummy_data->dump_oops = dump_oops;
+ dummy_data->flags = FTRACE_PER_CPU;
+
/*
* For backwards compatibility ramoops.ecc=1 means 16 bytes ECC
* (using 1 byte for ECC isn't much of use anyway).
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 782af68..a30573b 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -78,6 +78,8 @@ ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz,
* @mem_address physical memory address to contain ramoops
*/
+#define FTRACE_PER_CPU BIT(0)
+
struct ramoops_platform_data {
unsigned long mem_size;
phys_addr_t mem_address;
@@ -87,6 +89,7 @@ struct ramoops_platform_data {
unsigned long ftrace_size;
unsigned long pmsg_size;
int dump_oops;
+ int flags;
struct persistent_ram_ecc_info ecc_info;
};
--
2.7.4
Currently ramoops_init_przs is hard-wired for the panic dump zone array only.
In preparation for the ftrace zone array (one zone per CPU), make the
function generic enough to handle this case.
Add a new ramoops_init_dump_przs function and use the generic function for
the dump prz array.
Signed-off-by: Joel Fernandes <[email protected]>
---
fs/pstore/ram.c | 72 ++++++++++++++++++++++++++++++++++++---------------------
1 file changed, 46 insertions(+), 26 deletions(-)
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 519404c..a796f49 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -402,54 +402,74 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
}
static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
- phys_addr_t *paddr, size_t dump_mem_sz)
+ struct persistent_ram_zone ***przs,
+ phys_addr_t *paddr, size_t mem_sz,
+ unsigned int cnt, u32 sig)
{
int err = -ENOMEM;
int i;
+ size_t zone_sz;
+ struct persistent_ram_zone **prz_ar;
- if (!cxt->record_size)
- return 0;
-
- if (*paddr + dump_mem_sz - cxt->phys_addr > cxt->size) {
- dev_err(dev, "no room for dumps\n");
+ if (*paddr + mem_sz - cxt->phys_addr > cxt->size) {
+ dev_err(dev, "no room for mem region (0x%zx@0x%llx) in (0x%lx@0x%llx)\n",
+ mem_sz, (unsigned long long)*paddr,
+ cxt->size, (unsigned long long)cxt->phys_addr);
return -ENOMEM;
}
- cxt->max_dump_cnt = dump_mem_sz / cxt->record_size;
- if (!cxt->max_dump_cnt)
+ zone_sz = mem_sz / cnt;
+ if (!zone_sz)
return -ENOMEM;
- cxt->przs = kzalloc(sizeof(*cxt->przs) * cxt->max_dump_cnt,
- GFP_KERNEL);
- if (!cxt->przs) {
- dev_err(dev, "failed to initialize a prz array for dumps\n");
- goto fail_mem;
+ prz_ar = kzalloc(sizeof(**przs) * cnt, GFP_KERNEL);
+ if (!prz_ar) {
+ dev_err(dev, "Failed to initialize prz array\n");
+ return -ENOMEM;
}
- for (i = 0; i < cxt->max_dump_cnt; i++) {
- cxt->przs[i] = persistent_ram_new(*paddr, cxt->record_size, 0,
+ for (i = 0; i < cnt; i++) {
+ prz_ar[i] = persistent_ram_new(*paddr, zone_sz, sig,
&cxt->ecc_info,
cxt->memtype);
- if (IS_ERR(cxt->przs[i])) {
- err = PTR_ERR(cxt->przs[i]);
+ if (IS_ERR(prz_ar[i])) {
+ err = PTR_ERR(prz_ar[i]);
dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n",
cxt->record_size, (unsigned long long)*paddr, err);
while (i > 0) {
i--;
- persistent_ram_free(cxt->przs[i]);
+ persistent_ram_free(prz_ar[i]);
}
- goto fail_prz;
+ kfree(prz_ar);
+ return err;
}
- *paddr += cxt->record_size;
+ *paddr += zone_sz;
}
+ *przs = prz_ar;
return 0;
-fail_prz:
- kfree(cxt->przs);
-fail_mem:
- cxt->max_dump_cnt = 0;
- return err;
+}
+
+static int ramoops_init_dump_przs(struct device *dev,
+ struct ramoops_context *cxt,
+ phys_addr_t *paddr, size_t mem_sz)
+{
+ int ret;
+
+ if (!cxt->record_size)
+ return 0;
+
+ cxt->max_dump_cnt = mem_sz / cxt->record_size;
+ if (!cxt->max_dump_cnt)
+ return -ENOMEM;
+
+ ret = ramoops_init_przs(dev, cxt, &cxt->przs, paddr, mem_sz,
+ cxt->max_dump_cnt, 0);
+ if (ret)
+ cxt->max_dump_cnt = 0;
+
+ return ret;
}
static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
@@ -601,7 +621,7 @@ static int ramoops_probe(struct platform_device *pdev)
dump_mem_sz = cxt->size - cxt->console_size - cxt->ftrace_size
- cxt->pmsg_size;
- err = ramoops_init_przs(dev, cxt, &paddr, dump_mem_sz);
+ err = ramoops_init_dump_przs(dev, cxt, &paddr, dump_mem_sz);
if (err)
goto fail_out;
--
2.7.4
Apologies that not everyone was CC'd on the cover letter, unlike the
rest of the patch series:
https://lkml.org/lkml/2016/10/8/12
Also, this patch series is an RFC; I screwed up the subject line.
Thanks,
Joel
On Fri, Oct 7, 2016 at 10:28 PM, Joel Fernandes <[email protected]> wrote:
>
> Currently pstore has a global spinlock for all zones. Since the zones are
> independent and modify different areas of memory, there's no need to have
> a global lock, make the lock per-zone to protect the respective zone.
>
> Signed-off-by: Joel Fernandes <[email protected]>
> ---
> fs/pstore/ram_core.c | 11 +++++------
> include/linux/pstore_ram.h | 1 +
> 2 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
> index 3975dee..cb92055 100644
> --- a/fs/pstore/ram_core.c
> +++ b/fs/pstore/ram_core.c
> @@ -48,8 +48,6 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz)
> return atomic_read(&prz->buffer->start);
> }
>
> -static DEFINE_RAW_SPINLOCK(buffer_lock);
> -
> /* increase and wrap the start pointer, returning the old value */
> static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
> {
> @@ -57,7 +55,7 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
> int new;
> unsigned long flags;
>
> - raw_spin_lock_irqsave(&buffer_lock, flags);
> + raw_spin_lock_irqsave(&prz->buffer_lock, flags);
>
> old = atomic_read(&prz->buffer->start);
> new = old + a;
> @@ -65,7 +63,7 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
> new -= prz->buffer_size;
> atomic_set(&prz->buffer->start, new);
>
> - raw_spin_unlock_irqrestore(&buffer_lock, flags);
> + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
>
> return old;
> }
> @@ -77,7 +75,7 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
> size_t new;
> unsigned long flags;
>
> - raw_spin_lock_irqsave(&buffer_lock, flags);
> + raw_spin_lock_irqsave(&prz->buffer_lock, flags);
>
> old = atomic_read(&prz->buffer->size);
> if (old == prz->buffer_size)
> @@ -89,7 +87,7 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
> atomic_set(&prz->buffer->size, new);
>
> exit:
> - raw_spin_unlock_irqrestore(&buffer_lock, flags);
> + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
> }
>
> static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz,
> @@ -493,6 +491,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
>
> prz->buffer->sig = sig;
> persistent_ram_zap(prz);
> + prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock);
>
> return 0;
> }
> diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
> index c668c86..244d242 100644
> --- a/include/linux/pstore_ram.h
> +++ b/include/linux/pstore_ram.h
> @@ -40,6 +40,7 @@ struct persistent_ram_zone {
> void *vaddr;
> struct persistent_ram_buffer *buffer;
> size_t buffer_size;
> + raw_spinlock_t buffer_lock;
>
> /* ECC correction */
> char *par_buffer;
> --
> 2.7.4
>
On Fri, Oct 7, 2016 at 10:28 PM, Joel Fernandes <[email protected]> wrote:
> If FTRACE_PER_CPU flag is passed to ramoops pdata, split the space into
> multiple zones depending on the number of CPUs.
>
> This speeds up the performance of function tracing by about 280% in my tests as
> we avoid the locking. The trade off being lesser space available per CPU. Let
> the ramoops user decide which option they want based on pdata flag.
>
> Signed-off-by: Joel Fernandes <[email protected]>
> ---
> fs/pstore/ram.c | 70 +++++++++++++++++++++++++++++++++++-----------
> include/linux/pstore_ram.h | 3 ++
> 2 files changed, 56 insertions(+), 17 deletions(-)
>
> diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
[..]
> @@ -391,14 +418,21 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
> {
> int i;
>
> - if (!cxt->przs)
> - return;
> + /* Free dump PRZs */
> + if (cxt->przs) {
> + for (i = 0; i < cxt->max_dump_cnt; i++)
> + persistent_ram_free(cxt->przs[i]);
>
> - for (i = 0; i < cxt->max_dump_cnt; i++)
> - persistent_ram_free(cxt->przs[i]);
> + kfree(cxt->przs);
> + cxt->max_dump_cnt = 0;
> + }
>
> - kfree(cxt->przs);
> - cxt->max_dump_cnt = 0;
> + /* Free ftrace PRZs */
> + if (cxt->fprzs) {
> + for (i = 0; i < nr_cpu_ids; i++)
> + persistent_ram_free(cxt->przs[i]);
I am supposed to free fprzs[i] here, instead of przs[i]. I also need to
check the flag and free the correct number of zones. Will fix in v2, sorry
about that.
Thanks,
Joel
On Fri, Oct 7, 2016 at 10:28 PM, Joel Fernandes <[email protected]> wrote:
> In preparation of not locking at all for certain buffers depending on if
> there's contention, make locking optional depending if caller requested it.
This should be a bool argument (or enum), with named values so it's
more readable. For example, these don't immediately tell me what the
argument does:
persistent_ram_write(cxt->cprz, buf, size, 1);
persistent_ram_write(cxt->cprz, buf, size, true);
But this does:
persistent_ram_write(cxt->cprz, buf, size, PSTORE_REQUIRE_LOCKING);
As part of this, can you document in the pstore structure which types
of front-ends require locking and if they don't, why?
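(To make that concrete, a possible sketch; the names here are purely
illustrative, pick whatever reads best:)

    /* Illustrative only: named values for the lock argument. */
    enum pstore_ram_lock {
        PSTORE_NO_LOCKING = 0,
        PSTORE_REQUIRE_LOCKING = 1,
    };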
-Kees
>
> Signed-off-by: Joel Fernandes <[email protected]>
> ---
> fs/pstore/ram.c | 10 +++++-----
> fs/pstore/ram_core.c | 27 ++++++++++++++++-----------
> include/linux/pstore_ram.h | 2 +-
> 3 files changed, 22 insertions(+), 17 deletions(-)
>
> diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
> index f1219af..751197d 100644
> --- a/fs/pstore/ram.c
> +++ b/fs/pstore/ram.c
> @@ -260,7 +260,7 @@ static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz,
> compressed ? 'C' : 'D');
> WARN_ON_ONCE(!hdr);
> len = hdr ? strlen(hdr) : 0;
> - persistent_ram_write(prz, hdr, len);
> + persistent_ram_write(prz, hdr, len, 1);
> kfree(hdr);
>
> return len;
> @@ -280,17 +280,17 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
> if (type == PSTORE_TYPE_CONSOLE) {
> if (!cxt->cprz)
> return -ENOMEM;
> - persistent_ram_write(cxt->cprz, buf, size);
> + persistent_ram_write(cxt->cprz, buf, size, 1);
> return 0;
> } else if (type == PSTORE_TYPE_FTRACE) {
> if (!cxt->fprz)
> return -ENOMEM;
> - persistent_ram_write(cxt->fprz, buf, size);
> + persistent_ram_write(cxt->fprz, buf, size, 1);
> return 0;
> } else if (type == PSTORE_TYPE_PMSG) {
> if (!cxt->mprz)
> return -ENOMEM;
> - persistent_ram_write(cxt->mprz, buf, size);
> + persistent_ram_write(cxt->mprz, buf, size, 1);
> return 0;
> }
>
> @@ -324,7 +324,7 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
> hlen = ramoops_write_kmsg_hdr(prz, compressed);
> if (size + hlen > prz->buffer_size)
> size = prz->buffer_size - hlen;
> - persistent_ram_write(prz, buf, size);
> + persistent_ram_write(prz, buf, size, 1);
>
> cxt->dump_write_cnt = (cxt->dump_write_cnt + 1) % cxt->max_dump_cnt;
>
> diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
> index cb92055..c4722f0 100644
> --- a/fs/pstore/ram_core.c
> +++ b/fs/pstore/ram_core.c
> @@ -49,13 +49,15 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz)
> }
>
> /* increase and wrap the start pointer, returning the old value */
> -static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
> +static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a,
> + int lock)
> {
> int old;
> int new;
> unsigned long flags;
>
> - raw_spin_lock_irqsave(&prz->buffer_lock, flags);
> + if (lock)
> + raw_spin_lock_irqsave(&prz->buffer_lock, flags);
>
> old = atomic_read(&prz->buffer->start);
> new = old + a;
> @@ -63,19 +65,21 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
> new -= prz->buffer_size;
> atomic_set(&prz->buffer->start, new);
>
> - raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
> + if (lock)
> + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
>
> return old;
> }
>
> /* increase the size counter until it hits the max size */
> -static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
> +static void buffer_size_add(struct persistent_ram_zone *prz, size_t a, int lock)
> {
> size_t old;
> size_t new;
> unsigned long flags;
>
> - raw_spin_lock_irqsave(&prz->buffer_lock, flags);
> + if (lock)
> + raw_spin_lock_irqsave(&prz->buffer_lock, flags);
>
> old = atomic_read(&prz->buffer->size);
> if (old == prz->buffer_size)
> @@ -87,7 +91,8 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
> atomic_set(&prz->buffer->size, new);
>
> exit:
> - raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
> + if (lock)
> + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
> }
>
> static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz,
> @@ -300,7 +305,7 @@ void persistent_ram_save_old(struct persistent_ram_zone *prz)
> }
>
> int notrace persistent_ram_write(struct persistent_ram_zone *prz,
> - const void *s, unsigned int count)
> + const void *s, unsigned int count, int lock)
> {
> int rem;
> int c = count;
> @@ -311,9 +316,9 @@ int notrace persistent_ram_write(struct persistent_ram_zone *prz,
> c = prz->buffer_size;
> }
>
> - buffer_size_add(prz, c);
> + buffer_size_add(prz, c, lock);
>
> - start = buffer_start_add(prz, c);
> + start = buffer_start_add(prz, c, lock);
>
> rem = prz->buffer_size - start;
> if (unlikely(rem < c)) {
> @@ -342,9 +347,9 @@ int notrace persistent_ram_write_user(struct persistent_ram_zone *prz,
> c = prz->buffer_size;
> }
>
> - buffer_size_add(prz, c);
> + buffer_size_add(prz, c, 1);
>
> - start = buffer_start_add(prz, c);
> + start = buffer_start_add(prz, c, 1);
>
> rem = prz->buffer_size - start;
> if (unlikely(rem < c)) {
> diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
> index 244d242..782af68 100644
> --- a/include/linux/pstore_ram.h
> +++ b/include/linux/pstore_ram.h
> @@ -61,7 +61,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz);
> void persistent_ram_zap(struct persistent_ram_zone *prz);
>
> int persistent_ram_write(struct persistent_ram_zone *prz, const void *s,
> - unsigned int count);
> + unsigned int count, int lock);
> int persistent_ram_write_user(struct persistent_ram_zone *prz,
> const void __user *s, unsigned int count);
>
> --
> 2.7.4
>
--
Kees Cook
Nexus Security
On Fri, Oct 7, 2016 at 10:28 PM, Joel Fernandes <[email protected]> wrote:
> Currently pstore has a global spinlock for all zones. Since the zones are
> independent and modify different areas of memory, there's no need to have
> a global lock, make the lock per-zone to protect the respective zone.
This seems fine to me, though I'd like the commit message to at least
hint at what's next, and why this change is needed, rather than just
the general organizational justification.
-Kees
>
> Signed-off-by: Joel Fernandes <[email protected]>
> ---
> fs/pstore/ram_core.c | 11 +++++------
> include/linux/pstore_ram.h | 1 +
> 2 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
> index 3975dee..cb92055 100644
> --- a/fs/pstore/ram_core.c
> +++ b/fs/pstore/ram_core.c
> @@ -48,8 +48,6 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz)
> return atomic_read(&prz->buffer->start);
> }
>
> -static DEFINE_RAW_SPINLOCK(buffer_lock);
> -
> /* increase and wrap the start pointer, returning the old value */
> static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
> {
> @@ -57,7 +55,7 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
> int new;
> unsigned long flags;
>
> - raw_spin_lock_irqsave(&buffer_lock, flags);
> + raw_spin_lock_irqsave(&prz->buffer_lock, flags);
>
> old = atomic_read(&prz->buffer->start);
> new = old + a;
> @@ -65,7 +63,7 @@ static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
> new -= prz->buffer_size;
> atomic_set(&prz->buffer->start, new);
>
> - raw_spin_unlock_irqrestore(&buffer_lock, flags);
> + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
>
> return old;
> }
> @@ -77,7 +75,7 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
> size_t new;
> unsigned long flags;
>
> - raw_spin_lock_irqsave(&buffer_lock, flags);
> + raw_spin_lock_irqsave(&prz->buffer_lock, flags);
>
> old = atomic_read(&prz->buffer->size);
> if (old == prz->buffer_size)
> @@ -89,7 +87,7 @@ static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
> atomic_set(&prz->buffer->size, new);
>
> exit:
> - raw_spin_unlock_irqrestore(&buffer_lock, flags);
> + raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
> }
>
> static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz,
> @@ -493,6 +491,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
>
> prz->buffer->sig = sig;
> persistent_ram_zap(prz);
> + prz->buffer_lock = __RAW_SPIN_LOCK_UNLOCKED(buffer_lock);
>
> return 0;
> }
> diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
> index c668c86..244d242 100644
> --- a/include/linux/pstore_ram.h
> +++ b/include/linux/pstore_ram.h
> @@ -40,6 +40,7 @@ struct persistent_ram_zone {
> void *vaddr;
> struct persistent_ram_buffer *buffer;
> size_t buffer_size;
> + raw_spinlock_t buffer_lock;
>
> /* ECC correction */
> char *par_buffer;
> --
> 2.7.4
>
--
Kees Cook
Nexus Security
On Fri, Oct 7, 2016 at 10:28 PM, Joel Fernandes <[email protected]> wrote:
> PMSG now uses ramoops_pstore_write_buf_user instead of ramoops_pstore_write_buf
> Remove the case where we check PSTORE_TYPE_PMSG case.
Ah yeah, good point. Can you actually improve this to add a
ratelimited WARN() to both _write_buf and write_buf_user when an
unhandled type is encountered?
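(A sketch of the idea, using a ratelimited print; the exact message and
return value are up to you:)

    /* Illustrative: catch an unhandled front-end type in the backend. */
    if (type != PSTORE_TYPE_DMESG) {
        pr_warn_ratelimited("ramoops: unhandled pstore type %d\n", type);
        return -EINVAL;
    }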
-Kees
>
> Signed-off-by: Joel Fernandes <[email protected]>
> ---
> fs/pstore/ram.c | 5 -----
> 1 file changed, 5 deletions(-)
>
> diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
> index 751197d..519404c 100644
> --- a/fs/pstore/ram.c
> +++ b/fs/pstore/ram.c
> @@ -287,11 +287,6 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
> return -ENOMEM;
> persistent_ram_write(cxt->fprz, buf, size, 1);
> return 0;
> - } else if (type == PSTORE_TYPE_PMSG) {
> - if (!cxt->mprz)
> - return -ENOMEM;
> - persistent_ram_write(cxt->mprz, buf, size, 1);
> - return 0;
> }
>
> if (type != PSTORE_TYPE_DMESG)
> --
> 2.7.4
>
--
Kees Cook
Nexus Security
On Fri, Oct 7, 2016 at 10:28 PM, Joel Fernandes <[email protected]> wrote:
> Currently ramoops_init_przs is hard wired only for panic dump zone array. In
> preparation for the ftrace zone array (one zone per-cpu), make the function
> more generic to be able to handle this case.
>
> Add a new ramoops_init_dump_przs function and use the generic function for the
> dump prz array.
Something similar was suggested in the recent "multiple pmsg" series
that was posted too. Making this generic is good, though see my nit
below...
>
> Signed-off-by: Joel Fernandes <[email protected]>
> ---
> fs/pstore/ram.c | 72 ++++++++++++++++++++++++++++++++++++---------------------
> 1 file changed, 46 insertions(+), 26 deletions(-)
>
> diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
> index 519404c..a796f49 100644
> --- a/fs/pstore/ram.c
> +++ b/fs/pstore/ram.c
> @@ -402,54 +402,74 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
> }
>
> static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
> - phys_addr_t *paddr, size_t dump_mem_sz)
> + struct persistent_ram_zone ***przs,
> + phys_addr_t *paddr, size_t mem_sz,
> + unsigned int cnt, u32 sig)
> {
> int err = -ENOMEM;
> int i;
> + size_t zone_sz;
> + struct persistent_ram_zone **prz_ar;
>
> - if (!cxt->record_size)
> - return 0;
> -
> - if (*paddr + dump_mem_sz - cxt->phys_addr > cxt->size) {
> - dev_err(dev, "no room for dumps\n");
> + if (*paddr + mem_sz - cxt->phys_addr > cxt->size) {
> + dev_err(dev, "no room for mem region (0x%zx@0x%llx) in (0x%lx@0x%llx)\n",
> + mem_sz, (unsigned long long)*paddr,
> + cxt->size, (unsigned long long)cxt->phys_addr);
> return -ENOMEM;
> }
>
> - cxt->max_dump_cnt = dump_mem_sz / cxt->record_size;
> - if (!cxt->max_dump_cnt)
> + zone_sz = mem_sz / cnt;
> + if (!zone_sz)
> return -ENOMEM;
>
> - cxt->przs = kzalloc(sizeof(*cxt->przs) * cxt->max_dump_cnt,
> - GFP_KERNEL);
> - if (!cxt->przs) {
> - dev_err(dev, "failed to initialize a prz array for dumps\n");
> - goto fail_mem;
> + prz_ar = kzalloc(sizeof(**przs) * cnt, GFP_KERNEL);
While I realize this is mainly a refactoring, as long as we're in
here, can you make this kcalloc instead?
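(i.e., something like:)

    prz_ar = kcalloc(cnt, sizeof(**przs), GFP_KERNEL);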
> + if (!prz_ar) {
> + dev_err(dev, "Failed to initialize prz array\n");
> + return -ENOMEM;
> }
>
> - for (i = 0; i < cxt->max_dump_cnt; i++) {
> - cxt->przs[i] = persistent_ram_new(*paddr, cxt->record_size, 0,
> + for (i = 0; i < cnt; i++) {
> + prz_ar[i] = persistent_ram_new(*paddr, zone_sz, sig,
> &cxt->ecc_info,
> cxt->memtype);
> - if (IS_ERR(cxt->przs[i])) {
> - err = PTR_ERR(cxt->przs[i]);
> + if (IS_ERR(prz_ar[i])) {
> + err = PTR_ERR(prz_ar[i]);
> dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n",
> cxt->record_size, (unsigned long long)*paddr, err);
>
> while (i > 0) {
> i--;
> - persistent_ram_free(cxt->przs[i]);
> + persistent_ram_free(prz_ar[i]);
> }
> - goto fail_prz;
> + kfree(prz_ar);
> + return err;
> }
> - *paddr += cxt->record_size;
> + *paddr += zone_sz;
> }
>
> + *przs = prz_ar;
> return 0;
> -fail_prz:
> - kfree(cxt->przs);
> -fail_mem:
> - cxt->max_dump_cnt = 0;
> - return err;
> +}
> +
> +static int ramoops_init_dump_przs(struct device *dev,
> + struct ramoops_context *cxt,
> + phys_addr_t *paddr, size_t mem_sz)
> +{
> + int ret;
> +
> + if (!cxt->record_size)
> + return 0;
> +
> + cxt->max_dump_cnt = mem_sz / cxt->record_size;
> + if (!cxt->max_dump_cnt)
> + return -ENOMEM;
> +
> + ret = ramoops_init_przs(dev, cxt, &cxt->przs, paddr, mem_sz,
> + cxt->max_dump_cnt, 0);
> + if (ret)
> + cxt->max_dump_cnt = 0;
> +
> + return ret;
> }
>
> static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
> @@ -601,7 +621,7 @@ static int ramoops_probe(struct platform_device *pdev)
>
> dump_mem_sz = cxt->size - cxt->console_size - cxt->ftrace_size
> - cxt->pmsg_size;
> - err = ramoops_init_przs(dev, cxt, &paddr, dump_mem_sz);
> + err = ramoops_init_dump_przs(dev, cxt, &paddr, dump_mem_sz);
> if (err)
> goto fail_out;
>
> --
> 2.7.4
>
-Kees
--
Kees Cook
Nexus Security
On Sun, Oct 9, 2016 at 10:15 AM, Joel Fernandes <[email protected]> wrote:
> On Fri, Oct 7, 2016 at 10:28 PM, Joel Fernandes <[email protected]> wrote:
>> If FTRACE_PER_CPU flag is passed to ramoops pdata, split the space into
>> multiple zones depending on the number of CPUs.
>>
>> This speeds up the performance of function tracing by about 280% in my tests as
>> we avoid the locking. The trade off being lesser space available per CPU. Let
>> the ramoops user decide which option they want based on pdata flag.
>>
>> Signed-off-by: Joel Fernandes <[email protected]>
>> ---
>> fs/pstore/ram.c | 70 +++++++++++++++++++++++++++++++++++-----------
>> include/linux/pstore_ram.h | 3 ++
>> 2 files changed, 56 insertions(+), 17 deletions(-)
>>
>> diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
> [..]
>> @@ -391,14 +418,21 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
>> {
>> int i;
>>
>> - if (!cxt->przs)
>> - return;
>> + /* Free dump PRZs */
>> + if (cxt->przs) {
>> + for (i = 0; i < cxt->max_dump_cnt; i++)
>> + persistent_ram_free(cxt->przs[i]);
>>
>> - for (i = 0; i < cxt->max_dump_cnt; i++)
>> - persistent_ram_free(cxt->przs[i]);
>> + kfree(cxt->przs);
>> + cxt->max_dump_cnt = 0;
>> + }
>>
>> - kfree(cxt->przs);
>> - cxt->max_dump_cnt = 0;
>> + /* Free ftrace PRZs */
>> + if (cxt->fprzs) {
>> + for (i = 0; i < nr_cpu_ids; i++)
>> + persistent_ram_free(cxt->przs[i]);
>
> I am supposed to free fprzs[i] here, instead of przs[i]. Also need to
> check flag and free correct number of zones. Will fix for v2, sorry
> about that.
I think the cpu id needs to be bounds-checked against the size of the
allocation. In theory, CPU hot-plug could grow the number of CPUs
after pstore is initialized.
-Kees
--
Kees Cook
Nexus Security
On Mon, Oct 10, 2016 at 4:59 PM, Kees Cook <[email protected]> wrote:
> On Sun, Oct 9, 2016 at 10:15 AM, Joel Fernandes <[email protected]> wrote:
>> On Fri, Oct 7, 2016 at 10:28 PM, Joel Fernandes <[email protected]> wrote:
>>> If FTRACE_PER_CPU flag is passed to ramoops pdata, split the space into
>>> multiple zones depending on the number of CPUs.
>>>
>>> This speeds up the performance of function tracing by about 280% in my tests as
>>> we avoid the locking. The trade off being lesser space available per CPU. Let
>>> the ramoops user decide which option they want based on pdata flag.
>>>
>>> Signed-off-by: Joel Fernandes <[email protected]>
>>> ---
>>> fs/pstore/ram.c | 70 +++++++++++++++++++++++++++++++++++-----------
>>> include/linux/pstore_ram.h | 3 ++
>>> 2 files changed, 56 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
>> [..]
>>> @@ -391,14 +418,21 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
>>> {
>>> int i;
>>>
>>> - if (!cxt->przs)
>>> - return;
>>> + /* Free dump PRZs */
>>> + if (cxt->przs) {
>>> + for (i = 0; i < cxt->max_dump_cnt; i++)
>>> + persistent_ram_free(cxt->przs[i]);
>>>
>>> - for (i = 0; i < cxt->max_dump_cnt; i++)
>>> - persistent_ram_free(cxt->przs[i]);
>>> + kfree(cxt->przs);
>>> + cxt->max_dump_cnt = 0;
>>> + }
>>>
>>> - kfree(cxt->przs);
>>> - cxt->max_dump_cnt = 0;
>>> + /* Free ftrace PRZs */
>>> + if (cxt->fprzs) {
>>> + for (i = 0; i < nr_cpu_ids; i++)
>>> + persistent_ram_free(cxt->przs[i]);
>>
>> I am supposed to free fprzs[i] here, instead of przs[i]. Also need to
>> check flag and free correct number of zones. Will fix for v2, sorry
>> about that.
>
> I think the cpu id needs to be bounds-checked against the size of the
> allocation. In theory, CPU hot-plug could grow the number of CPUs
> after pstore is initialized.
Oh, also, this new flag needs to be exposed as a module parameter and a
Device Tree property as well, so it's available for more than just the
dummy driver. :)
-Kees
--
Kees Cook
Nexus Security
On Fri, 7 Oct 2016 22:28:27 -0700
Joel Fernandes <[email protected]> wrote:
> Here's an early RFC for a patch series on improving ftrace throughput with
> ramoops. I am hoping to get some early comments so I'm releasing it in advance.
> It is functional and tested.
>
> Currently ramoops uses a single zone to store function traces. To make this
> work, it has to uses locking to synchronize accesses to the buffers. Recently
> the synchronization was completely moved from a cmpxchg mechanism to raw
> spinlocks due to difficulties in using cmpxchg on uncached memory and also on
> RAMs behind PCIe. [1] This change further dropped the peformance of ramoops
> pstore backend by more than half in my tests.
>
> This patch series improves the situation dramatically by around 280% from what
> it is now by creating a ramoops persistent zone for each CPU and avoiding use of
> locking altogether for ftrace. At init time, the persistent zones are then
> merged together.
>
> Here are some tests to show the improvements. Tested using a qemu quad core
> x86_64 instance with -mem-path to persist the guest RAM to a file. I measured
> avergage throughput of dd over 30 seconds:
>
> dd if=/dev/zero | pv | dd of=/dev/null
>
> Without this patch series: 24MB/s
> With per-cpu buffers and counter increment: 91.5 MB/s (improvement by ~ 281%)
> with per-cpu buffers and trace_clock: 51.9 MB/s
>
> Some more considerations:
> 1. Inorder to do the merge of the individual buffers, I am using racy counters
> since I didn't want to sacrifice throughput for perfect time stamps.
> trace_clock() for timestamps although did the job but was almost half the
> throughput of using counter based timestamp.
>
> 2. Since the patches divide the available ftrace persistent space by the number
> of CPUs, lesser space will now be available per-CPU however the user is free to
> disable per CPU behavior and revert to the old behavior by specifying
> PSTORE_PER_CPU flag. Its a space vs performance trade-off so if user has
> enough space and not a lot of CPUs, then using per-CPU persistent buffers make
> sense for better performance.
>
> 3. Without using any counters or timestamps, the improvement is even more
> (~140MB/s) but the buffers cannot be merged.
>
> [1] https://lkml.org/lkml/2016/9/8/375
From a tracing point of view, I have no qualms with this patch set.
-- Steve
>
> Joel Fernandes (7):
> pstore: Make spinlock per zone instead of global
> pstore: locking: dont lock unless caller asks to
> pstore: Remove case of PSTORE_TYPE_PMSG write using deprecated
> function
> pstore: Make ramoops_init_przs generic for other prz arrays
> ramoops: Split ftrace buffer space into per-CPU zones
> pstore: Add support to store timestamp counter in ftrace records
> pstore: Merge per-CPU ftrace zones into one zone for output
>
> fs/pstore/ftrace.c | 3 +
> fs/pstore/inode.c | 7 +-
> fs/pstore/internal.h | 34 -------
> fs/pstore/ram.c | 234 +++++++++++++++++++++++++++++++++++----------
> fs/pstore/ram_core.c | 30 +++---
> include/linux/pstore.h | 69 +++++++++++++
> include/linux/pstore_ram.h | 6 +-
> 7 files changed, 280 insertions(+), 103 deletions(-)
>
On Mon, Oct 10, 2016 at 4:52 PM, Kees Cook <[email protected]> wrote:
> On Fri, Oct 7, 2016 at 10:28 PM, Joel Fernandes <[email protected]> wrote:
>> PMSG now uses ramoops_pstore_write_buf_user instead of ramoops_pstore_write_buf
>> Remove the case where we check PSTORE_TYPE_PMSG case.
>
> Ah yeah, good point. Can you actually improve this to add a
> ratelimited WARN() to both _write_buf and write_buf_user when an
> unhandled type is encountered?
Sure, I'll add that. I'll also use kcalloc as you suggested in the other
thread, and add the module parameter and DT entry.
Thanks,
Joel
On Mon, Oct 10, 2016 at 4:48 PM, Kees Cook <[email protected]> wrote:
> On Fri, Oct 7, 2016 at 10:28 PM, Joel Fernandes <[email protected]> wrote:
>> In preparation of not locking at all for certain buffers depending on if
>> there's contention, make locking optional depending if caller requested it.
>
> This should be a bool argument (or enum), with named values so it's
> more readable. For example, these don't immediately tell me what the
> argument does:
>
> persistent_ram_write(cxt->cprz, buf, size, 1);
> persistent_ram_write(cxt->cprz, buf, size, true);
>
> But this does:
>
> persistent_ram_write(cxt->cprz, buf, size, PSTORE_REQUIRE_LOCKING);
>
> As part of this, can you document in the pstore structure which types
> of front-ends require locking and if they don't, why?
Sure, I will make it more descriptive as you suggested.
Thanks,
Joel
Hi Kees,
On Mon, Oct 10, 2016 at 4:59 PM, Kees Cook <[email protected]> wrote:
> On Sun, Oct 9, 2016 at 10:15 AM, Joel Fernandes <[email protected]> wrote:
>> On Fri, Oct 7, 2016 at 10:28 PM, Joel Fernandes <[email protected]> wrote:
>>> If FTRACE_PER_CPU flag is passed to ramoops pdata, split the space into
>>> multiple zones depending on the number of CPUs.
>>>
>>> This speeds up the performance of function tracing by about 280% in my tests as
>>> we avoid the locking. The trade off being lesser space available per CPU. Let
>>> the ramoops user decide which option they want based on pdata flag.
>>>
>>> Signed-off-by: Joel Fernandes <[email protected]>
>>> ---
>>> fs/pstore/ram.c | 70 +++++++++++++++++++++++++++++++++++-----------
>>> include/linux/pstore_ram.h | 3 ++
>>> 2 files changed, 56 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
>> [..]
>>> @@ -391,14 +418,21 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
>>> {
>>> int i;
>>>
>>> - if (!cxt->przs)
>>> - return;
>>> + /* Free dump PRZs */
>>> + if (cxt->przs) {
>>> + for (i = 0; i < cxt->max_dump_cnt; i++)
>>> + persistent_ram_free(cxt->przs[i]);
>>>
>>> - for (i = 0; i < cxt->max_dump_cnt; i++)
>>> - persistent_ram_free(cxt->przs[i]);
>>> + kfree(cxt->przs);
>>> + cxt->max_dump_cnt = 0;
>>> + }
>>>
>>> - kfree(cxt->przs);
>>> - cxt->max_dump_cnt = 0;
>>> + /* Free ftrace PRZs */
>>> + if (cxt->fprzs) {
>>> + for (i = 0; i < nr_cpu_ids; i++)
>>> + persistent_ram_free(cxt->przs[i]);
>>
>> I am supposed to free fprzs[i] here, instead of przs[i]. Also need to
>> check flag and free correct number of zones. Will fix for v2, sorry
>> about that.
>
> I think the cpu id needs to be bounds-checked against the size of the
> allocation. In theory, CPU hot-plug could grow the number of CPUs
> after pstore is initialized.
nr_cpu_ids is fixed to the number of possible CPUs, regardless of whether
hotplug is being used. I did a hotplug test as well to confirm this. So if I
boot a 4-CPU machine with only 2 CPUs online, nr_cpu_ids is still 4.
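(For reference, the kernel pins this down at boot in setup_nr_cpu_ids() in
kernel/smp.c, roughly as below; quoting from memory, so double-check the
source:)

    /* nr_cpu_ids follows cpu_possible_mask and is fixed at boot. */
    nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask), NR_CPUS) + 1;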
Thanks,
Joel
On Sun, Oct 16, 2016 at 10:40 AM, Joel Fernandes <[email protected]> wrote:
> Hi Kees,
>
> On Mon, Oct 10, 2016 at 4:59 PM, Kees Cook <[email protected]> wrote:
>> On Sun, Oct 9, 2016 at 10:15 AM, Joel Fernandes <[email protected]> wrote:
>>> On Fri, Oct 7, 2016 at 10:28 PM, Joel Fernandes <[email protected]> wrote:
>>>> If FTRACE_PER_CPU flag is passed to ramoops pdata, split the space into
>>>> multiple zones depending on the number of CPUs.
>>>>
>>>> This speeds up the performance of function tracing by about 280% in my tests as
>>>> we avoid the locking. The trade off being lesser space available per CPU. Let
>>>> the ramoops user decide which option they want based on pdata flag.
>>>>
>>>> Signed-off-by: Joel Fernandes <[email protected]>
>>>> ---
>>>> fs/pstore/ram.c | 70 +++++++++++++++++++++++++++++++++++-----------
>>>> include/linux/pstore_ram.h | 3 ++
>>>> 2 files changed, 56 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
>>> [..]
>>>> @@ -391,14 +418,21 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
>>>> {
>>>> int i;
>>>>
>>>> - if (!cxt->przs)
>>>> - return;
>>>> + /* Free dump PRZs */
>>>> + if (cxt->przs) {
>>>> + for (i = 0; i < cxt->max_dump_cnt; i++)
>>>> + persistent_ram_free(cxt->przs[i]);
>>>>
>>>> - for (i = 0; i < cxt->max_dump_cnt; i++)
>>>> - persistent_ram_free(cxt->przs[i]);
>>>> + kfree(cxt->przs);
>>>> + cxt->max_dump_cnt = 0;
>>>> + }
>>>>
>>>> - kfree(cxt->przs);
>>>> - cxt->max_dump_cnt = 0;
>>>> + /* Free ftrace PRZs */
>>>> + if (cxt->fprzs) {
>>>> + for (i = 0; i < nr_cpu_ids; i++)
>>>> + persistent_ram_free(cxt->przs[i]);
>>>
>>> I am supposed to free fprzs[i] here, instead of przs[i]. Also need to
>>> check flag and free correct number of zones. Will fix for v2, sorry
>>> about that.
>>
>> I think the cpu id needs to be bounds-checked against the size of the
>> allocation. In theory, CPU hot-plug could grow the number of CPUs
>> after pstore is initialized.
>
> nr_cpu_ids is fixed to the number of possible CPUs regardless of if
> hotplug is being used or not. I did a hotplug test as well to confirm
> this. So if I boot on 4 CPU machine with only 2 CPUs online, then
> nr_cpu_ids is 4.
Ah-ha, okay. I wasn't sure if there was some way to grow nr_cpu_ids after boot.
Thanks for checking!
-Kees
--
Kees Cook
Nexus Security