Return-Path: 
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S933643AbcJTHfG (ORCPT );
	Thu, 20 Oct 2016 03:35:06 -0400
Received: from mail-pf0-f176.google.com ([209.85.192.176]:33373 "EHLO
	mail-pf0-f176.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S932859AbcJTHeZ (ORCPT );
	Thu, 20 Oct 2016 03:34:25 -0400
From: Joel Fernandes 
To: linux-kernel@vger.kernel.org
Cc: Kees Cook , Joel Fernandes , Anton Vorontsov , Colin Cross , Tony Luck 
Subject: [PATCH v2 5/7] ramoops: Split ftrace buffer space into per-CPU zones
Date: Thu, 20 Oct 2016 00:34:04 -0700
Message-Id: <1476948846-15006-6-git-send-email-joelaf@google.com>
X-Mailer: git-send-email 2.7.4
In-Reply-To: <1476948846-15006-1-git-send-email-joelaf@google.com>
References: <1476948846-15006-1-git-send-email-joelaf@google.com>
Sender: linux-kernel-owner@vger.kernel.org
List-ID: 
X-Mailing-List: linux-kernel@vger.kernel.org
Content-Length: 6266
Lines: 199

If the FTRACE_PER_CPU flag is passed in the ramoops pdata, split the
ftrace buffer space into multiple zones, one per CPU. This speeds up
function tracing by about 280% in my tests since we avoid the locking;
the trade-off is that less space is available per CPU. Let the ramoops
user decide which option they want based on the pdata flag.

Signed-off-by: Joel Fernandes 
---
 fs/pstore/ram.c            | 72 +++++++++++++++++++++++++++++++++++-----------
 include/linux/pstore_ram.h |  3 ++
 2 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 5caa512..5e96f78 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -87,7 +87,7 @@ MODULE_PARM_DESC(ramoops_ecc,
 struct ramoops_context {
 	struct persistent_ram_zone **przs;
 	struct persistent_ram_zone *cprz;
-	struct persistent_ram_zone *fprz;
+	struct persistent_ram_zone **fprzs;
 	struct persistent_ram_zone *mprz;
 	phys_addr_t phys_addr;
 	unsigned long size;
@@ -97,6 +97,7 @@ struct ramoops_context {
 	size_t ftrace_size;
 	size_t pmsg_size;
 	int dump_oops;
+	int flags;
 	struct persistent_ram_ecc_info ecc_info;
 	unsigned int max_dump_cnt;
 	unsigned int dump_write_cnt;
@@ -219,9 +220,17 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
 	if (!prz_ok(prz))
 		prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt,
 					   1, id, type, PSTORE_TYPE_CONSOLE, 0);
-	if (!prz_ok(prz))
-		prz = ramoops_get_next_prz(&cxt->fprz, &cxt->ftrace_read_cnt,
-					   1, id, type, PSTORE_TYPE_FTRACE, 0);
+	if (!prz_ok(prz)) {
+		int max = (cxt->flags & FTRACE_PER_CPU) ? nr_cpu_ids : 1;
+		while (cxt->ftrace_read_cnt < max && !prz) {
+			prz = ramoops_get_next_prz(cxt->fprzs,
+					&cxt->ftrace_read_cnt, max, id, type,
+					PSTORE_TYPE_FTRACE, 0);
+			if (!prz_ok(prz))
+				continue;
+		}
+	}
+
 	if (!prz_ok(prz))
 		prz = ramoops_get_next_prz(&cxt->mprz, &cxt->pmsg_read_cnt,
 					   1, id, type, PSTORE_TYPE_PMSG, 0);
@@ -283,9 +292,23 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type,
 		persistent_ram_write(cxt->cprz, buf, size, PSTORE_RAM_LOCK);
 		return 0;
 	} else if (type == PSTORE_TYPE_FTRACE) {
-		if (!cxt->fprz)
+		int zonenum, lock;
+
+		if (!cxt->fprzs)
 			return -ENOMEM;
-		persistent_ram_write(cxt->fprz, buf, size, PSTORE_RAM_LOCK);
+		/*
+		 * If per-cpu buffers, don't lock. Otherwise there's only
+		 * 1 zone for ftrace (zone 0) and all CPUs share it, so lock.
+		 */
+		if (cxt->flags & FTRACE_PER_CPU) {
+			zonenum = smp_processor_id();
+			lock = PSTORE_RAM_NOLOCK;
+		} else {
+			zonenum = 0;
+			lock = PSTORE_RAM_LOCK;
+		}
+
+		persistent_ram_write(cxt->fprzs[zonenum], buf, size, lock);
 		return 0;
 	} else if (type == PSTORE_TYPE_PMSG) {
 		pr_warn_ratelimited("ramoops: warning: PMSG shouldn't call %s\n",
@@ -352,6 +375,7 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count,
 {
 	struct ramoops_context *cxt = psi->data;
 	struct persistent_ram_zone *prz;
+	int max;
 
 	switch (type) {
 	case PSTORE_TYPE_DMESG:
@@ -363,7 +387,10 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count,
 		prz = cxt->cprz;
 		break;
 	case PSTORE_TYPE_FTRACE:
-		prz = cxt->fprz;
+		max = (cxt->flags & FTRACE_PER_CPU) ? nr_cpu_ids : 1;
+		if (id >= max)
+			return -EINVAL;
+		prz = cxt->fprzs[id];
 		break;
 	case PSTORE_TYPE_PMSG:
 		prz = cxt->mprz;
@@ -394,14 +421,23 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
 {
 	int i;
 
-	if (!cxt->przs)
-		return;
+	/* Free dump PRZs */
+	if (cxt->przs) {
+		for (i = 0; i < cxt->max_dump_cnt; i++)
+			persistent_ram_free(cxt->przs[i]);
 
-	for (i = 0; i < cxt->max_dump_cnt; i++)
-		persistent_ram_free(cxt->przs[i]);
+		kfree(cxt->przs);
+		cxt->max_dump_cnt = 0;
+	}
 
-	kfree(cxt->przs);
-	cxt->max_dump_cnt = 0;
+	/* Free ftrace PRZs */
+	if (cxt->fprzs) {
+		int nr = (cxt->flags & FTRACE_PER_CPU) ? nr_cpu_ids : 1;
+
+		for (i = 0; i < nr; i++)
+			persistent_ram_free(cxt->fprzs[i]);
+		kfree(cxt->fprzs);
+	}
 }
 
 static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
@@ -618,6 +654,7 @@ static int ramoops_probe(struct platform_device *pdev)
 	cxt->ftrace_size = pdata->ftrace_size;
 	cxt->pmsg_size = pdata->pmsg_size;
 	cxt->dump_oops = pdata->dump_oops;
+	cxt->flags = pdata->flags;
 	cxt->ecc_info = pdata->ecc_info;
 
 	paddr = cxt->phys_addr;
@@ -633,8 +670,9 @@ static int ramoops_probe(struct platform_device *pdev)
 	if (err)
 		goto fail_init_cprz;
 
-	err = ramoops_init_prz(dev, cxt, &cxt->fprz, &paddr, cxt->ftrace_size,
-			       LINUX_VERSION_CODE);
+	err = ramoops_init_przs(dev, cxt, &cxt->fprzs, &paddr, cxt->ftrace_size,
+				(cxt->flags & FTRACE_PER_CPU) ? nr_cpu_ids : 1,
+				LINUX_VERSION_CODE);
 	if (err)
 		goto fail_init_fprz;
 
@@ -698,7 +736,6 @@ fail_clear:
 	cxt->pstore.bufsize = 0;
 	persistent_ram_free(cxt->mprz);
 fail_init_mprz:
-	persistent_ram_free(cxt->fprz);
 fail_init_fprz:
 	persistent_ram_free(cxt->cprz);
 fail_init_cprz:
@@ -717,7 +754,6 @@ static int ramoops_remove(struct platform_device *pdev)
 	cxt->pstore.bufsize = 0;
 
 	persistent_ram_free(cxt->mprz);
-	persistent_ram_free(cxt->fprz);
 	persistent_ram_free(cxt->cprz);
 	ramoops_free_przs(cxt);
 
@@ -759,6 +795,8 @@ static void ramoops_register_dummy(void)
 	dummy_data->ftrace_size = ramoops_ftrace_size;
 	dummy_data->pmsg_size = ramoops_pmsg_size;
 	dummy_data->dump_oops = dump_oops;
+	dummy_data->flags = FTRACE_PER_CPU;
+
 	/*
 	 * For backwards compatibility ramoops.ecc=1 means 16 bytes ECC
 	 * (using 1 byte for ECC isn't much of use anyway).
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 69f3487..122f78d 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -86,6 +86,8 @@ ssize_t persistent_ram_ecc_string(struct persistent_ram_zone *prz,
  * @mem_address	physical memory address to contain ramoops
  */
 
+#define FTRACE_PER_CPU		BIT(0)
+
 struct ramoops_platform_data {
 	unsigned long mem_size;
 	phys_addr_t mem_address;
@@ -95,6 +97,7 @@ struct ramoops_platform_data {
 	unsigned long ftrace_size;
 	unsigned long pmsg_size;
 	int dump_oops;
+	int flags;
 	struct persistent_ram_ecc_info ecc_info;
 };
 
-- 
2.7.4
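
For anyone wiring this up from a board file, here is a minimal sketch of how a
platform could opt in to the per-CPU ftrace zones through the new pdata flag.
This is not part of the patch: the carve-out address, the sizes, and the use of
platform_device_register_data() below are illustrative assumptions; only the
FTRACE_PER_CPU flag and the ramoops_platform_data fields come from the change
above.

/*
 * Illustrative sketch only, not part of this patch. The mem_address,
 * mem_size and ftrace_size values are made up; use your platform's
 * reserved-memory region instead.
 */
#include <linux/err.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/sizes.h>
#include <linux/pstore_ram.h>

static struct ramoops_platform_data board_ramoops_data = {
	.mem_address	= 0x8f000000,		/* hypothetical carve-out base */
	.mem_size	= SZ_1M,		/* hypothetical carve-out size */
	.ftrace_size	= SZ_128K,		/* split across the per-CPU zones */
	.flags		= FTRACE_PER_CPU,	/* per-CPU zones, no write lock */
};

static int __init board_ramoops_init(void)
{
	struct platform_device *pdev;

	/* Hand the pdata to the "ramoops" platform driver. */
	pdev = platform_device_register_data(NULL, "ramoops", -1,
					     &board_ramoops_data,
					     sizeof(board_ramoops_data));
	return PTR_ERR_OR_ZERO(pdev);
}
device_initcall(board_ramoops_init);

With FTRACE_PER_CPU set, the ftrace area is split across nr_cpu_ids zones and
each CPU writes to its own zone with PSTORE_RAM_NOLOCK; leaving the flag clear
keeps the previous behaviour of a single shared, locked ftrace zone.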