2020-05-09 11:18:54

by WeiXiong Liao

[permalink] [raw]
Subject: [PATCH v5 00/12] pstore: mtd: support crash log to block and mtd device

This series adds support for writing crash logs to block and MTD devices,
based on v4 of Kees Cook's series.

Changes since v4:
patch 1: adapt pstore to kmsg_dump_reason_str().
patch 2: fix bugs where decompression failed and rmmod failed.
use atomic_xchg() on psz_flush_dirty_zone() in case of reload.
patch 3: fix build error
patch 5: flush pmsg zone if it's dirty.
patch 6: use delayed work to cache more data and reduce the number of
dirty-flusher calls
patch 12: change dev_err() to pr_err() during initialization because no
pointer to the mtd device is available at that time.

v4: https://lore.kernel.org/lkml/[email protected]/
v3: https://lore.kernel.org/lkml/[email protected]/
v2: https://lore.kernel.org/lkml/[email protected]/
v1: https://lore.kernel.org/lkml/[email protected]/

Kees Cook (1):
printk: pstore: Introduce kmsg_dump_reason_str()

WeiXiong Liao (11):
pstore/zone: Introduce common layer to manage storage zones
pstore/blk: Introduce backend for block devices
pstore/blk: Provide way to choose pstore frontend support
pstore/blk: Add support for pmsg frontend
pstore/blk: Add console frontend support
pstore/blk: Add ftrace frontend support
Documentation: Add details for pstore/blk
pstore/zone: Provide way to skip "broken" zone for MTD devices
pstore/blk: Provide way to query pstore configuration
pstore/blk: Support non-block storage devices
mtd: Support kmsg dumper based on pstore/blk

Documentation/admin-guide/pstore-blk.rst | 243 +++++
MAINTAINERS | 1 +
drivers/mtd/Kconfig | 10 +
drivers/mtd/Makefile | 1 +
drivers/mtd/mtdpstore.c | 563 +++++++++++
fs/pstore/Kconfig | 109 +++
fs/pstore/Makefile | 6 +
fs/pstore/blk.c | 481 ++++++++++
fs/pstore/platform.c | 22 +-
fs/pstore/zone.c | 1508 ++++++++++++++++++++++++++++++
include/linux/kmsg_dump.h | 7 +
include/linux/pstore_blk.h | 77 ++
include/linux/pstore_zone.h | 60 ++
kernel/printk/printk.c | 21 +
14 files changed, 3088 insertions(+), 21 deletions(-)
create mode 100644 Documentation/admin-guide/pstore-blk.rst
create mode 100644 drivers/mtd/mtdpstore.c
create mode 100644 fs/pstore/blk.c
create mode 100644 fs/pstore/zone.c
create mode 100644 include/linux/pstore_blk.h
create mode 100644 include/linux/pstore_zone.h

--
1.9.1


2020-05-09 11:19:13

by WeiXiong Liao

[permalink] [raw]
Subject: [PATCH v5 09/12] pstore/zone: Provide way to skip "broken" zone for MTD devices

One requirement to support MTD devices in pstore/zone is having a
way to declare certain regions as broken. Add this support to
pstore/zone.

The MTD driver should return -ENOMSG when encountering a bad region,
which tells pstore/zone to skip and try the next one.

Signed-off-by: WeiXiong Liao <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Kees Cook <[email protected]>
---
fs/pstore/blk.c | 10 +++++--
fs/pstore/zone.c | 65 ++++++++++++++++++++++++++++++++++++---------
include/linux/pstore_blk.h | 3 ++-
include/linux/pstore_zone.h | 12 ++++++---
4 files changed, 71 insertions(+), 19 deletions(-)

diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c
index 84ca98923d8a..bf27dbd18db2 100644
--- a/fs/pstore/blk.c
+++ b/fs/pstore/blk.c
@@ -101,9 +101,12 @@
* means error.
* @write: The same as @read, but the following error number:
* -EBUSY means try to write again later.
+ * -ENOMSG means to try next zone.
* @panic_write:The write operation only used for panic case. It's optional
- * if you do not care panic log. The parameters and return value
- * are the same as @read.
+ * if you do not care panic log. The parameters are relative
+ * value to storage.
+ * On success, the number of bytes should be returned, others
+ * excluding -ENOMSG mean error. -ENOMSG means to try next zone.
*/
struct psblk_device {
unsigned long total_size;
@@ -315,6 +318,9 @@ static ssize_t psblk_blk_panic_write(const char *buf, size_t size,
/* size and off must align to SECTOR_SIZE for block device */
ret = blkdev_panic_write(buf, off >> SECTOR_SHIFT,
size >> SECTOR_SHIFT);
+ /* try next zone */
+ if (ret == -ENOMSG)
+ return ret;
return ret ? -EIO : size;
}

diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c
index a51594f95347..9a9a4a7134d0 100644
--- a/fs/pstore/zone.c
+++ b/fs/pstore/zone.c
@@ -247,6 +247,9 @@ static int psz_zone_write(struct pstore_zone *zone,

return 0;
dirty:
+ /* no need to mark dirty if going to try next zone */
+ if (wcnt == -ENOMSG)
+ return -ENOMSG;
atomic_set(&zone->dirty, true);
/* flush dirty zones nicely */
if (wcnt == -EBUSY && !is_on_panic())
@@ -389,7 +392,11 @@ static int psz_recover_oops_meta(struct psz_context *cxt)
return -EINVAL;

rcnt = info->read((char *)buf, len, zone->off);
- if (rcnt != len) {
+ if (rcnt == -ENOMSG) {
+ pr_debug("%s with id %lu may be broken, skip\n",
+ zone->name, i);
+ continue;
+ } else if (rcnt != len) {
pr_err("read %s with id %lu failed\n", zone->name, i);
return (int)rcnt < 0 ? (int)rcnt : -EIO;
}
@@ -724,24 +731,58 @@ static void psz_write_kmsg_hdr(struct pstore_zone *zone,
hdr->counter = 0;
}

+/*
+ * In case a zone is broken, which may occur on MTD devices, we try each zone,
+ * starting at cxt->oops_write_cnt.
+ */
static inline int notrace psz_oops_write_record(struct psz_context *cxt,
struct pstore_record *record)
{
+ int ret = -EBUSY;
size_t size, hlen;
struct pstore_zone *zone;
- unsigned int zonenum;
+ unsigned int i;

- zonenum = cxt->oops_write_cnt;
- zone = cxt->opszs[zonenum];
- if (unlikely(!zone))
- return -ENOSPC;
- cxt->oops_write_cnt = (zonenum + 1) % cxt->oops_max_cnt;
+ for (i = 0; i < cxt->oops_max_cnt; i++) {
+ unsigned int zonenum, len;

- pr_debug("write %s to zone id %d\n", zone->name, zonenum);
- psz_write_kmsg_hdr(zone, record);
- hlen = sizeof(struct psz_oops_header);
- size = min_t(size_t, record->size, zone->buffer_size - hlen);
- return psz_zone_write(zone, FLUSH_ALL, record->buf, size, hlen);
+ zonenum = (cxt->oops_write_cnt + i) % cxt->oops_max_cnt;
+ zone = cxt->opszs[zonenum];
+ if (unlikely(!zone))
+ return -ENOSPC;
+
+ /* avoid destroying old data, allocate a new one */
+ len = zone->buffer_size + sizeof(*zone->buffer);
+ zone->oldbuf = zone->buffer;
+ zone->buffer = kzalloc(len, GFP_KERNEL);
+ if (!zone->buffer) {
+ zone->buffer = zone->oldbuf;
+ return -ENOMEM;
+ }
+ zone->buffer->sig = zone->oldbuf->sig;
+
+ pr_debug("write %s to zone id %d\n", zone->name, zonenum);
+ psz_write_kmsg_hdr(zone, record);
+ hlen = sizeof(struct psz_oops_header);
+ size = min_t(size_t, record->size, zone->buffer_size - hlen);
+ ret = psz_zone_write(zone, FLUSH_ALL, record->buf, size, hlen);
+ if (likely(!ret || ret != -ENOMSG)) {
+ cxt->oops_write_cnt = zonenum + 1;
+ cxt->oops_write_cnt %= cxt->oops_max_cnt;
+ /* no need to try next zone, free last zone buffer */
+ kfree(zone->oldbuf);
+ zone->oldbuf = NULL;
+ return ret;
+ }
+
+ pr_debug("zone %u may be broken, try next dmesg zone\n",
+ zonenum);
+ kfree(zone->buffer);
+ zone->buffer = zone->oldbuf;
+ zone->oldbuf = NULL;
+ }
+
+ return -EBUSY;
}

static int notrace psz_oops_write(struct psz_context *cxt,
diff --git a/include/linux/pstore_blk.h b/include/linux/pstore_blk.h
index d8f609e60288..828b0763d477 100644
--- a/include/linux/pstore_blk.h
+++ b/include/linux/pstore_blk.h
@@ -14,7 +14,8 @@
* @start_sect: start sector to block device
* @sects: sectors count on buf
*
- * Return: On success, zero should be returned. Others mean error.
+ * Return: On success, zero should be returned. Others excluding -ENOMSG
+ * mean error. -ENOMSG means to try next zone.
*
* Panic write to block device must be aligned to SECTOR_SIZE.
*/
diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h
index 94f441b8b616..ddb3dfea4ea6 100644
--- a/include/linux/pstore_zone.h
+++ b/include/linux/pstore_zone.h
@@ -23,11 +23,15 @@
* @read: The general read operation. Both of the function parameters
* @size and @offset are relative value to storage.
* On success, the number of bytes should be returned, others
- * means error.
- * @write: The same as @read, but -EBUSY means try to write again later.
+ * mean error.
+ * @write: The same as @read, but the following error number:
+ * -EBUSY means try to write again later.
+ * -ENOMSG means to try next zone.
* @panic_write:The write operation only used for panic case. It's optional
- * if you do not care panic log. The parameters and return value
- * are the same as @read.
+ * if you do not care panic log. The parameters are relative
+ * value to storage.
+ * On success, the number of bytes should be returned, others
+ * excluding -ENOMSG mean error. -ENOMSG means to try next zone.
*/
struct pstore_zone_info {
struct module *owner;
--
1.9.1

2020-05-09 11:19:19

by WeiXiong Liao

[permalink] [raw]
Subject: [PATCH v5 11/12] pstore/blk: Support non-block storage devices

Add support for non-block devices (e.g. MTD). A non-block driver calls
psblk_register_device() to register itself.

In addition, pstore/zone is updated to handle non-block devices,
where an erase must be done before a write. Without this, there is no
way to remove records stored to an MTD.

Signed-off-by: WeiXiong Liao <[email protected]>
Link: https://lore.kernel.org/r/1585126506-18635-11-git-send-email-liaoweixiong@allwinnertech.com
Signed-off-by: Kees Cook <[email protected]>
---
Documentation/admin-guide/pstore-blk.rst | 17 ++++++++---
fs/pstore/blk.c | 52 ++++++++++++--------------------
fs/pstore/zone.c | 8 ++++-
include/linux/pstore_blk.h | 37 +++++++++++++++++++++++
include/linux/pstore_zone.h | 6 ++++
5 files changed, 83 insertions(+), 37 deletions(-)

diff --git a/Documentation/admin-guide/pstore-blk.rst b/Documentation/admin-guide/pstore-blk.rst
index 484a1502fb49..2f3602397715 100644
--- a/Documentation/admin-guide/pstore-blk.rst
+++ b/Documentation/admin-guide/pstore-blk.rst
@@ -7,8 +7,8 @@ Introduction
------------

pstore block (pstore/blk) is an oops/panic logger that writes its logs to a
-block device before the system crashes. You can get these log files by
-mounting pstore filesystem like::
+block device and non-block device before the system crashes. You can get
+these log files by mounting pstore filesystem like::

mount -t pstore pstore /sys/fs/pstore

@@ -24,8 +24,8 @@ Configurations for user determine how pstore/blk works, such as pmsg_size,
kmsg_size and so on. All of them support both Kconfig and module parameters,
but module parameters have priority over Kconfig.

-Configurations for driver are all about block device, such as total_size
-of block device and read/write operations.
+Configurations for driver are all about block device and non-block device,
+such as total_size of block device and read/write operations.

Configurations for user
-----------------------
@@ -152,6 +152,15 @@ driver uses ``psblk_register_blkdev`` to register to pstore/blk.
.. kernel-doc:: fs/pstore/blk.c
:identifiers: psblk_register_blkdev

+A non-block device driver uses ``psblk_register_device`` with
+``struct psblk_device`` to register to pstore/blk.
+
+.. kernel-doc:: fs/pstore/blk.c
+ :identifiers: psblk_register_device
+
+.. kernel-doc:: include/linux/pstore_blk.h
+ :identifiers: psblk_device
+
Compression and header
----------------------

diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c
index a6d578f9df2a..45f61d4ae105 100644
--- a/fs/pstore/blk.c
+++ b/fs/pstore/blk.c
@@ -98,36 +98,15 @@
})

/**
- * struct psblk_device - back-end pstore/blk driver structure.
+ * psblk_register_device() - register non-block device to pstore/blk
*
- * @total_size: The total size in bytes pstore/blk can use. It must be greater
- * than 4096 and be multiple of 4096.
- * @flags: Refer to macro starting with PSTORE_FLAGS defined in
- * linux/pstore.h. It means what front-ends this device support.
- * Zero means all backends for compatible.
- * @read: The general read operation. Both of the function parameters
- * @size and @offset are relative value to bock device (not the
- * whole disk).
- * On success, the number of bytes should be returned, others
- * means error.
- * @write: The same as @read, but the following error number:
- * -EBUSY means try to write again later.
- * -ENOMSG means to try next zone.
- * @panic_write:The write operation only used for panic case. It's optional
- * if you do not care panic log. The parameters are relative
- * value to storage.
- * On success, the number of bytes should be returned, others
- * excluding -ENOMSG mean error. -ENOMSG means to try next zone.
+ * @dev: non-block device information
+ *
+ * Return:
+ * * 0 - OK
+ * * Others - something error.
*/
-struct psblk_device {
- unsigned long total_size;
- unsigned int flags;
- psz_read_op read;
- psz_write_op write;
- psz_write_op panic_write;
-};
-
-static int psblk_register_do(struct psblk_device *dev)
+int psblk_register_device(struct psblk_device *dev)
{
int ret;

@@ -170,6 +149,7 @@ static int psblk_register_do(struct psblk_device *dev)
pstore_zone_info->max_reason = max_reason;
pstore_zone_info->read = dev->read;
pstore_zone_info->write = dev->write;
+ pstore_zone_info->erase = dev->erase;
pstore_zone_info->panic_write = dev->panic_write;
pstore_zone_info->name = MODNAME;
pstore_zone_info->owner = THIS_MODULE;
@@ -182,8 +162,14 @@ static int psblk_register_do(struct psblk_device *dev)
mutex_unlock(&psz_lock);
return ret;
}
+EXPORT_SYMBOL_GPL(psblk_register_device);

-static void psblk_unregister_do(struct psblk_device *dev)
+/**
+ * psblk_unregister_device() - unregister non-block device from pstore/blk
+ *
+ * @dev: non-block device information
+ */
+void psblk_unregister_device(struct psblk_device *dev)
{
mutex_lock(&psz_lock);
if (pstore_zone_info && pstore_zone_info->read == dev->read) {
@@ -193,6 +179,7 @@ static void psblk_unregister_do(struct psblk_device *dev)
}
mutex_unlock(&psz_lock);
}
+EXPORT_SYMBOL_GPL(psblk_unregister_device);

/**
* psblk_get_bdev() - open block device
@@ -406,11 +393,12 @@ int psblk_register_blkdev(unsigned int major, unsigned int flags,

dev.total_size = psblk_bdev_size(bdev);
dev.flags = flags;
- dev.panic_write = panic_write ? psblk_blk_panic_write : NULL;
dev.read = psblk_generic_blk_read;
dev.write = psblk_generic_blk_write;
+ dev.erase = NULL;
+ dev.panic_write = panic_write ? psblk_blk_panic_write : NULL;

- ret = psblk_register_do(&dev);
+ ret = psblk_register_device(&dev);
if (ret)
goto err_put_bdev;

@@ -436,7 +424,7 @@ void psblk_unregister_blkdev(unsigned int major)
void *holder = blkdev;

if (psblk_bdev && MAJOR(psblk_bdev->bd_dev) == major) {
- psblk_unregister_do(&dev);
+ psblk_unregister_device(&dev);
psblk_put_bdev(psblk_bdev, holder);
blkdev_panic_write = NULL;
psblk_bdev = NULL;
diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c
index 9a9a4a7134d0..bc4d4f3041b8 100644
--- a/fs/pstore/zone.c
+++ b/fs/pstore/zone.c
@@ -659,15 +659,21 @@ static inline int psz_oops_erase(struct psz_context *cxt,
struct psz_buffer *buffer = zone->buffer;
struct psz_oops_header *hdr =
(struct psz_oops_header *)buffer->data;
+ size_t size;

if (unlikely(!psz_ok(zone)))
return 0;
+
/* this zone is already updated, no need to erase */
if (record->count != hdr->counter)
return 0;

+ size = buffer_datalen(zone) + sizeof(*zone->buffer);
atomic_set(&zone->buffer->datalen, 0);
- return psz_zone_write(zone, FLUSH_META, NULL, 0, 0);
+ if (cxt->pstore_zone_info->erase)
+ return cxt->pstore_zone_info->erase(size, zone->off);
+ else
+ return psz_zone_write(zone, FLUSH_META, NULL, 0, 0);
}

static inline int psz_record_erase(struct psz_context *cxt,
diff --git a/include/linux/pstore_blk.h b/include/linux/pstore_blk.h
index dd5213044e21..43242e343dad 100644
--- a/include/linux/pstore_blk.h
+++ b/include/linux/pstore_blk.h
@@ -8,6 +8,41 @@
#include <linux/pstore_zone.h>

/**
+ * struct psblk_device - back-end pstore/blk driver structure.
+ *
+ * @total_size: The total size in bytes pstore/blk can use. It must be greater
+ * than 4096 and be multiple of 4096.
+ * @flags: Refer to macro starting with PSTORE_FLAGS defined in
+ * linux/pstore.h. It means what front-ends this device support.
+ * Zero means all backends for compatible.
+ * @read: The general read operation. Both of the function parameters
+ * @size and @offset are relative value to block device (not the
+ * whole disk).
+ * On success, the number of bytes should be returned, others
+ * means error.
+ * @write: The same as @read, but the following error number:
+ * -EBUSY means try to write again later.
+ * -ENOMSG means to try next zone.
+ * @erase: The general erase operation for device with special removing
+ * job. Both of the function parameters @size and @offset are
+ * relative value to storage.
+ * Return 0 on success and others on failure.
+ * @panic_write:The write operation only used for panic case. It's optional
+ * if you do not care panic log. The parameters are relative
+ * value to storage.
+ * On success, the number of bytes should be returned, others
+ * excluding -ENOMSG mean error. -ENOMSG means to try next zone.
+ */
+struct psblk_device {
+ unsigned long total_size;
+ unsigned int flags;
+ psz_read_op read;
+ psz_write_op write;
+ psz_erase_op erase;
+ psz_write_op panic_write;
+};
+
+/**
* typedef psblk_panic_write_op - panic write operation to block device
*
* @buf: the data to write
@@ -22,6 +57,8 @@
typedef int (*psblk_panic_write_op)(const char *buf, sector_t start_sect,
sector_t sects);

+int psblk_register_device(struct psblk_device *dev);
+void psblk_unregister_device(struct psblk_device *dev);
int psblk_register_blkdev(unsigned int major, unsigned int flags,
psblk_panic_write_op panic_write);
void psblk_unregister_blkdev(unsigned int major);
diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h
index ddb3dfea4ea6..2c031a25ee5f 100644
--- a/include/linux/pstore_zone.h
+++ b/include/linux/pstore_zone.h
@@ -7,6 +7,7 @@

typedef ssize_t (*psz_read_op)(char *, size_t, loff_t);
typedef ssize_t (*psz_write_op)(const char *, size_t, loff_t);
+typedef ssize_t (*psz_erase_op)(size_t, loff_t);
/**
* struct pstore_zone_info - pstore/zone back-end driver structure
*
@@ -27,6 +28,10 @@
* @write: The same as @read, but the following error number:
* -EBUSY means try to write again later.
* -ENOMSG means to try next zone.
+ * @erase: The general erase operation for device with special removing
+ * job. Both of the function parameters @size and @offset are
+ * relative value to storage.
+ * Return 0 on success and others on failure.
* @panic_write:The write operation only used for panic case. It's optional
* if you do not care panic log. The parameters are relative
* value to storage.
@@ -45,6 +50,7 @@ struct pstore_zone_info {
unsigned long ftrace_size;
psz_read_op read;
psz_write_op write;
+ psz_erase_op erase;
psz_write_op panic_write;
};

--
1.9.1

2020-05-09 11:20:32

by WeiXiong Liao

[permalink] [raw]
Subject: [PATCH v5 04/12] pstore/blk: Provide way to choose pstore frontend support

Most pstore backends lack support for all the pstore frontends, only
handling kmsg dump and not things like pmsg, console, and ftrace.
Provide a way for drivers using pstore/blk to list which frontends they
expect to support.

Signed-off-by: WeiXiong Liao <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Kees Cook <[email protected]>
---
fs/pstore/blk.c | 18 ++++++++++++++----
include/linux/pstore_blk.h | 4 +++-
2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c
index e249b22ff55f..57d9d180511e 100644
--- a/fs/pstore/blk.c
+++ b/fs/pstore/blk.c
@@ -67,6 +67,9 @@
*
* @total_size: The total size in bytes pstore/blk can use. It must be greater
* than 4096 and be multiple of 4096.
+ * @flags: Refer to macro starting with PSTORE_FLAGS defined in
+ * linux/pstore.h. It means what front-ends this device support.
+ * Zero means all backends for compatible.
* @read: The general read operation. Both of the function parameters
* @size and @offset are relative value to bock device (not the
* whole disk).
@@ -79,6 +82,7 @@
*/
struct psblk_device {
unsigned long total_size;
+ unsigned int flags;
psz_read_op read;
psz_write_op write;
psz_write_op panic_write;
@@ -104,8 +108,11 @@ static int psblk_register_do(struct psblk_device *dev)
return -ENOMEM;
}

-#define verify_size(name, alignsize) { \
- long _##name_ = (name); \
+ /* zero means all backends for compatible */
+ if (!dev->flags)
+ dev->flags = UINT_MAX;
+#define verify_size(name, alignsize, enable) { \
+ long _##name_ = (enable) ? (name) : 0; \
_##name_ = _##name_ <= 0 ? 0 : (_##name_ * 1024); \
if (_##name_ & ((alignsize) - 1)) { \
pr_info(#name " must align to %d\n", \
@@ -116,7 +123,7 @@ static int psblk_register_do(struct psblk_device *dev)
pstore_zone_info->name = _##name_; \
}

- verify_size(kmsg_size, 4096);
+ verify_size(kmsg_size, 4096, dev->flags & PSTORE_FLAGS_DMESG);
#undef verify_size

pstore_zone_info->total_size = dev->total_size;
@@ -312,6 +319,7 @@ static struct bdev_info *psblk_get_bdev_info(void)
* psblk_register_blkdev() - register block device to pstore/blk
*
* @major: the major device number of registering device
+ * @flags: refer to macro starting with PSTORE_FLAGS defined in linux/pstore.h
* @panic_write: the interface for panic case.
*
* Only the matching major to @blkdev can register.
@@ -322,7 +330,8 @@ static struct bdev_info *psblk_get_bdev_info(void)
* * 0 - OK
* * Others - something error.
*/
-int psblk_register_blkdev(unsigned int major, psblk_panic_write_op panic_write)
+int psblk_register_blkdev(unsigned int major, unsigned int flags,
+ psblk_panic_write_op panic_write)
{
struct block_device *bdev;
struct psblk_device dev = {0};
@@ -353,6 +362,7 @@ int psblk_register_blkdev(unsigned int major, psblk_panic_write_op panic_write)
blkdev_panic_write = panic_write;

dev.total_size = psblk_bdev_size(bdev);
+ dev.flags = flags;
dev.panic_write = panic_write ? psblk_blk_panic_write : NULL;
dev.read = psblk_generic_blk_read;
dev.write = psblk_generic_blk_write;
diff --git a/include/linux/pstore_blk.h b/include/linux/pstore_blk.h
index 5ff465e3953e..d8f609e60288 100644
--- a/include/linux/pstore_blk.h
+++ b/include/linux/pstore_blk.h
@@ -4,6 +4,7 @@
#define __PSTORE_BLK_H_

#include <linux/types.h>
+#include <linux/pstore.h>
#include <linux/pstore_zone.h>

/**
@@ -20,7 +21,8 @@
typedef int (*psblk_panic_write_op)(const char *buf, sector_t start_sect,
sector_t sects);

-int psblk_register_blkdev(unsigned int major, psblk_panic_write_op panic_write);
+int psblk_register_blkdev(unsigned int major, unsigned int flags,
+ psblk_panic_write_op panic_write);
void psblk_unregister_blkdev(unsigned int major);
int psblk_blkdev_info(dev_t *devt, sector_t *nr_sects, sector_t *start_sect);

--
1.9.1

2020-05-09 11:20:35

by WeiXiong Liao

[permalink] [raw]
Subject: [PATCH v5 05/12] pstore/blk: Add support for pmsg frontend

Add pmsg support to pstore/blk (through pstore/zone). To enable, pmsg_size
must be greater than 0 and a multiple of 4096.

Signed-off-by: WeiXiong Liao <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Kees Cook <[email protected]>
---
fs/pstore/Kconfig | 12 ++
fs/pstore/blk.c | 9 ++
fs/pstore/zone.c | 271 ++++++++++++++++++++++++++++++++++++++++++--
include/linux/pstore_zone.h | 2 +
4 files changed, 283 insertions(+), 11 deletions(-)

diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index 92ba73bd0b62..f18cd126d83f 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -224,3 +224,15 @@ config PSTORE_BLK_MAX_REASON

NOTE that, both Kconfig and module parameters can configure
pstore/blk, but module parameters have priority over Kconfig.
+
+config PSTORE_BLK_PMSG_SIZE
+ int "Size in Kbytes of pmsg to store"
+ depends on PSTORE_BLK
+ depends on PSTORE_PMSG
+ default 64
+ help
+ This just sets size of pmsg (pmsg_size) for pstore/blk. The size is
+ in KB and must be a multiple of 4.
+
+ NOTE that, both Kconfig and module parameters can configure
+ pstore/blk, but module parameters have priority over Kconfig.
diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c
index 57d9d180511e..02c0223f08fc 100644
--- a/fs/pstore/blk.c
+++ b/fs/pstore/blk.c
@@ -24,6 +24,14 @@
MODULE_PARM_DESC(max_reason,
"maximum reason for kmsg dump (default 2: Oops and Panic)");

+#if IS_ENABLED(CONFIG_PSTORE_PMSG)
+static long pmsg_size = CONFIG_PSTORE_BLK_PMSG_SIZE;
+#else
+static long pmsg_size = -1;
+#endif
+module_param(pmsg_size, long, 0400);
+MODULE_PARM_DESC(pmsg_size, "pmsg size in kbytes");
+
/*
* blkdev - The block device to use.
*
@@ -124,6 +132,7 @@ static int psblk_register_do(struct psblk_device *dev)
}

verify_size(kmsg_size, 4096, dev->flags & PSTORE_FLAGS_DMESG);
+ verify_size(pmsg_size, 4096, dev->flags & PSTORE_FLAGS_PMSG);
#undef verify_size

pstore_zone_info->total_size = dev->total_size;
diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c
index 7dee4711ef9e..89f7d2c42336 100644
--- a/fs/pstore/zone.c
+++ b/fs/pstore/zone.c
@@ -23,12 +23,14 @@
*
* @sig: signature to indicate header (PSZ_SIG xor PSZONE-type value)
* @datalen: length of data in @data
+ * @start: offset into @data where the stored bytes begin
* @data: zone data.
*/
struct psz_buffer {
#define PSZ_SIG (0x43474244) /* DBGC */
uint32_t sig;
atomic_t datalen;
+ atomic_t start;
uint8_t data[];
};

@@ -84,9 +86,11 @@ struct pstore_zone {
* struct psz_context - all about running state of pstore/zone
*
* @opszs: oops/panic storage zones
+ * @ppsz: pmsg storage zone
* @oops_max_cnt: max count of @opszs
* @oops_read_cnt: counter to read oops zone
* @oops_write_cnt: counter to write
+ * @pmsg_read_cnt: counter to read pmsg zone
* @oops_counter: counter to oops
* @panic_counter: counter to panic
* @recovered: whether finish recovering data from storage
@@ -97,9 +101,11 @@ struct pstore_zone {
*/
struct psz_context {
struct pstore_zone **opszs;
+ struct pstore_zone *ppsz;
unsigned int oops_max_cnt;
unsigned int oops_read_cnt;
unsigned int oops_write_cnt;
+ unsigned int pmsg_read_cnt;
/*
* the counter should be recovered when recover.
* It records the oops/panic times after burning rather than booting.
@@ -139,6 +145,11 @@ static inline int buffer_datalen(struct pstore_zone *zone)
return atomic_read(&zone->buffer->datalen);
}

+static inline int buffer_start(struct pstore_zone *zone)
+{
+ return atomic_read(&zone->buffer->start);
+}
+
static inline bool is_on_panic(void)
{
struct psz_context *cxt = &psz_cxt;
@@ -146,10 +157,10 @@ static inline bool is_on_panic(void)
return atomic_read(&cxt->on_panic);
}

-static ssize_t psz_zone_read(struct pstore_zone *zone, char *buf,
+static ssize_t psz_zone_read_buffer(struct pstore_zone *zone, char *buf,
size_t len, unsigned long off)
{
- if (!buf || !zone->buffer)
+ if (!buf || !zone || !zone->buffer)
return -EINVAL;
if (off > zone->buffer_size)
return -EINVAL;
@@ -158,6 +169,18 @@ static ssize_t psz_zone_read(struct pstore_zone *zone, char *buf,
return len;
}

+static int psz_zone_read_oldbuf(struct pstore_zone *zone, char *buf,
+ size_t len, unsigned long off)
+{
+ if (!buf || !zone || !zone->oldbuf)
+ return -EINVAL;
+ if (off > zone->buffer_size)
+ return -EINVAL;
+ len = min_t(size_t, len, zone->buffer_size - off);
+ memcpy(buf, zone->oldbuf->data + off, len);
+ return 0;
+}
+
static int psz_zone_write(struct pstore_zone *zone,
enum psz_flush_mode flush_mode, const char *buf,
size_t len, unsigned long off)
@@ -413,6 +436,93 @@ static int psz_recover_oops(struct psz_context *cxt)
return ret;
}

+static int psz_recover_zone(struct psz_context *cxt, struct pstore_zone *zone)
+{
+ struct pstore_zone_info *info = cxt->pstore_zone_info;
+ struct psz_buffer *oldbuf, tmpbuf;
+ int ret = 0;
+ char *buf;
+ ssize_t rcnt, len, start, off;
+
+ if (!zone || zone->oldbuf)
+ return 0;
+
+ if (is_on_panic()) {
+ /* save data as much as possible */
+ psz_flush_dirty_zone(zone);
+ return 0;
+ }
+
+ if (unlikely(!info->read))
+ return -EINVAL;
+
+ len = sizeof(struct psz_buffer);
+ rcnt = info->read((char *)&tmpbuf, len, zone->off);
+ if (rcnt != len) {
+ pr_debug("read zone %s failed\n", zone->name);
+ return (int)rcnt < 0 ? (int)rcnt : -EIO;
+ }
+
+ if (tmpbuf.sig != zone->buffer->sig) {
+ pr_debug("no valid data in zone %s\n", zone->name);
+ return 0;
+ }
+
+ if (zone->buffer_size < atomic_read(&tmpbuf.datalen) ||
+ zone->buffer_size < atomic_read(&tmpbuf.start)) {
+ pr_info("found overtop zone: %s: off %lld, size %zu\n",
+ zone->name, zone->off, zone->buffer_size);
+ /* just keep going */
+ return 0;
+ }
+
+ if (!atomic_read(&tmpbuf.datalen)) {
+ pr_debug("found erased zone: %s: off %lld, size %zu, datalen %d\n",
+ zone->name, zone->off, zone->buffer_size,
+ atomic_read(&tmpbuf.datalen));
+ return 0;
+ }
+
+ pr_debug("found nice zone: %s: off %lld, size %zu, datalen %d\n",
+ zone->name, zone->off, zone->buffer_size,
+ atomic_read(&tmpbuf.datalen));
+
+ len = atomic_read(&tmpbuf.datalen) + sizeof(*oldbuf);
+ oldbuf = kzalloc(len, GFP_KERNEL);
+ if (!oldbuf)
+ return -ENOMEM;
+
+ memcpy(oldbuf, &tmpbuf, sizeof(*oldbuf));
+ buf = (char *)oldbuf + sizeof(*oldbuf);
+ len = atomic_read(&oldbuf->datalen);
+ start = atomic_read(&oldbuf->start);
+ off = zone->off + sizeof(*oldbuf);
+
+ /* get part of data */
+ rcnt = info->read(buf, len - start, off + start);
+ if (rcnt != len - start) {
+ pr_err("read zone %s failed\n", zone->name);
+ ret = (int)rcnt < 0 ? (int)rcnt : -EIO;
+ goto free_oldbuf;
+ }
+
+ /* get the rest of data */
+ rcnt = info->read(buf + len - start, start, off);
+ if (rcnt != start) {
+ pr_err("read zone %s failed\n", zone->name);
+ ret = (int)rcnt < 0 ? (int)rcnt : -EIO;
+ goto free_oldbuf;
+ }
+
+ zone->oldbuf = oldbuf;
+ psz_flush_dirty_zone(zone);
+ return 0;
+
+free_oldbuf:
+ kfree(oldbuf);
+ return ret;
+}
+
/**
* psz_recovery() - recover data from storage
* @cxt: the context of pstore/zone
@@ -432,6 +542,10 @@ static inline int psz_recovery(struct psz_context *cxt)
if (ret)
goto recover_fail;

+ ret = psz_recover_zone(cxt, cxt->ppsz);
+ if (ret)
+ goto recover_fail;
+
pr_debug("recover end!\n");
atomic_set(&cxt->recovered, 1);
return 0;
@@ -446,9 +560,17 @@ static int psz_pstore_open(struct pstore_info *psi)
struct psz_context *cxt = psi->data;

cxt->oops_read_cnt = 0;
+ cxt->pmsg_read_cnt = 0;
return 0;
}

+static inline bool psz_old_ok(struct pstore_zone *zone)
+{
+ if (zone && zone->oldbuf && atomic_read(&zone->oldbuf->datalen))
+ return true;
+ return false;
+}
+
static inline bool psz_ok(struct pstore_zone *zone)
{
if (zone && zone->buffer && buffer_datalen(zone))
@@ -473,6 +595,25 @@ static inline int psz_oops_erase(struct psz_context *cxt,
return psz_zone_write(zone, FLUSH_META, NULL, 0, 0);
}

+static inline int psz_record_erase(struct psz_context *cxt,
+ struct pstore_zone *zone)
+{
+ if (unlikely(!psz_old_ok(zone)))
+ return 0;
+
+ kfree(zone->oldbuf);
+ zone->oldbuf = NULL;
+ /*
+ * If there is new data in the zone buffer, the old data is
+ * already invalid, so there is no need to flush 0 (erase) to
+ * the block device.
+ */
+ if (!buffer_datalen(zone))
+ return psz_zone_write(zone, FLUSH_META, NULL, 0, 0);
+ psz_flush_dirty_zone(zone);
+ return 0;
+}
+
static int psz_pstore_erase(struct pstore_record *record)
{
struct psz_context *cxt = record->psi->data;
@@ -482,6 +623,8 @@ static int psz_pstore_erase(struct pstore_record *record)
if (record->id >= cxt->oops_max_cnt)
return -EINVAL;
return psz_oops_erase(cxt, cxt->opszs[record->id], record);
+ case PSTORE_TYPE_PMSG:
+ return psz_record_erase(cxt, cxt->ppsz);
default:
return -EINVAL;
}
@@ -502,8 +645,10 @@ static void psz_write_kmsg_hdr(struct pstore_zone *zone,
hdr->reason = record->reason;
if (hdr->reason == KMSG_DUMP_OOPS)
hdr->counter = ++cxt->oops_counter;
- else
+ else if (hdr->reason == KMSG_DUMP_PANIC)
hdr->counter = ++cxt->panic_counter;
+ else
+ hdr->counter = 0;
}

static inline int notrace psz_oops_write_record(struct psz_context *cxt,
@@ -545,14 +690,62 @@ static int notrace psz_oops_write(struct psz_context *cxt,

ret = psz_oops_write_record(cxt, record);
if (!ret) {
- pr_debug("try to flush other dirty oops zones\n");
+ pr_debug("try to flush other dirty zones\n");
psz_flush_dirty_zones(cxt->opszs, cxt->oops_max_cnt);
+ psz_flush_dirty_zone(cxt->ppsz);
}

/* always return 0 as we had handled it on buffer */
return 0;
}

+/*
+ * Write @record into @zone, wrapping around inside the zone's data area.
+ *
+ * The write begins at the offset returned by buffer_start(). If the record
+ * does not fit in the space left up to buffer_size, the leading part fills
+ * that space and the remainder is written from offset 0. A record larger
+ * than buffer_size keeps only its trailing buffer_size bytes.
+ *
+ * Returns -ENOSPC if @zone or @record is NULL, 0 otherwise. The results of
+ * the psz_zone_write() calls are not propagated.
+ */
+static int notrace psz_record_write(struct pstore_zone *zone,
+		struct pstore_record *record)
+{
+	size_t start, rem;
+	bool is_full_data = false;
+	char *buf;
+	int cnt;
+
+	if (!zone || !record)
+		return -ENOSPC;
+
+	/* Read @record only after the NULL check above. */
+	cnt = record->size;
+	buf = record->buf;
+
+	if (atomic_read(&zone->buffer->datalen) >= zone->buffer_size)
+		is_full_data = true;
+
+	if (unlikely(cnt > zone->buffer_size)) {
+		buf += cnt - zone->buffer_size;
+		cnt = zone->buffer_size;
+	}
+
+	start = buffer_start(zone);
+	rem = zone->buffer_size - start;
+	if (unlikely(rem < cnt)) {
+		/* Fill the tail of the buffer, then wrap to the beginning. */
+		psz_zone_write(zone, FLUSH_PART, buf, rem, start);
+		buf += rem;
+		cnt -= rem;
+		start = 0;
+		is_full_data = true;
+	}
+
+	atomic_set(&zone->buffer->start, cnt + start);
+	psz_zone_write(zone, FLUSH_PART, buf, cnt, start);
+
+	/*
+	 * psz_zone_write() sets datalen to start + cnt. That is correct only
+	 * while the total data length is less than the buffer size. Once the
+	 * data has wrapped around to the beginning of the zone, start + cnt
+	 * no longer reflects the amount of valid data, so reset datalen to
+	 * the buffer size and flush the metadata again.
+	 */
+	if (is_full_data) {
+		atomic_set(&zone->buffer->datalen, zone->buffer_size);
+		psz_zone_write(zone, FLUSH_META, NULL, 0, 0);
+	}
+	return 0;
+}
+
static int notrace psz_pstore_write(struct pstore_record *record)
{
struct psz_context *cxt = record->psi->data;
@@ -564,6 +757,8 @@ static int notrace psz_pstore_write(struct pstore_record *record)
switch (record->type) {
case PSTORE_TYPE_DMESG:
return psz_oops_write(cxt, record);
+ case PSTORE_TYPE_PMSG:
+ return psz_record_write(cxt->ppsz, record);
default:
return -EINVAL;
}
@@ -579,6 +774,13 @@ static struct pstore_zone *psz_read_next_zone(struct psz_context *cxt)
return zone;
}

+ if (cxt->pmsg_read_cnt == 0) {
+ cxt->pmsg_read_cnt++;
+ zone = cxt->ppsz;
+ if (psz_old_ok(zone))
+ return zone;
+ }
+
return NULL;
}

@@ -629,7 +831,7 @@ static ssize_t psz_oops_read(struct pstore_zone *zone,
return -ENOMEM;
}

- size = psz_zone_read(zone, record->buf + hlen, size,
+ size = psz_zone_read_buffer(zone, record->buf + hlen, size,
sizeof(struct psz_oops_header));
if (unlikely(size < 0)) {
kfree(record->buf);
@@ -639,6 +841,32 @@ static ssize_t psz_oops_read(struct pstore_zone *zone,
return size + hlen;
}

+/*
+ * Copy the old data of @zone into a newly allocated @record->buf.
+ *
+ * Returns the number of bytes placed in @record->buf, or a negative errno:
+ * -ENOSPC if @zone or @record is NULL, -ENOMSG if the zone has no old
+ * buffer or psz_zone_read_oldbuf() fails, -ENOMEM if the allocation fails.
+ * On a read failure the buffer is freed before returning.
+ */
+static ssize_t psz_record_read(struct pstore_zone *zone,
+		struct pstore_record *record)
+{
+	struct psz_buffer *old;
+	size_t datalen;
+
+	if (!zone || !record)
+		return -ENOSPC;
+
+	old = (struct psz_buffer *)zone->oldbuf;
+	if (!old)
+		return -ENOMSG;
+
+	datalen = atomic_read(&old->datalen);
+	record->buf = kmalloc(datalen, GFP_KERNEL);
+	if (!record->buf)
+		return -ENOMEM;
+
+	if (likely(!psz_zone_read_oldbuf(zone, record->buf, datalen, 0)))
+		return datalen;
+
+	kfree(record->buf);
+	return -ENOMSG;
+}
+
static ssize_t psz_pstore_read(struct pstore_record *record)
{
struct psz_context *cxt = record->psi->data;
@@ -663,6 +891,9 @@ static ssize_t psz_pstore_read(struct pstore_record *record)
readop = psz_oops_read;
record->id = cxt->oops_read_cnt - 1;
break;
+ case PSTORE_TYPE_PMSG:
+ readop = psz_record_read;
+ break;
default:
goto next_zone;
}
@@ -719,8 +950,10 @@ static struct pstore_zone *psz_init_zone(enum pstore_type_id type,
zone->type = type;
zone->buffer_size = size - sizeof(struct psz_buffer);
zone->buffer->sig = type ^ PSZ_SIG;
+ zone->oldbuf = NULL;
atomic_set(&zone->dirty, 0);
atomic_set(&zone->buffer->datalen, 0);
+ atomic_set(&zone->buffer->start, 0);

*off += size;

@@ -804,6 +1037,8 @@ static void psz_free_all_zones(struct psz_context *cxt)
{
if (cxt->opszs)
psz_free_zones(&cxt->opszs, &cxt->oops_max_cnt);
+ if (cxt->ppsz)
+ psz_free_zone(&cxt->ppsz);
}

static int psz_alloc_zones(struct psz_context *cxt)
@@ -811,18 +1046,26 @@ static int psz_alloc_zones(struct psz_context *cxt)
struct pstore_zone_info *info = cxt->pstore_zone_info;
loff_t off = 0;
int err;
- size_t size;
+ size_t off_size = 0;

- size = info->total_size;
- cxt->opszs = psz_init_zones(PSTORE_TYPE_DMESG, &off, size,
+ off_size += info->pmsg_size;
+ cxt->ppsz = psz_init_zone(PSTORE_TYPE_PMSG, &off, info->pmsg_size);
+ if (IS_ERR(cxt->ppsz)) {
+ err = PTR_ERR(cxt->ppsz);
+ goto free_out;
+ }
+
+ cxt->opszs = psz_init_zones(PSTORE_TYPE_DMESG, &off,
+ info->total_size - off_size,
info->kmsg_size, &cxt->oops_max_cnt);
if (IS_ERR(cxt->opszs)) {
err = PTR_ERR(cxt->opszs);
- goto fail_out;
+ goto free_out;
}

return 0;
-fail_out:
+free_out:
+ psz_free_all_zones(cxt);
return err;
}

@@ -845,7 +1088,7 @@ int register_pstore_zone(struct pstore_zone_info *info)
return -EINVAL;
}

- if (!info->kmsg_size) {
+ if (!info->kmsg_size && !info->pmsg_size) {
pr_warn("at least one of the records be non-zero\n");
return -EINVAL;
}
@@ -872,6 +1115,7 @@ int register_pstore_zone(struct pstore_zone_info *info)

check_size(total_size, 4096);
check_size(kmsg_size, SECTOR_SIZE);
+ check_size(pmsg_size, SECTOR_SIZE);

#undef check_size

@@ -898,6 +1142,7 @@ int register_pstore_zone(struct pstore_zone_info *info)
pr_debug("register %s with properties:\n", info->name);
pr_debug("\ttotal size : %ld Bytes\n", info->total_size);
pr_debug("\toops size : %ld Bytes\n", info->kmsg_size);
+ pr_debug("\tpmsg size : %ld Bytes\n", info->pmsg_size);

err = psz_alloc_zones(cxt);
if (err) {
@@ -926,6 +1171,10 @@ int register_pstore_zone(struct pstore_zone_info *info)
pr_cont(",panic_write");
pr_cont(")");
}
+ if (info->pmsg_size) {
+ cxt->pstore.flags |= PSTORE_FLAGS_PMSG;
+ pr_cont(" pmsg");
+ }
pr_cont("\n");

err = pstore_register(&cxt->pstore);
diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h
index a6a79ff1351b..39c2cb944123 100644
--- a/include/linux/pstore_zone.h
+++ b/include/linux/pstore_zone.h
@@ -17,6 +17,7 @@
* @kmsg_size: The size of oops/panic zone. Zero means disabled, otherwise,
* it must be multiple of SECTOR_SIZE(512 Bytes).
* @max_reason: Maximum kmsg dump reason to store.
+ * @pmsg_size: The size of pmsg zone. Same rules as @kmsg_size: zero means disabled, otherwise it must be a multiple of SECTOR_SIZE.
* @read: The general read operation. Both of the function parameters
* @size and @offset are relative value to storage.
* On success, the number of bytes should be returned, others
@@ -33,6 +34,7 @@ struct pstore_zone_info {
unsigned long total_size;
unsigned long kmsg_size;
int max_reason;
+ unsigned long pmsg_size;
psz_read_op read;
psz_write_op write;
psz_write_op panic_write;
--
1.9.1