Hi,
This series adds DMA operations support to the qemu fw_cfg kernel
module and populates "etc/vmcoreinfo" with vmcoreinfo location
details.
Note: the support for this entry handling has been merged for upcoming
qemu release (2.11).
v8:
- fix ltp again: bring back kmalloc() for DMA memory (see "What memory
is DMA'able?" limitations in Documentation/DMA-API-HOWTO.txt)
Interestingly, it didn't fail when the module was linked in..
v7:
- add a patch to fix driver remove()
- remove DMA operatiom timeout (qemu finishes sync today)
- synchronize the DMA transfer before reading from CPU
- removed kmalloc() use static allocation instead
- drop some r-b tags
v6:
- change acpi_acquire_global_lock() error to return EINVAL
(instead of EBUSY)
- replace 0 as pointer argument for NULL
- add Gabriel r-b/a-b tags
v5:
- resent to CC kdump people on the paddr_vmcoreinfo_note() export patch
v4:
- export paddr_vmcoreinfo_note() to fix fw_cfg.ko build
- fix build with !CONFIG_CRASH_CORE
- replace the unbounded yield() loop with a usleep_range() loop and a
200ms timeout
- do not write vmcoreinfo entry when running the kdump kernel (D. Hatayama)
- drop the experimental sysfs write support patch from this series
v3: (thanks kbuild)
- add "fw_cfg: fix the command line module name" patch
- fix build of "fw_cfg: add DMA register" with CONFIG_FW_CFG_SYSFS_CMDLINE=y
- fix 'Wshift-count-overflow'
v2:
- use platform device for dma mapping
- add etc/vmcoreinfo patch
- some code cleanups
Marc-André Lureau (5):
fw_cfg: fix driver remove
fw_cfg: add DMA register
fw_cfg: do DMA read operation
crash: export paddr_vmcoreinfo_note()
fw_cfg: write vmcoreinfo details
drivers/firmware/qemu_fw_cfg.c | 283 ++++++++++++++++++++++++++++++++++++-----
kernel/crash_core.c | 1 +
2 files changed, 254 insertions(+), 30 deletions(-)
--
2.15.0.277.ga3d2ad2c43
From 1605710912591754523@xxx Wed Jul 11 16:07:36 +0000 2018
X-GM-THRID: 1585342434913349074
X-Gmail-Labels: Inbox,Category Forums,Downloaded_2018-07
Add an optional <dma_off> kernel module (or command line) parameter
using the following syntax:
[qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
or
[qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
and initializes the register address using given or default offset.
Signed-off-by: Marc-André Lureau <[email protected]>
Reviewed-by: Gabriel Somlo <[email protected]>
---
drivers/firmware/qemu_fw_cfg.c | 53 ++++++++++++++++++++++++++++++++----------
1 file changed, 41 insertions(+), 12 deletions(-)
diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index deb483064f53..740df0df2260 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -10,20 +10,21 @@
* and select subsets of aarch64), a Device Tree node (on arm), or using
* a kernel module (or command line) parameter with the following syntax:
*
- * [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>]
+ * [qemu_fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
* or
- * [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>]
+ * [qemu_fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
*
* where:
* <size> := size of ioport or mmio range
* <base> := physical base address of ioport or mmio range
* <ctrl_off> := (optional) offset of control register
* <data_off> := (optional) offset of data register
+ * <dma_off> := (optional) offset of dma register
*
* e.g.:
- * qemu_fw_cfg.ioport=2@0x510:0:1 (the default on x86)
+ * qemu_fw_cfg.ioport=12@0x510:0:1:4 (the default on x86)
* or
- * qemu_fw_cfg.mmio=0xA@0x9020000:8:0 (the default on arm)
+ * qemu_fw_cfg.mmio=16@0x9020000:8:0:16 (the default on arm)
*/
#include <linux/module.h>
@@ -63,6 +64,7 @@ static resource_size_t fw_cfg_p_size;
static void __iomem *fw_cfg_dev_base;
static void __iomem *fw_cfg_reg_ctrl;
static void __iomem *fw_cfg_reg_data;
+static void __iomem *fw_cfg_reg_dma;
/* atomic access to fw_cfg device (potentially slow i/o, so using mutex) */
static DEFINE_MUTEX(fw_cfg_dev_lock);
@@ -118,12 +120,14 @@ static void fw_cfg_io_cleanup(void)
# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
# define FW_CFG_CTRL_OFF 0x08
# define FW_CFG_DATA_OFF 0x00
+# define FW_CFG_DMA_OFF 0x10
# elif (defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC32)) /* ppc/mac,sun4m */
# define FW_CFG_CTRL_OFF 0x00
# define FW_CFG_DATA_OFF 0x02
# elif (defined(CONFIG_X86) || defined(CONFIG_SPARC64)) /* x86, sun4u */
# define FW_CFG_CTRL_OFF 0x00
# define FW_CFG_DATA_OFF 0x01
+# define FW_CFG_DMA_OFF 0x04
# else
# error "QEMU FW_CFG not available on this architecture!"
# endif
@@ -133,7 +137,7 @@ static void fw_cfg_io_cleanup(void)
static int fw_cfg_do_platform_probe(struct platform_device *pdev)
{
char sig[FW_CFG_SIG_SIZE];
- struct resource *range, *ctrl, *data;
+ struct resource *range, *ctrl, *data, *dma;
/* acquire i/o range details */
fw_cfg_is_mmio = false;
@@ -170,6 +174,7 @@ static int fw_cfg_do_platform_probe(struct platform_device *pdev)
/* were custom register offsets provided (e.g. on the command line)? */
ctrl = platform_get_resource_byname(pdev, IORESOURCE_REG, "ctrl");
data = platform_get_resource_byname(pdev, IORESOURCE_REG, "data");
+ dma = platform_get_resource_byname(pdev, IORESOURCE_REG, "dma");
if (ctrl && data) {
fw_cfg_reg_ctrl = fw_cfg_dev_base + ctrl->start;
fw_cfg_reg_data = fw_cfg_dev_base + data->start;
@@ -179,6 +184,13 @@ static int fw_cfg_do_platform_probe(struct platform_device *pdev)
fw_cfg_reg_data = fw_cfg_dev_base + FW_CFG_DATA_OFF;
}
+ if (dma)
+ fw_cfg_reg_dma = fw_cfg_dev_base + dma->start;
+#ifdef FW_CFG_DMA_OFF
+ else
+ fw_cfg_reg_dma = fw_cfg_dev_base + FW_CFG_DMA_OFF;
+#endif
+
/* verify fw_cfg device signature */
fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
@@ -629,6 +641,7 @@ static struct platform_device *fw_cfg_cmdline_dev;
/* use special scanf/printf modifier for phys_addr_t, resource_size_t */
#define PH_ADDR_SCAN_FMT "@%" __PHYS_ADDR_PREFIX "i%n" \
":%" __PHYS_ADDR_PREFIX "i" \
+ ":%" __PHYS_ADDR_PREFIX "i%n" \
":%" __PHYS_ADDR_PREFIX "i%n"
#define PH_ADDR_PR_1_FMT "0x%" __PHYS_ADDR_PREFIX "x@" \
@@ -638,12 +651,15 @@ static struct platform_device *fw_cfg_cmdline_dev;
":%" __PHYS_ADDR_PREFIX "u" \
":%" __PHYS_ADDR_PREFIX "u"
+#define PH_ADDR_PR_4_FMT PH_ADDR_PR_3_FMT \
+ ":%" __PHYS_ADDR_PREFIX "u"
+
static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
{
- struct resource res[3] = {};
+ struct resource res[4] = {};
char *str;
phys_addr_t base;
- resource_size_t size, ctrl_off, data_off;
+ resource_size_t size, ctrl_off, data_off, dma_off;
int processed, consumed = 0;
/* only one fw_cfg device can exist system-wide, so if one
@@ -659,19 +675,20 @@ static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
/* consume "<size>" portion of command line argument */
size = memparse(arg, &str);
- /* get "@<base>[:<ctrl_off>:<data_off>]" chunks */
+ /* get "@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]" chunks */
processed = sscanf(str, PH_ADDR_SCAN_FMT,
&base, &consumed,
- &ctrl_off, &data_off, &consumed);
+ &ctrl_off, &data_off, &consumed,
+ &dma_off, &consumed);
- /* sscanf() must process precisely 1 or 3 chunks:
+ /* sscanf() must process precisely 1, 3 or 4 chunks:
* <base> is mandatory, optionally followed by <ctrl_off>
- * and <data_off>;
+ * and <data_off>, and <dma_off>;
* there must be no extra characters after the last chunk,
* so str[consumed] must be '\0'.
*/
if (str[consumed] ||
- (processed != 1 && processed != 3))
+ (processed != 1 && processed != 3 && processed != 4))
return -EINVAL;
res[0].start = base;
@@ -688,6 +705,11 @@ static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
res[2].start = data_off;
res[2].flags = IORESOURCE_REG;
}
+ if (processed > 3) {
+ res[3].name = "dma";
+ res[3].start = dma_off;
+ res[3].flags = IORESOURCE_REG;
+ }
/* "processed" happens to nicely match the number of resources
* we need to pass in to this platform device.
@@ -722,6 +744,13 @@ static int fw_cfg_cmdline_get(char *buf, const struct kernel_param *kp)
fw_cfg_cmdline_dev->resource[0].start,
fw_cfg_cmdline_dev->resource[1].start,
fw_cfg_cmdline_dev->resource[2].start);
+ case 4:
+ return snprintf(buf, PAGE_SIZE, PH_ADDR_PR_4_FMT,
+ resource_size(&fw_cfg_cmdline_dev->resource[0]),
+ fw_cfg_cmdline_dev->resource[0].start,
+ fw_cfg_cmdline_dev->resource[1].start,
+ fw_cfg_cmdline_dev->resource[2].start,
+ fw_cfg_cmdline_dev->resource[3].start);
}
/* Should never get here */
--
2.15.0.277.ga3d2ad2c43
From 1585060111352895887@xxx Sat Nov 25 17:31:58 +0000 2017
X-GM-THRID: 1585008173950843474
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread
On driver remove(), all objects created during probe() should be
removed, but sysfs qemu_fw_cfg/rev file was left. Also reorder
functions to match probe() error cleanup code.
Signed-off-by: Marc-André Lureau <[email protected]>
---
drivers/firmware/qemu_fw_cfg.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 5cfe39f7a45f..deb483064f53 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -582,9 +582,10 @@ static int fw_cfg_sysfs_remove(struct platform_device *pdev)
{
pr_debug("fw_cfg: unloading.\n");
fw_cfg_sysfs_cache_cleanup();
+ sysfs_remove_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
+ fw_cfg_io_cleanup();
fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset);
fw_cfg_kobj_cleanup(fw_cfg_sel_ko);
- fw_cfg_io_cleanup();
return 0;
}
--
2.15.0.277.ga3d2ad2c43
From 1590176825759334977@xxx Sun Jan 21 04:59:57 +0000 2018
X-GM-THRID: 1582578719977855403
X-Gmail-Labels: Inbox,Category Forums,Downloaded_2018-01
The following patch is going to use the symbol from the fw_cfg module,
to call the function and write the note location details in the
vmcoreinfo entry, so qemu can produce dumps with the vmcoreinfo note.
CC: Andrew Morton <[email protected]>
CC: Baoquan He <[email protected]>
CC: Dave Young <[email protected]>
CC: Dave Young <[email protected]>
CC: Hari Bathini <[email protected]>
CC: Tony Luck <[email protected]>
CC: Vivek Goyal <[email protected]>
Signed-off-by: Marc-André Lureau <[email protected]>
Acked-by: Gabriel Somlo <[email protected]>
---
kernel/crash_core.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index b3663896278e..2394f0501c65 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -376,6 +376,7 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
{
return __pa(vmcoreinfo_note);
}
+EXPORT_SYMBOL(paddr_vmcoreinfo_note);
static int __init crash_save_vmcoreinfo_init(void)
{
--
2.15.0.277.ga3d2ad2c43
From 1584957725868680818@xxx Fri Nov 24 14:24:35 +0000 2017
X-GM-THRID: 1584938985787925683
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread
If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
the kdump kernel, write the addr/size of the vmcoreinfo ELF note.
Signed-off-by: Marc-André Lureau <[email protected]>
Reviewed-by: Gabriel Somlo <[email protected]>
---
drivers/firmware/qemu_fw_cfg.c | 82 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 81 insertions(+), 1 deletion(-)
diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index b7b5c88a3422..e63fd0fddb61 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -35,6 +35,8 @@
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
+#include <linux/crash_core.h>
+#include <linux/crash_dump.h>
MODULE_AUTHOR("Gabriel L. Somlo <[email protected]>");
MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -58,6 +60,8 @@ MODULE_LICENSE("GPL");
/* fw_cfg "file name" is up to 56 characters (including terminating nul) */
#define FW_CFG_MAX_FILE_PATH 56
+#define VMCOREINFO_FORMAT_ELF 0x1
+
/* platform device for dma mapping */
static struct device *dev;
@@ -120,7 +124,8 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
dma_addr_t dma;
ssize_t ret = length;
enum dma_data_direction dir =
- (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0);
+ (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0) |
+ (control & FW_CFG_DMA_CTL_WRITE ? DMA_TO_DEVICE : 0);
if (address && length) {
dma_addr = dma_map_single(dev, address, length, dir);
@@ -217,6 +222,48 @@ static ssize_t fw_cfg_read_blob(u16 key,
return ret;
}
+#ifdef CONFIG_CRASH_CORE
+/* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
+static ssize_t fw_cfg_write_blob(u16 key,
+ void *buf, loff_t pos, size_t count)
+{
+ u32 glk = -1U;
+ acpi_status status;
+ ssize_t ret = count;
+
+ /* If we have ACPI, ensure mutual exclusion against any potential
+ * device access by the firmware, e.g. via AML methods:
+ */
+ status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
+ if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
+ /* Should never get here */
+ WARN(1, "%s: Failed to lock ACPI!\n", __func__);
+ memset(buf, 0, count);
+ return -EINVAL;
+ }
+
+ mutex_lock(&fw_cfg_dev_lock);
+ if (pos == 0) {
+ ret = fw_cfg_dma_transfer(buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG_DMA_CTL_WRITE);
+ } else {
+ iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+ ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
+ if (ret < 0)
+ goto end;
+ ret = fw_cfg_dma_transfer(buf, count, FW_CFG_DMA_CTL_WRITE);
+ }
+
+end:
+ mutex_unlock(&fw_cfg_dev_lock);
+
+ acpi_release_global_lock(glk);
+
+ return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
/* clean up fw_cfg device i/o */
static void fw_cfg_io_cleanup(void)
{
@@ -335,6 +382,32 @@ struct fw_cfg_sysfs_entry {
struct list_head list;
};
+#ifdef CONFIG_CRASH_CORE
+static ssize_t write_vmcoreinfo(const struct fw_cfg_file *f)
+{
+ struct vmci {
+ __le16 host_format;
+ __le16 guest_format;
+ __le32 size;
+ __le64 paddr;
+ } __packed;
+ static struct vmci data;
+ ssize_t ret;
+
+ data = (struct vmci) {
+ .guest_format = cpu_to_le16(VMCOREINFO_FORMAT_ELF),
+ .size = cpu_to_le32(VMCOREINFO_NOTE_SIZE),
+ .paddr = cpu_to_le64(paddr_vmcoreinfo_note())
+ };
+ /* spare ourself reading host format support for now since we
+ * don't know what else to format - host may ignore ours
+ */
+ ret = fw_cfg_write_blob(f->select, &data, 0, sizeof(struct vmci));
+
+ return ret;
+}
+#endif /* CONFIG_CRASH_CORE */
+
/* get fw_cfg_sysfs_entry from kobject member */
static inline struct fw_cfg_sysfs_entry *to_entry(struct kobject *kobj)
{
@@ -574,6 +647,13 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f)
int err;
struct fw_cfg_sysfs_entry *entry;
+#ifdef CONFIG_CRASH_CORE
+ if (strcmp(f->name, "etc/vmcoreinfo") == 0 && !is_kdump_kernel()) {
+ if (write_vmcoreinfo(f) < 0)
+ pr_warn("fw_cfg: failed to write vmcoreinfo");
+ }
+#endif
+
/* allocate new entry */
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
--
2.15.0.277.ga3d2ad2c43
From 1584772219671139860@xxx Wed Nov 22 13:16:03 +0000 2017
X-GM-THRID: 1584772219671139860
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread