From: Marc-André Lureau <[email protected]>
Hi,
This series brings DMA operations support to the fw_cfg kernel module.
There is a few FIXME that I don't know how to improve.
The 'write' operation is only meant as a proof-of-concept, since some
qemu firmware maintainers are strongly against the usage of this
operation by the kernel/user-space.
Marc-André Lureau (3):
fw_cfg: add DMA register
fw_cfg: do DMA read operation
fw_cfg: add DMA write operation proof-of-concept
drivers/firmware/qemu_fw_cfg.c | 240 ++++++++++++++++++++++++++++++++++++-----
1 file changed, 211 insertions(+), 29 deletions(-)
--
2.12.0.191.gc5d8de91d
From: Marc-André Lureau <[email protected]>
Add an optional <dma_off> kernel module (or command line) parameter
using the following syntax:
[fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
or
[fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
and initializes the register address using given or default offset.
Signed-off-by: Marc-André Lureau <[email protected]>
---
drivers/firmware/qemu_fw_cfg.c | 53 ++++++++++++++++++++++++++++++++----------
1 file changed, 41 insertions(+), 12 deletions(-)
diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 0e2011636fbb..614037703530 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -10,20 +10,21 @@
* and select subsets of aarch64), a Device Tree node (on arm), or using
* a kernel module (or command line) parameter with the following syntax:
*
- * [fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>]
+ * [fw_cfg.]ioport=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
* or
- * [fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>]
+ * [fw_cfg.]mmio=<size>@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]
*
* where:
* <size> := size of ioport or mmio range
* <base> := physical base address of ioport or mmio range
* <ctrl_off> := (optional) offset of control register
* <data_off> := (optional) offset of data register
+ * <dma_off> := (optional) offset of dma register
*
* e.g.:
- * fw_cfg.ioport=2@0x510:0:1 (the default on x86)
+ * fw_cfg.ioport=2@0x510:0:1:4 (the default on x86)
* or
- * fw_cfg.mmio=0xA@0x9020000:8:0 (the default on arm)
+ * fw_cfg.mmio=0xA@0x9020000:8:0:16 (the default on arm)
*/
#include <linux/module.h>
@@ -63,6 +64,7 @@ static resource_size_t fw_cfg_p_size;
static void __iomem *fw_cfg_dev_base;
static void __iomem *fw_cfg_reg_ctrl;
static void __iomem *fw_cfg_reg_data;
+static void __iomem *fw_cfg_reg_dma;
/* atomic access to fw_cfg device (potentially slow i/o, so using mutex) */
static DEFINE_MUTEX(fw_cfg_dev_lock);
@@ -118,12 +120,14 @@ static void fw_cfg_io_cleanup(void)
# if (defined(CONFIG_ARM) || defined(CONFIG_ARM64))
# define FW_CFG_CTRL_OFF 0x08
# define FW_CFG_DATA_OFF 0x00
+# define FW_CFG_DMA_OFF 0x10
# elif (defined(CONFIG_PPC_PMAC) || defined(CONFIG_SPARC32)) /* ppc/mac,sun4m */
# define FW_CFG_CTRL_OFF 0x00
# define FW_CFG_DATA_OFF 0x02
# elif (defined(CONFIG_X86) || defined(CONFIG_SPARC64)) /* x86, sun4u */
# define FW_CFG_CTRL_OFF 0x00
# define FW_CFG_DATA_OFF 0x01
+# define FW_CFG_DMA_OFF 0x04
# else
# error "QEMU FW_CFG not available on this architecture!"
# endif
@@ -133,7 +137,7 @@ static void fw_cfg_io_cleanup(void)
static int fw_cfg_do_platform_probe(struct platform_device *pdev)
{
char sig[FW_CFG_SIG_SIZE];
- struct resource *range, *ctrl, *data;
+ struct resource *range, *ctrl, *data, *dma;
/* acquire i/o range details */
fw_cfg_is_mmio = false;
@@ -170,6 +174,7 @@ static int fw_cfg_do_platform_probe(struct platform_device *pdev)
/* were custom register offsets provided (e.g. on the command line)? */
ctrl = platform_get_resource_byname(pdev, IORESOURCE_REG, "ctrl");
data = platform_get_resource_byname(pdev, IORESOURCE_REG, "data");
+ dma = platform_get_resource_byname(pdev, IORESOURCE_REG, "dma");
if (ctrl && data) {
fw_cfg_reg_ctrl = fw_cfg_dev_base + ctrl->start;
fw_cfg_reg_data = fw_cfg_dev_base + data->start;
@@ -179,6 +184,13 @@ static int fw_cfg_do_platform_probe(struct platform_device *pdev)
fw_cfg_reg_data = fw_cfg_dev_base + FW_CFG_DATA_OFF;
}
+ if (dma)
+ fw_cfg_reg_dma = fw_cfg_dev_base + dma->start;
+#ifdef FW_CFG_DMA_OFF
+ else
+ fw_cfg_reg_dma = fw_cfg_dev_base + FW_CFG_DMA_OFF;
+#endif
+
/* verify fw_cfg device signature */
fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
@@ -628,6 +640,7 @@ static struct platform_device *fw_cfg_cmdline_dev;
/* use special scanf/printf modifier for phys_addr_t, resource_size_t */
#define PH_ADDR_SCAN_FMT "@%" __PHYS_ADDR_PREFIX "i%n" \
":%" __PHYS_ADDR_PREFIX "i" \
+ ":%" __PHYS_ADDR_PREFIX "i%n" \
":%" __PHYS_ADDR_PREFIX "i%n"
#define PH_ADDR_PR_1_FMT "0x%" __PHYS_ADDR_PREFIX "x@" \
@@ -637,12 +650,15 @@ static struct platform_device *fw_cfg_cmdline_dev;
":%" __PHYS_ADDR_PREFIX "u" \
":%" __PHYS_ADDR_PREFIX "u"
+#define PH_ADDR_PR_4_FMT PH_ADDR_PR_3_FMT \
+ ":%" __PHYS_ADDR_PREFIX "u"
+
static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
{
- struct resource res[3] = {};
+ struct resource res[4] = {};
char *str;
phys_addr_t base;
- resource_size_t size, ctrl_off, data_off;
+ resource_size_t size, ctrl_off, data_off, dma_off;
int processed, consumed = 0;
/* only one fw_cfg device can exist system-wide, so if one
@@ -658,19 +674,20 @@ static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
/* consume "<size>" portion of command line argument */
size = memparse(arg, &str);
- /* get "@<base>[:<ctrl_off>:<data_off>]" chunks */
+ /* get "@<base>[:<ctrl_off>:<data_off>[:<dma_off>]]" chunks */
processed = sscanf(str, PH_ADDR_SCAN_FMT,
&base, &consumed,
- &ctrl_off, &data_off, &consumed);
+ &ctrl_off, &data_off, &consumed,
+ &dma_off, &consumed);
- /* sscanf() must process precisely 1 or 3 chunks:
+ /* sscanf() must process precisely 1, 3 or 4 chunks:
* <base> is mandatory, optionally followed by <ctrl_off>
- * and <data_off>;
+ * and <data_off>, and <dma_off>;
* there must be no extra characters after the last chunk,
* so str[consumed] must be '\0'.
*/
if (str[consumed] ||
- (processed != 1 && processed != 3))
+ (processed != 1 && processed != 3 && process != 4))
return -EINVAL;
res[0].start = base;
@@ -687,6 +704,11 @@ static int fw_cfg_cmdline_set(const char *arg, const struct kernel_param *kp)
res[2].start = data_off;
res[2].flags = IORESOURCE_REG;
}
+ if (processed > 3) {
+ res[3].name = "dma";
+ res[3].start = dma_off;
+ res[3].flags = IORESOURCE_REG;
+ }
/* "processed" happens to nicely match the number of resources
* we need to pass in to this platform device.
@@ -721,6 +743,13 @@ static int fw_cfg_cmdline_get(char *buf, const struct kernel_param *kp)
fw_cfg_cmdline_dev->resource[0].start,
fw_cfg_cmdline_dev->resource[1].start,
fw_cfg_cmdline_dev->resource[2].start);
+ case 4:
+ return snprintf(buf, PAGE_SIZE, PH_ADDR_PR_4_FMT,
+ resource_size(&fw_cfg_cmdline_dev->resource[0]),
+ fw_cfg_cmdline_dev->resource[0].start,
+ fw_cfg_cmdline_dev->resource[1].start,
+ fw_cfg_cmdline_dev->resource[2].start,
+ fw_cfg_cmdline_dev->resource[3].start);
}
/* Should never get here */
--
2.12.0.191.gc5d8de91d
From: Marc-André Lureau <[email protected]>
Modify fw_cfg_read_blob() to use DMA if the device supports it.
Return errors, because the operation may fail.
This is a proof-of-concept patch with some FIXME. It uses yield() to
wait for the memory to be cleared, and it uses 2 iowrite32() to write a
64bit value. Help on how to improve this is welcome.
We may also want to switch the *buf address to use only kmalloc'ed
buffer (instead of allowing stack/image addresses with dma=false).
Signed-off-by: Marc-André Lureau <[email protected]>
---
drivers/firmware/qemu_fw_cfg.c | 125 +++++++++++++++++++++++++++++++++++------
1 file changed, 109 insertions(+), 16 deletions(-)
diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 614037703530..430289332c95 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -33,6 +33,7 @@
#include <linux/slab.h>
#include <linux/io.h>
#include <linux/ioport.h>
+#include <linux/dma-mapping.h>
MODULE_AUTHOR("Gabriel L. Somlo <[email protected]>");
MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
@@ -43,12 +44,22 @@ MODULE_LICENSE("GPL");
#define FW_CFG_ID 0x01
#define FW_CFG_FILE_DIR 0x19
+#define FW_CFG_VERSION_DMA 2
+#define FW_CFG_DMA_CTL_ERROR 0x01
+#define FW_CFG_DMA_CTL_READ 0x02
+#define FW_CFG_DMA_CTL_SKIP 0x04
+#define FW_CFG_DMA_CTL_SELECT 0x08
+#define FW_CFG_DMA_CTL_WRITE 0x10
+
/* size in bytes of fw_cfg signature */
#define FW_CFG_SIG_SIZE 4
/* fw_cfg "file name" is up to 56 characters (including terminating nul) */
#define FW_CFG_MAX_FILE_PATH 56
+/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
+static u32 fw_cfg_rev;
+
/* fw_cfg file directory entry type */
struct fw_cfg_file {
u32 size;
@@ -57,6 +68,12 @@ struct fw_cfg_file {
char name[FW_CFG_MAX_FILE_PATH];
};
+struct fw_cfg_dma {
+ u32 control;
+ u32 length;
+ u64 address;
+} __packed;
+
/* fw_cfg device i/o register addresses */
static bool fw_cfg_is_mmio;
static phys_addr_t fw_cfg_p_base;
@@ -75,12 +92,73 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
}
+static inline bool fw_cfg_dma_enabled(void)
+{
+ return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
+}
+
+static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
+{
+ dma_addr_t dma_addr = 0;
+ struct fw_cfg_dma *d;
+ dma_addr_t dma;
+ ssize_t ret = length;
+ enum dma_data_direction dir =
+ (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0);
+
+ if (address && length) {
+ dma_addr = dma_map_single(NULL, address, length, dir);
+ if (dma_mapping_error(NULL, dma_addr)) {
+ WARN(1, "fw_cfg_dma_transfer: failed to map address\n");
+ return -EFAULT;
+ }
+ }
+
+ d = kmalloc(sizeof(*d), GFP_KERNEL | GFP_DMA);
+ if (!d) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ dma = dma_map_single(NULL, d, sizeof(*d), DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(NULL, dma)) {
+ WARN(1, "fw_cfg_dma_transfer: failed to map fw_cfg_dma\n");
+ ret = -EFAULT;
+ goto end;
+ }
+
+ *d = (struct fw_cfg_dma) {
+ .address = cpu_to_be64(dma_addr),
+ .length = cpu_to_be32(length),
+ .control = cpu_to_be32(control)
+ };
+ /* fixme: no iowrite64? */
+ iowrite32(cpu_to_be32(dma >> 32), fw_cfg_reg_dma);
+ iowrite32(cpu_to_be32(dma), fw_cfg_reg_dma + 4);
+ while (be32_to_cpu(d->control) & ~FW_CFG_DMA_CTL_ERROR)
+ yield(); /* fixme: wait_event? */
+
+ if (be32_to_cpu(d->control) & FW_CFG_DMA_CTL_ERROR)
+ ret = -EIO;
+
+ dma_unmap_single(NULL, dma, sizeof(*d), DMA_BIDIRECTIONAL);
+
+end:
+ kfree(d);
+ if (dma_addr)
+ dma_unmap_single(NULL, dma_addr, length, dir);
+
+ return ret;
+}
+
/* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
-static inline void fw_cfg_read_blob(u16 key,
- void *buf, loff_t pos, size_t count)
+static ssize_t fw_cfg_read_blob(u16 key,
+ void *buf, loff_t pos, size_t count,
+ bool dma)
{
u32 glk = -1U;
acpi_status status;
+ ssize_t ret = count;
/* If we have ACPI, ensure mutual exclusion against any potential
* device access by the firmware, e.g. via AML methods:
@@ -90,17 +168,36 @@ static inline void fw_cfg_read_blob(u16 key,
/* Should never get here */
WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
memset(buf, 0, count);
- return;
+ return -EBUSY;
}
mutex_lock(&fw_cfg_dev_lock);
- iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
- while (pos-- > 0)
- ioread8(fw_cfg_reg_data);
- ioread8_rep(fw_cfg_reg_data, buf, count);
+ if (dma && fw_cfg_dma_enabled()) {
+ if (pos == 0) {
+ ret = fw_cfg_dma_transfer(buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG_DMA_CTL_READ);
+ } else {
+ iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+ ret = fw_cfg_dma_transfer(0, pos, FW_CFG_DMA_CTL_SKIP);
+ if (ret < 0)
+ goto end;
+ ret = fw_cfg_dma_transfer(buf, count,
+ FW_CFG_DMA_CTL_READ);
+ }
+ } else {
+ iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+ while (pos-- > 0)
+ ioread8(fw_cfg_reg_data);
+ ioread8_rep(fw_cfg_reg_data, buf, count);
+ }
+
+end:
mutex_unlock(&fw_cfg_dev_lock);
acpi_release_global_lock(glk);
+
+ return ret;
}
/* clean up fw_cfg device i/o */
@@ -192,7 +289,7 @@ static int fw_cfg_do_platform_probe(struct platform_device *pdev)
#endif
/* verify fw_cfg device signature */
- fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
+ fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE, false);
if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
fw_cfg_io_cleanup();
return -ENODEV;
@@ -201,9 +298,6 @@ static int fw_cfg_do_platform_probe(struct platform_device *pdev)
return 0;
}
-/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
-static u32 fw_cfg_rev;
-
static ssize_t fw_cfg_showrev(struct kobject *k, struct attribute *a, char *buf)
{
return sprintf(buf, "%u\n", fw_cfg_rev);
@@ -351,8 +445,7 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, struct kobject *kobj,
if (count > entry->f.size - pos)
count = entry->f.size - pos;
- fw_cfg_read_blob(entry->f.select, buf, pos, count);
- return count;
+ return fw_cfg_read_blob(entry->f.select, buf, pos, count, true);
}
static struct bin_attribute fw_cfg_sysfs_attr_raw = {
@@ -505,7 +598,7 @@ static int fw_cfg_register_dir_entries(void)
struct fw_cfg_file *dir;
size_t dir_size;
- fw_cfg_read_blob(FW_CFG_FILE_DIR, &count, 0, sizeof(count));
+ fw_cfg_read_blob(FW_CFG_FILE_DIR, &count, 0, sizeof(count), false);
count = be32_to_cpu(count);
dir_size = count * sizeof(struct fw_cfg_file);
@@ -513,7 +606,7 @@ static int fw_cfg_register_dir_entries(void)
if (!dir)
return -ENOMEM;
- fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(count), dir_size);
+ fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(count), dir_size, true);
for (i = 0; i < count; i++) {
dir[i].size = be32_to_cpu(dir[i].size);
@@ -562,7 +655,7 @@ static int fw_cfg_sysfs_probe(struct platform_device *pdev)
goto err_probe;
/* get revision number, add matching top-level attribute */
- fw_cfg_read_blob(FW_CFG_ID, &fw_cfg_rev, 0, sizeof(fw_cfg_rev));
+ fw_cfg_read_blob(FW_CFG_ID, &fw_cfg_rev, 0, sizeof(fw_cfg_rev), false);
fw_cfg_rev = le32_to_cpu(fw_cfg_rev);
err = sysfs_create_file(fw_cfg_top_ko, &fw_cfg_rev_attr.attr);
if (err)
--
2.12.0.191.gc5d8de91d
From: Marc-André Lureau <[email protected]>
Since qemu 2.9, DMA write operations are allowed. However, usage of this
interface from kernel or user-space is strongly discouraged by the
maintainers. This patch is only meant as an experiment.
Signed-off-by: Marc-André Lureau <[email protected]>
---
drivers/firmware/qemu_fw_cfg.c | 64 ++++++++++++++++++++++++++++++++++++++++--
1 file changed, 62 insertions(+), 2 deletions(-)
diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 430289332c95..7da47fffe833 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -104,7 +104,8 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
dma_addr_t dma;
ssize_t ret = length;
enum dma_data_direction dir =
- (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0);
+ (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0) |
+ (control & FW_CFG_DMA_CTL_WRITE ? DMA_TO_DEVICE : 0);
if (address && length) {
dma_addr = dma_map_single(NULL, address, length, dir);
@@ -200,6 +201,46 @@ static ssize_t fw_cfg_read_blob(u16 key,
return ret;
}
+/* write chunk of given fw_cfg blob (caller responsible for sanity-check) */
+static ssize_t fw_cfg_write_blob(u16 key,
+ void *buf, loff_t pos, size_t count)
+{
+ u32 glk = -1U;
+ acpi_status status;
+ ssize_t ret = count;
+
+ /* If we have ACPI, ensure mutual exclusion against any potential
+ * device access by the firmware, e.g. via AML methods:
+ */
+ status = acpi_acquire_global_lock(ACPI_WAIT_FOREVER, &glk);
+ if (ACPI_FAILURE(status) && status != AE_NOT_CONFIGURED) {
+ /* Should never get here */
+ WARN(1, "fw_cfg_write_blob: Failed to lock ACPI!\n");
+ memset(buf, 0, count);
+ return -EBUSY;
+ }
+
+ mutex_lock(&fw_cfg_dev_lock);
+ if (pos == 0) {
+ ret = fw_cfg_dma_transfer(buf, count, key << 16
+ | FW_CFG_DMA_CTL_SELECT
+ | FW_CFG_DMA_CTL_WRITE);
+ } else {
+ iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+ ret = fw_cfg_dma_transfer(0, pos, FW_CFG_DMA_CTL_SKIP);
+ if (ret < 0)
+ goto end;
+ ret = fw_cfg_dma_transfer(buf, count, FW_CFG_DMA_CTL_WRITE);
+ }
+
+end:
+ mutex_unlock(&fw_cfg_dev_lock);
+
+ acpi_release_global_lock(glk);
+
+ return ret;
+}
+
/* clean up fw_cfg device i/o */
static void fw_cfg_io_cleanup(void)
{
@@ -448,9 +489,28 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file *filp, struct kobject *kobj,
return fw_cfg_read_blob(entry->f.select, buf, pos, count, true);
}
+static ssize_t fw_cfg_sysfs_write_raw(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
+{
+ struct fw_cfg_sysfs_entry *entry = to_entry(kobj);
+
+ if (!fw_cfg_dma_enabled())
+ return -ENOTSUPP;
+
+ if (pos >= entry->f.size && count)
+ return -EINVAL;
+
+ if (count > entry->f.size - pos)
+ count = entry->f.size - pos;
+
+ return fw_cfg_write_blob(entry->f.select, buf, pos, count);
+}
+
static struct bin_attribute fw_cfg_sysfs_attr_raw = {
- .attr = { .name = "raw", .mode = S_IRUSR },
+ .attr = { .name = "raw", .mode = 0600 },
.read = fw_cfg_sysfs_read_raw,
+ .write = fw_cfg_sysfs_write_raw,
};
/*
--
2.12.0.191.gc5d8de91d