2018-01-15 08:55:40

by Peter Xu

[permalink] [raw]
Subject: [PATCH] fw_cfg: don't use DMA mapping for fw_cfg device

fw_cfg device does not need IOMMU protection, so use physical addresses
always. That's how QEMU implements fw_cfg. Otherwise we'll see call
traces during boot when vIOMMU is enabled in guest:

[ 1.018306] ------------[ cut here ]------------
[ 1.018314] WARNING: CPU: 1 PID: 1 at drivers/firmware/qemu_fw_cfg.c:152 fw_cfg_dma_transfer+0x399/0x500
[ 1.018315] fw_cfg_dma_transfer: failed to map fw_cfg_dma
[ 1.018316] Modules linked in:
[ 1.018320] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 3.10.0-827.el7.x86_64 #1
[ 1.018321] Hardware name: Red Hat KVM, BIOS 1.11.0-1.el7 04/01/2014
[ 1.018322] Call Trace:
[ 1.018330] [<ffffffffafcf6b3a>] dump_stack+0x19/0x1b
[ 1.018334] [<ffffffffaf68fff8>] __warn+0xd8/0x100
[ 1.018336] [<ffffffffaf69007f>] warn_slowpath_fmt+0x5f/0x80
[ 1.018338] [<ffffffffafb67329>] fw_cfg_dma_transfer+0x399/0x500
[ 1.018340] [<ffffffffafb6753c>] fw_cfg_read_blob+0xac/0x1c0
[ 1.018342] [<ffffffffafb67720>] fw_cfg_register_dir_entries+0x80/0x450
[ 1.018344] [<ffffffffafb67d02>] fw_cfg_sysfs_probe+0x212/0x3f0
[ 1.018347] [<ffffffffafa70352>] platform_drv_probe+0x42/0x110
[ 1.018350] [<ffffffffafa6e002>] driver_probe_device+0xc2/0x3e0
[ 1.018352] [<ffffffffafa6e3f3>] __driver_attach+0x93/0xa0
[ 1.018354] [<ffffffffafa6e360>] ? __device_attach+0x40/0x40
[ 1.018359] [<ffffffffafa6bbd3>] bus_for_each_dev+0x73/0xc0
[ 1.018362] [<ffffffffafa6d97e>] driver_attach+0x1e/0x20
[ 1.018364] [<ffffffffafa6d420>] bus_add_driver+0x200/0x2d0
[ 1.018366] [<ffffffffb03c2041>] ? firmware_map_add_early+0x58/0x58
[ 1.018368] [<ffffffffafa6ea84>] driver_register+0x64/0xf0
[ 1.018370] [<ffffffffafa7013a>] __platform_driver_register+0x4a/0x50
[ 1.018372] [<ffffffffb03c2075>] fw_cfg_sysfs_init+0x34/0x61
[ 1.018376] [<ffffffffaf602108>] do_one_initcall+0xb8/0x230
[ 1.018379] [<ffffffffb036b34a>] kernel_init_freeable+0x17a/0x219
[ 1.018381] [<ffffffffb036ab19>] ? initcall_blacklist+0xb0/0xb0
[ 1.018383] [<ffffffffafce5b30>] ? rest_init+0x80/0x80
[ 1.018385] [<ffffffffafce5b3e>] kernel_init+0xe/0xf0
[ 1.018388] [<ffffffffafd08898>] ret_from_fork+0x58/0x90
[ 1.018390] [<ffffffffafce5b30>] ? rest_init+0x80/0x80
[ 1.018392] ---[ end trace d00a5b71608a8f59 ]---

Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1533367
Fixes: e90cb816599b ("fw_cfg: do DMA read operation", 2017-11-28)
CC: Marc-André Lureau <[email protected]>
CC: Michael S. Tsirkin <[email protected]>
Signed-off-by: Peter Xu <[email protected]>
--

This is based on tree:
https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git/log/?h=vhost

Please review, thanks.

Signed-off-by: Peter Xu <[email protected]>
---
drivers/firmware/qemu_fw_cfg.c | 37 ++++++++-----------------------------
1 file changed, 8 insertions(+), 29 deletions(-)

diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
index 6eb5d8f43c3e..62a44bc81a88 100644
--- a/drivers/firmware/qemu_fw_cfg.c
+++ b/drivers/firmware/qemu_fw_cfg.c
@@ -106,11 +106,12 @@ static inline bool fw_cfg_dma_enabled(void)
}

/* qemu fw_cfg device is sync today, but spec says it may become async */
-static void fw_cfg_wait_for_control(struct fw_cfg_dma *d, dma_addr_t dma)
+static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
{
do {
- dma_sync_single_for_cpu(dev, dma, sizeof(*d), DMA_FROM_DEVICE);
- if ((be32_to_cpu(d->control) & ~FW_CFG_DMA_CTL_ERROR) == 0)
+ u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
+
+ if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
return;

usleep_range(50, 100);
@@ -119,21 +120,9 @@ static void fw_cfg_wait_for_control(struct fw_cfg_dma *d, dma_addr_t dma)

static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
{
- dma_addr_t dma_addr = 0;
struct fw_cfg_dma *d = NULL;
- dma_addr_t dma;
ssize_t ret = length;
- enum dma_data_direction dir =
- (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0) |
- (control & FW_CFG_DMA_CTL_WRITE ? DMA_TO_DEVICE : 0);
-
- if (address && length) {
- dma_addr = dma_map_single(dev, address, length, dir);
- if (dma_mapping_error(NULL, dma_addr)) {
- WARN(1, "%s: failed to map address\n", __func__);
- return -EFAULT;
- }
- }
+ phys_addr_t dma;

d = kmalloc(sizeof(*d), GFP_KERNEL);
if (!d) {
@@ -142,34 +131,24 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
}

*d = (struct fw_cfg_dma) {
- .address = cpu_to_be64(dma_addr),
+ .address = cpu_to_be64(virt_to_phys(address)),
.length = cpu_to_be32(length),
.control = cpu_to_be32(control)
};

- dma = dma_map_single(dev, d, sizeof(*d), DMA_BIDIRECTIONAL);
- if (dma_mapping_error(NULL, dma)) {
- WARN(1, "%s: failed to map fw_cfg_dma\n", __func__);
- ret = -EFAULT;
- goto end;
- }
+ dma = virt_to_phys(d);

iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
iowrite32be(dma, fw_cfg_reg_dma + 4);

- fw_cfg_wait_for_control(d, dma);
+ fw_cfg_wait_for_control(d);

if (be32_to_cpu(d->control) & FW_CFG_DMA_CTL_ERROR) {
ret = -EIO;
}

- dma_unmap_single(dev, dma, sizeof(*d), DMA_BIDIRECTIONAL);
-
end:
kfree(d);
- if (dma_addr)
- dma_unmap_single(dev, dma_addr, length, dir);
-
return ret;
}

--
2.14.3


2018-01-15 11:22:51

by Marc-Andre Lureau

[permalink] [raw]
Subject: Re: [PATCH] fw_cfg: don't use DMA mapping for fw_cfg device

Hi

On Mon, Jan 15, 2018 at 9:55 AM, Peter Xu <[email protected]> wrote:
> fw_cfg device does not need IOMMU protection, so use physical addresses
> always. That's how QEMU implements fw_cfg. Otherwise we'll see call
> traces during boot when vIOMMU is enabled in guest:
>
> [ 1.018306] ------------[ cut here ]------------
> [ 1.018314] WARNING: CPU: 1 PID: 1 at drivers/firmware/qemu_fw_cfg.c:152 fw_cfg_dma_transfer+0x399/0x500
> [ 1.018315] fw_cfg_dma_transfer: failed to map fw_cfg_dma
> [ 1.018316] Modules linked in:
> [ 1.018320] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 3.10.0-827.el7.x86_64 #1
> [ 1.018321] Hardware name: Red Hat KVM, BIOS 1.11.0-1.el7 04/01/2014
> [ 1.018322] Call Trace:
> [ 1.018330] [<ffffffffafcf6b3a>] dump_stack+0x19/0x1b
> [ 1.018334] [<ffffffffaf68fff8>] __warn+0xd8/0x100
> [ 1.018336] [<ffffffffaf69007f>] warn_slowpath_fmt+0x5f/0x80
> [ 1.018338] [<ffffffffafb67329>] fw_cfg_dma_transfer+0x399/0x500
> [ 1.018340] [<ffffffffafb6753c>] fw_cfg_read_blob+0xac/0x1c0
> [ 1.018342] [<ffffffffafb67720>] fw_cfg_register_dir_entries+0x80/0x450
> [ 1.018344] [<ffffffffafb67d02>] fw_cfg_sysfs_probe+0x212/0x3f0
> [ 1.018347] [<ffffffffafa70352>] platform_drv_probe+0x42/0x110
> [ 1.018350] [<ffffffffafa6e002>] driver_probe_device+0xc2/0x3e0
> [ 1.018352] [<ffffffffafa6e3f3>] __driver_attach+0x93/0xa0
> [ 1.018354] [<ffffffffafa6e360>] ? __device_attach+0x40/0x40
> [ 1.018359] [<ffffffffafa6bbd3>] bus_for_each_dev+0x73/0xc0
> [ 1.018362] [<ffffffffafa6d97e>] driver_attach+0x1e/0x20
> [ 1.018364] [<ffffffffafa6d420>] bus_add_driver+0x200/0x2d0
> [ 1.018366] [<ffffffffb03c2041>] ? firmware_map_add_early+0x58/0x58
> [ 1.018368] [<ffffffffafa6ea84>] driver_register+0x64/0xf0
> [ 1.018370] [<ffffffffafa7013a>] __platform_driver_register+0x4a/0x50
> [ 1.018372] [<ffffffffb03c2075>] fw_cfg_sysfs_init+0x34/0x61
> [ 1.018376] [<ffffffffaf602108>] do_one_initcall+0xb8/0x230
> [ 1.018379] [<ffffffffb036b34a>] kernel_init_freeable+0x17a/0x219
> [ 1.018381] [<ffffffffb036ab19>] ? initcall_blacklist+0xb0/0xb0
> [ 1.018383] [<ffffffffafce5b30>] ? rest_init+0x80/0x80
> [ 1.018385] [<ffffffffafce5b3e>] kernel_init+0xe/0xf0
> [ 1.018388] [<ffffffffafd08898>] ret_from_fork+0x58/0x90
> [ 1.018390] [<ffffffffafce5b30>] ? rest_init+0x80/0x80
> [ 1.018392] ---[ end trace d00a5b71608a8f59 ]---
>
> Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1533367
> Fixes: e90cb816599b ("fw_cfg: do DMA read operation", 2017-11-28)
> CC: Marc-André Lureau <[email protected]>
> CC: Michael S. Tsirkin <[email protected]>
> Signed-off-by: Peter Xu <[email protected]>
> --
>
> This is based on tree:
> https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git/log/?h=vhost
>
> Please review, thanks.
>
> Signed-off-by: Peter Xu <[email protected]>

The DMA business is confusing, sadly I didn't get much clue what I was
supposed to do. What I can say:

Tested-by: Marc-André Lureau <[email protected]>

Should the series be removed from Michael tree and I squash your fix &
send a v10?

Fwiw, "fw_cfg: write vmcoreinfo details" should also be fixed to
allocate memory (unless your approach fixes that?)

thanks

> ---
> drivers/firmware/qemu_fw_cfg.c | 37 ++++++++-----------------------------
> 1 file changed, 8 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
> index 6eb5d8f43c3e..62a44bc81a88 100644
> --- a/drivers/firmware/qemu_fw_cfg.c
> +++ b/drivers/firmware/qemu_fw_cfg.c
> @@ -106,11 +106,12 @@ static inline bool fw_cfg_dma_enabled(void)
> }
>
> /* qemu fw_cfg device is sync today, but spec says it may become async */
> -static void fw_cfg_wait_for_control(struct fw_cfg_dma *d, dma_addr_t dma)
> +static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
> {
> do {
> - dma_sync_single_for_cpu(dev, dma, sizeof(*d), DMA_FROM_DEVICE);
> - if ((be32_to_cpu(d->control) & ~FW_CFG_DMA_CTL_ERROR) == 0)
> + u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
> +
> + if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
> return;
>
> usleep_range(50, 100);
> @@ -119,21 +120,9 @@ static void fw_cfg_wait_for_control(struct fw_cfg_dma *d, dma_addr_t dma)
>
> static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
> {
> - dma_addr_t dma_addr = 0;
> struct fw_cfg_dma *d = NULL;
> - dma_addr_t dma;
> ssize_t ret = length;
> - enum dma_data_direction dir =
> - (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0) |
> - (control & FW_CFG_DMA_CTL_WRITE ? DMA_TO_DEVICE : 0);
> -
> - if (address && length) {
> - dma_addr = dma_map_single(dev, address, length, dir);
> - if (dma_mapping_error(NULL, dma_addr)) {
> - WARN(1, "%s: failed to map address\n", __func__);
> - return -EFAULT;
> - }
> - }
> + phys_addr_t dma;
>
> d = kmalloc(sizeof(*d), GFP_KERNEL);
> if (!d) {
> @@ -142,34 +131,24 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
> }
>
> *d = (struct fw_cfg_dma) {
> - .address = cpu_to_be64(dma_addr),
> + .address = cpu_to_be64(virt_to_phys(address)),
> .length = cpu_to_be32(length),
> .control = cpu_to_be32(control)
> };
>
> - dma = dma_map_single(dev, d, sizeof(*d), DMA_BIDIRECTIONAL);
> - if (dma_mapping_error(NULL, dma)) {
> - WARN(1, "%s: failed to map fw_cfg_dma\n", __func__);
> - ret = -EFAULT;
> - goto end;
> - }
> + dma = virt_to_phys(d);
>
> iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
> iowrite32be(dma, fw_cfg_reg_dma + 4);
>
> - fw_cfg_wait_for_control(d, dma);
> + fw_cfg_wait_for_control(d);
>
> if (be32_to_cpu(d->control) & FW_CFG_DMA_CTL_ERROR) {
> ret = -EIO;
> }
>
> - dma_unmap_single(dev, dma, sizeof(*d), DMA_BIDIRECTIONAL);
> -
> end:
> kfree(d);
> - if (dma_addr)
> - dma_unmap_single(dev, dma_addr, length, dir);
> -
> return ret;
> }
>
> --
> 2.14.3
>

2018-01-16 06:11:12

by Peter Xu

[permalink] [raw]
Subject: Re: [PATCH] fw_cfg: don't use DMA mapping for fw_cfg device

On Mon, Jan 15, 2018 at 12:22:48PM +0100, Marc-Andre Lureau wrote:
> Hi
>
> On Mon, Jan 15, 2018 at 9:55 AM, Peter Xu <[email protected]> wrote:
> > fw_cfg device does not need IOMMU protection, so use physical addresses
> > always. That's how QEMU implements fw_cfg. Otherwise we'll see call
> > traces during boot when vIOMMU is enabled in guest:
> >
> > [ 1.018306] ------------[ cut here ]------------
> > [ 1.018314] WARNING: CPU: 1 PID: 1 at drivers/firmware/qemu_fw_cfg.c:152 fw_cfg_dma_transfer+0x399/0x500
> > [ 1.018315] fw_cfg_dma_transfer: failed to map fw_cfg_dma
> > [ 1.018316] Modules linked in:
> > [ 1.018320] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 3.10.0-827.el7.x86_64 #1
> > [ 1.018321] Hardware name: Red Hat KVM, BIOS 1.11.0-1.el7 04/01/2014
> > [ 1.018322] Call Trace:
> > [ 1.018330] [<ffffffffafcf6b3a>] dump_stack+0x19/0x1b
> > [ 1.018334] [<ffffffffaf68fff8>] __warn+0xd8/0x100
> > [ 1.018336] [<ffffffffaf69007f>] warn_slowpath_fmt+0x5f/0x80
> > [ 1.018338] [<ffffffffafb67329>] fw_cfg_dma_transfer+0x399/0x500
> > [ 1.018340] [<ffffffffafb6753c>] fw_cfg_read_blob+0xac/0x1c0
> > [ 1.018342] [<ffffffffafb67720>] fw_cfg_register_dir_entries+0x80/0x450
> > [ 1.018344] [<ffffffffafb67d02>] fw_cfg_sysfs_probe+0x212/0x3f0
> > [ 1.018347] [<ffffffffafa70352>] platform_drv_probe+0x42/0x110
> > [ 1.018350] [<ffffffffafa6e002>] driver_probe_device+0xc2/0x3e0
> > [ 1.018352] [<ffffffffafa6e3f3>] __driver_attach+0x93/0xa0
> > [ 1.018354] [<ffffffffafa6e360>] ? __device_attach+0x40/0x40
> > [ 1.018359] [<ffffffffafa6bbd3>] bus_for_each_dev+0x73/0xc0
> > [ 1.018362] [<ffffffffafa6d97e>] driver_attach+0x1e/0x20
> > [ 1.018364] [<ffffffffafa6d420>] bus_add_driver+0x200/0x2d0
> > [ 1.018366] [<ffffffffb03c2041>] ? firmware_map_add_early+0x58/0x58
> > [ 1.018368] [<ffffffffafa6ea84>] driver_register+0x64/0xf0
> > [ 1.018370] [<ffffffffafa7013a>] __platform_driver_register+0x4a/0x50
> > [ 1.018372] [<ffffffffb03c2075>] fw_cfg_sysfs_init+0x34/0x61
> > [ 1.018376] [<ffffffffaf602108>] do_one_initcall+0xb8/0x230
> > [ 1.018379] [<ffffffffb036b34a>] kernel_init_freeable+0x17a/0x219
> > [ 1.018381] [<ffffffffb036ab19>] ? initcall_blacklist+0xb0/0xb0
> > [ 1.018383] [<ffffffffafce5b30>] ? rest_init+0x80/0x80
> > [ 1.018385] [<ffffffffafce5b3e>] kernel_init+0xe/0xf0
> > [ 1.018388] [<ffffffffafd08898>] ret_from_fork+0x58/0x90
> > [ 1.018390] [<ffffffffafce5b30>] ? rest_init+0x80/0x80
> > [ 1.018392] ---[ end trace d00a5b71608a8f59 ]---
> >
> > Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1533367
> > Fixes: e90cb816599b ("fw_cfg: do DMA read operation", 2017-11-28)
> > CC: Marc-André Lureau <[email protected]>
> > CC: Michael S. Tsirkin <[email protected]>
> > Signed-off-by: Peter Xu <[email protected]>
> > --
> >
> > This is based on tree:
> > https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git/log/?h=vhost
> >
> > Please review, thanks.
> >
> > Signed-off-by: Peter Xu <[email protected]>
>
> The DMA business is confusing, sadly I didn't get much clue what I was
> supposed to do. What I can say:
>
> Tested-by: Marc-André Lureau <[email protected]>

Thanks for confirming this.

>
> Should the series be removed from Michael tree and I squash your fix &
> send a v10?
>
> Fwiw, "fw_cfg: write vmcoreinfo details" should also be fixed to
> allocate memory (unless your approach fixes that?)

Yes, IMHO this patch should also work for writes (though not tested).

Thanks,

--
Peter Xu