2018-08-27 09:37:39

by Gerd Hoffmann

[permalink] [raw]
Subject: [PATCH v7] Add udmabuf misc device

A driver to let userspace turn memfd regions into dma-bufs.

Use case: Allows qemu create dmabufs for the vga framebuffer or
virtio-gpu ressources. Then they can be passed around to display
those guest things on the host. To spice client for classic full
framebuffer display, and hopefully some day to wayland server for
seamless guest window display.

qemu test branch:
https://git.kraxel.org/cgit/qemu/log/?h=sirius/udmabuf

Cc: David Airlie <[email protected]>
Cc: Tomeu Vizoso <[email protected]>
Cc: Laurent Pinchart <[email protected]>
Cc: Daniel Vetter <[email protected]>
Signed-off-by: Gerd Hoffmann <[email protected]>
---
Documentation/ioctl/ioctl-number.txt | 1 +
include/uapi/linux/udmabuf.h | 33 +++
drivers/dma-buf/udmabuf.c | 287 ++++++++++++++++++++++
tools/testing/selftests/drivers/dma-buf/udmabuf.c | 96 ++++++++
MAINTAINERS | 16 ++
drivers/dma-buf/Kconfig | 8 +
drivers/dma-buf/Makefile | 1 +
tools/testing/selftests/drivers/dma-buf/Makefile | 5 +
8 files changed, 447 insertions(+)
create mode 100644 include/uapi/linux/udmabuf.h
create mode 100644 drivers/dma-buf/udmabuf.c
create mode 100644 tools/testing/selftests/drivers/dma-buf/udmabuf.c
create mode 100644 tools/testing/selftests/drivers/dma-buf/Makefile

diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 13a7c999c0..f2ac672eb7 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -272,6 +272,7 @@ Code Seq#(hex) Include File Comments
't' 90-91 linux/toshiba.h toshiba and toshiba_acpi SMM
'u' 00-1F linux/smb_fs.h gone
'u' 20-3F linux/uvcvideo.h USB video class host driver
+'u' 40-4f linux/udmabuf.h userspace dma-buf misc device
'v' 00-1F linux/ext2_fs.h conflict!
'v' 00-1F linux/fs.h conflict!
'v' 00-0F linux/sonypi.h conflict!
diff --git a/include/uapi/linux/udmabuf.h b/include/uapi/linux/udmabuf.h
new file mode 100644
index 0000000000..46b6532ed8
--- /dev/null
+++ b/include/uapi/linux/udmabuf.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_UDMABUF_H
+#define _UAPI_LINUX_UDMABUF_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define UDMABUF_FLAGS_CLOEXEC 0x01
+
+struct udmabuf_create {
+ __u32 memfd;
+ __u32 flags;
+ __u64 offset;
+ __u64 size;
+};
+
+struct udmabuf_create_item {
+ __u32 memfd;
+ __u32 __pad;
+ __u64 offset;
+ __u64 size;
+};
+
+struct udmabuf_create_list {
+ __u32 flags;
+ __u32 count;
+ struct udmabuf_create_item list[];
+};
+
+#define UDMABUF_CREATE _IOW('u', 0x42, struct udmabuf_create)
+#define UDMABUF_CREATE_LIST _IOW('u', 0x43, struct udmabuf_create_list)
+
+#endif /* _UAPI_LINUX_UDMABUF_H */
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
new file mode 100644
index 0000000000..8e24204526
--- /dev/null
+++ b/drivers/dma-buf/udmabuf.c
@@ -0,0 +1,287 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/miscdevice.h>
+#include <linux/dma-buf.h>
+#include <linux/highmem.h>
+#include <linux/cred.h>
+#include <linux/shmem_fs.h>
+#include <linux/memfd.h>
+
+#include <uapi/linux/udmabuf.h>
+
+struct udmabuf {
+ u32 pagecount;
+ struct page **pages;
+};
+
+static int udmabuf_vm_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct udmabuf *ubuf = vma->vm_private_data;
+
+ if (WARN_ON(vmf->pgoff >= ubuf->pagecount))
+ return VM_FAULT_SIGBUS;
+
+ vmf->page = ubuf->pages[vmf->pgoff];
+ get_page(vmf->page);
+ return 0;
+}
+
+static const struct vm_operations_struct udmabuf_vm_ops = {
+ .fault = udmabuf_vm_fault,
+};
+
+static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
+{
+ struct udmabuf *ubuf = buf->priv;
+
+ if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
+ return -EINVAL;
+
+ vma->vm_ops = &udmabuf_vm_ops;
+ vma->vm_private_data = ubuf;
+ return 0;
+}
+
+static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
+ enum dma_data_direction direction)
+{
+ struct udmabuf *ubuf = at->dmabuf->priv;
+ struct sg_table *sg;
+
+ sg = kzalloc(sizeof(*sg), GFP_KERNEL);
+ if (!sg)
+ goto err1;
+ if (sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->pagecount,
+ 0, ubuf->pagecount << PAGE_SHIFT,
+ GFP_KERNEL) < 0)
+ goto err2;
+ if (!dma_map_sg(at->dev, sg->sgl, sg->nents, direction))
+ goto err3;
+
+ return sg;
+
+err3:
+ sg_free_table(sg);
+err2:
+ kfree(sg);
+err1:
+ return ERR_PTR(-ENOMEM);
+}
+
+static void unmap_udmabuf(struct dma_buf_attachment *at,
+ struct sg_table *sg,
+ enum dma_data_direction direction)
+{
+ sg_free_table(sg);
+ kfree(sg);
+}
+
+static void release_udmabuf(struct dma_buf *buf)
+{
+ struct udmabuf *ubuf = buf->priv;
+ pgoff_t pg;
+
+ for (pg = 0; pg < ubuf->pagecount; pg++)
+ put_page(ubuf->pages[pg]);
+ kfree(ubuf->pages);
+ kfree(ubuf);
+}
+
+static void *kmap_udmabuf(struct dma_buf *buf, unsigned long page_num)
+{
+ struct udmabuf *ubuf = buf->priv;
+ struct page *page = ubuf->pages[page_num];
+
+ return kmap(page);
+}
+
+static void kunmap_udmabuf(struct dma_buf *buf, unsigned long page_num,
+ void *vaddr)
+{
+ kunmap(vaddr);
+}
+
+static struct dma_buf_ops udmabuf_ops = {
+ .map_dma_buf = map_udmabuf,
+ .unmap_dma_buf = unmap_udmabuf,
+ .release = release_udmabuf,
+ .map = kmap_udmabuf,
+ .unmap = kunmap_udmabuf,
+ .mmap = mmap_udmabuf,
+};
+
+#define SEALS_WANTED (F_SEAL_SHRINK)
+#define SEALS_DENIED (F_SEAL_WRITE)
+
+static long udmabuf_create(struct udmabuf_create_list *head,
+ struct udmabuf_create_item *list)
+{
+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+ struct file *memfd = NULL;
+ struct udmabuf *ubuf;
+ struct dma_buf *buf;
+ pgoff_t pgoff, pgcnt, pgidx, pgbuf;
+ struct page *page;
+ int seals, ret = -EINVAL;
+ u32 i, flags;
+
+ ubuf = kzalloc(sizeof(struct udmabuf), GFP_KERNEL);
+ if (!ubuf)
+ return -ENOMEM;
+
+ for (i = 0; i < head->count; i++) {
+ if (!IS_ALIGNED(list[i].offset, PAGE_SIZE))
+ goto err_free_ubuf;
+ if (!IS_ALIGNED(list[i].size, PAGE_SIZE))
+ goto err_free_ubuf;
+ ubuf->pagecount += list[i].size >> PAGE_SHIFT;
+ }
+ ubuf->pages = kmalloc_array(ubuf->pagecount, sizeof(struct page *),
+ GFP_KERNEL);
+ if (!ubuf->pages) {
+ ret = -ENOMEM;
+ goto err_free_ubuf;
+ }
+
+ pgbuf = 0;
+ for (i = 0; i < head->count; i++) {
+ memfd = fget(list[i].memfd);
+ if (!memfd)
+ goto err_put_pages;
+ if (!shmem_mapping(file_inode(memfd)->i_mapping))
+ goto err_put_pages;
+ seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
+ if (seals == -EINVAL ||
+ (seals & SEALS_WANTED) != SEALS_WANTED ||
+ (seals & SEALS_DENIED) != 0)
+ goto err_put_pages;
+ pgoff = list[i].offset >> PAGE_SHIFT;
+ pgcnt = list[i].size >> PAGE_SHIFT;
+ for (pgidx = 0; pgidx < pgcnt; pgidx++) {
+ page = shmem_read_mapping_page(
+ file_inode(memfd)->i_mapping, pgoff + pgidx);
+ if (IS_ERR(page)) {
+ ret = PTR_ERR(page);
+ goto err_put_pages;
+ }
+ ubuf->pages[pgbuf++] = page;
+ }
+ fput(memfd);
+ }
+ memfd = NULL;
+
+ exp_info.ops = &udmabuf_ops;
+ exp_info.size = ubuf->pagecount << PAGE_SHIFT;
+ exp_info.priv = ubuf;
+
+ buf = dma_buf_export(&exp_info);
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ goto err_put_pages;
+ }
+
+ flags = 0;
+ if (head->flags & UDMABUF_FLAGS_CLOEXEC)
+ flags |= O_CLOEXEC;
+ return dma_buf_fd(buf, flags);
+
+err_put_pages:
+ while (pgbuf > 0)
+ put_page(ubuf->pages[--pgbuf]);
+err_free_ubuf:
+ fput(memfd);
+ kfree(ubuf->pages);
+ kfree(ubuf);
+ return ret;
+}
+
+static long udmabuf_ioctl_create(struct file *filp, unsigned long arg)
+{
+ struct udmabuf_create create;
+ struct udmabuf_create_list head;
+ struct udmabuf_create_item list;
+
+ if (copy_from_user(&create, (void __user *)arg,
+ sizeof(struct udmabuf_create)))
+ return -EFAULT;
+
+ head.flags = create.flags;
+ head.count = 1;
+ list.memfd = create.memfd;
+ list.offset = create.offset;
+ list.size = create.size;
+
+ return udmabuf_create(&head, &list);
+}
+
+static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
+{
+ struct udmabuf_create_list head;
+ struct udmabuf_create_item *list;
+ int ret = -EINVAL;
+ u32 lsize;
+
+ if (copy_from_user(&head, (void __user *)arg, sizeof(head)))
+ return -EFAULT;
+ if (head.count > 1024)
+ return -EINVAL;
+ lsize = sizeof(struct udmabuf_create_item) * head.count;
+ list = memdup_user((void __user *)(arg + sizeof(head)), lsize);
+ if (IS_ERR(list))
+ return PTR_ERR(list);
+
+ ret = udmabuf_create(&head, list);
+ kfree(list);
+ return ret;
+}
+
+static long udmabuf_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg)
+{
+ long ret;
+
+ switch (ioctl) {
+ case UDMABUF_CREATE:
+ ret = udmabuf_ioctl_create(filp, arg);
+ break;
+ case UDMABUF_CREATE_LIST:
+ ret = udmabuf_ioctl_create_list(filp, arg);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
+static const struct file_operations udmabuf_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = udmabuf_ioctl,
+};
+
+static struct miscdevice udmabuf_misc = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "udmabuf",
+ .fops = &udmabuf_fops,
+};
+
+static int __init udmabuf_dev_init(void)
+{
+ return misc_register(&udmabuf_misc);
+}
+
+static void __exit udmabuf_dev_exit(void)
+{
+ misc_deregister(&udmabuf_misc);
+}
+
+module_init(udmabuf_dev_init)
+module_exit(udmabuf_dev_exit)
+
+MODULE_AUTHOR("Gerd Hoffmann <[email protected]>");
+MODULE_LICENSE("GPL v2");
diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
new file mode 100644
index 0000000000..376b1d6730
--- /dev/null
+++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <malloc.h>
+
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <linux/memfd.h>
+#include <linux/udmabuf.h>
+
+#define TEST_PREFIX "drivers/dma-buf/udmabuf"
+#define NUM_PAGES 4
+
+static int memfd_create(const char *name, unsigned int flags)
+{
+ return syscall(__NR_memfd_create, name, flags);
+}
+
+int main(int argc, char *argv[])
+{
+ struct udmabuf_create create;
+ int devfd, memfd, buf, ret;
+ off_t size;
+ void *mem;
+
+ devfd = open("/dev/udmabuf", O_RDWR);
+ if (devfd < 0) {
+ printf("%s: [skip,no-udmabuf]\n", TEST_PREFIX);
+ exit(77);
+ }
+
+ memfd = memfd_create("udmabuf-test", MFD_CLOEXEC);
+ if (memfd < 0) {
+ printf("%s: [skip,no-memfd]\n", TEST_PREFIX);
+ exit(77);
+ }
+
+ size = getpagesize() * NUM_PAGES;
+ ret = ftruncate(memfd, size);
+ if (ret == -1) {
+ printf("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
+ exit(1);
+ }
+
+ memset(&create, 0, sizeof(create));
+
+ /* should fail (offset not page aligned) */
+ create.memfd = memfd;
+ create.offset = getpagesize()/2;
+ create.size = getpagesize();
+ buf = ioctl(devfd, UDMABUF_CREATE, &create);
+ if (buf >= 0) {
+ printf("%s: [FAIL,test-1]\n", TEST_PREFIX);
+ exit(1);
+ }
+
+ /* should fail (size not multiple of page) */
+ create.memfd = memfd;
+ create.offset = 0;
+ create.size = getpagesize()/2;
+ buf = ioctl(devfd, UDMABUF_CREATE, &create);
+ if (buf >= 0) {
+ printf("%s: [FAIL,test-2]\n", TEST_PREFIX);
+ exit(1);
+ }
+
+ /* should fail (not memfd) */
+ create.memfd = 0; /* stdin */
+ create.offset = 0;
+ create.size = size;
+ buf = ioctl(devfd, UDMABUF_CREATE, &create);
+ if (buf >= 0) {
+ printf("%s: [FAIL,test-3]\n", TEST_PREFIX);
+ exit(1);
+ }
+
+ /* should work */
+ create.memfd = memfd;
+ create.offset = 0;
+ create.size = size;
+ buf = ioctl(devfd, UDMABUF_CREATE, &create);
+ if (buf < 0) {
+ printf("%s: [FAIL,test-4]\n", TEST_PREFIX);
+ exit(1);
+ }
+
+ fprintf(stderr, "%s: ok\n", TEST_PREFIX);
+ close(buf);
+ close(memfd);
+ close(devfd);
+ return 0;
+}
diff --git a/MAINTAINERS b/MAINTAINERS
index a5b256b259..11a9b04277 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14934,6 +14934,14 @@ S: Maintained
F: Documentation/filesystems/udf.txt
F: fs/udf/

+UDMABUF DRIVER
+M: Gerd Hoffmann <[email protected]>
+L: [email protected]
+S: Maintained
+F: drivers/dma-buf/udmabuf.c
+F: include/uapi/linux/udmabuf.h
+F: tools/testing/selftests/drivers/dma-buf/udmabuf.c
+
UDRAW TABLET
M: Bastien Nocera <[email protected]>
L: [email protected]
@@ -15343,6 +15351,14 @@ F: arch/x86/um/
F: fs/hostfs/
F: fs/hppfs/

+USERSPACE DMA BUFFER DRIVER
+M: Gerd Hoffmann <[email protected]>
+S: Maintained
+L: [email protected]
+F: drivers/dma-buf/udmabuf.c
+F: include/uapi/linux/udmabuf.h
+T: git git://anongit.freedesktop.org/drm/drm-misc
+
USERSPACE I/O (UIO)
M: Greg Kroah-Hartman <[email protected]>
S: Maintained
diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig
index ed3b785bae..338129eb12 100644
--- a/drivers/dma-buf/Kconfig
+++ b/drivers/dma-buf/Kconfig
@@ -30,4 +30,12 @@ config SW_SYNC
WARNING: improper use of this can result in deadlocking kernel
drivers from userspace. Intended for test and debug only.

+config UDMABUF
+ bool "userspace dmabuf misc driver"
+ default n
+ depends on DMA_SHARED_BUFFER
+ help
+ A driver to let userspace turn memfd regions into dma-bufs.
+ Qemu can use this to create host dmabufs for guest framebuffers.
+
endmenu
diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
index c33bf88631..0913a6ccab 100644
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@@ -1,3 +1,4 @@
obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o seqno-fence.o
obj-$(CONFIG_SYNC_FILE) += sync_file.o
obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o
+obj-$(CONFIG_UDMABUF) += udmabuf.o
diff --git a/tools/testing/selftests/drivers/dma-buf/Makefile b/tools/testing/selftests/drivers/dma-buf/Makefile
new file mode 100644
index 0000000000..4154c3d7aa
--- /dev/null
+++ b/tools/testing/selftests/drivers/dma-buf/Makefile
@@ -0,0 +1,5 @@
+CFLAGS += -I../../../../../usr/include/
+
+TEST_GEN_PROGS := udmabuf
+
+include ../../lib.mk
--
2.9.3



2018-08-31 08:54:04

by Daniel Vetter

[permalink] [raw]
Subject: Re: [PATCH v7] Add udmabuf misc device

On Mon, Aug 27, 2018 at 11:34:44AM +0200, Gerd Hoffmann wrote:
> A driver to let userspace turn memfd regions into dma-bufs.
>
> Use case: Allows qemu create dmabufs for the vga framebuffer or
> virtio-gpu ressources. Then they can be passed around to display
> those guest things on the host. To spice client for classic full
> framebuffer display, and hopefully some day to wayland server for
> seamless guest window display.
>
> qemu test branch:
> https://git.kraxel.org/cgit/qemu/log/?h=sirius/udmabuf
>
> Cc: David Airlie <[email protected]>
> Cc: Tomeu Vizoso <[email protected]>
> Cc: Laurent Pinchart <[email protected]>
> Cc: Daniel Vetter <[email protected]>
> Signed-off-by: Gerd Hoffmann <[email protected]>
> ---
> Documentation/ioctl/ioctl-number.txt | 1 +
> include/uapi/linux/udmabuf.h | 33 +++
> drivers/dma-buf/udmabuf.c | 287 ++++++++++++++++++++++
> tools/testing/selftests/drivers/dma-buf/udmabuf.c | 96 ++++++++
> MAINTAINERS | 16 ++
> drivers/dma-buf/Kconfig | 8 +
> drivers/dma-buf/Makefile | 1 +
> tools/testing/selftests/drivers/dma-buf/Makefile | 5 +
> 8 files changed, 447 insertions(+)
> create mode 100644 include/uapi/linux/udmabuf.h
> create mode 100644 drivers/dma-buf/udmabuf.c
> create mode 100644 tools/testing/selftests/drivers/dma-buf/udmabuf.c
> create mode 100644 tools/testing/selftests/drivers/dma-buf/Makefile
>
> diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
> index 13a7c999c0..f2ac672eb7 100644
> --- a/Documentation/ioctl/ioctl-number.txt
> +++ b/Documentation/ioctl/ioctl-number.txt
> @@ -272,6 +272,7 @@ Code Seq#(hex) Include File Comments
> 't' 90-91 linux/toshiba.h toshiba and toshiba_acpi SMM
> 'u' 00-1F linux/smb_fs.h gone
> 'u' 20-3F linux/uvcvideo.h USB video class host driver
> +'u' 40-4f linux/udmabuf.h userspace dma-buf misc device
> 'v' 00-1F linux/ext2_fs.h conflict!
> 'v' 00-1F linux/fs.h conflict!
> 'v' 00-0F linux/sonypi.h conflict!
> diff --git a/include/uapi/linux/udmabuf.h b/include/uapi/linux/udmabuf.h
> new file mode 100644
> index 0000000000..46b6532ed8
> --- /dev/null
> +++ b/include/uapi/linux/udmabuf.h
> @@ -0,0 +1,33 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +#ifndef _UAPI_LINUX_UDMABUF_H
> +#define _UAPI_LINUX_UDMABUF_H
> +
> +#include <linux/types.h>
> +#include <linux/ioctl.h>
> +
> +#define UDMABUF_FLAGS_CLOEXEC 0x01
> +
> +struct udmabuf_create {
> + __u32 memfd;
> + __u32 flags;
> + __u64 offset;
> + __u64 size;
> +};
> +
> +struct udmabuf_create_item {
> + __u32 memfd;
> + __u32 __pad;
> + __u64 offset;
> + __u64 size;
> +};
> +
> +struct udmabuf_create_list {
> + __u32 flags;
> + __u32 count;
> + struct udmabuf_create_item list[];
> +};
> +
> +#define UDMABUF_CREATE _IOW('u', 0x42, struct udmabuf_create)
> +#define UDMABUF_CREATE_LIST _IOW('u', 0x43, struct udmabuf_create_list)
> +
> +#endif /* _UAPI_LINUX_UDMABUF_H */
> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
> new file mode 100644
> index 0000000000..8e24204526
> --- /dev/null
> +++ b/drivers/dma-buf/udmabuf.c
> @@ -0,0 +1,287 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/device.h>
> +#include <linux/kernel.h>
> +#include <linux/slab.h>
> +#include <linux/miscdevice.h>
> +#include <linux/dma-buf.h>
> +#include <linux/highmem.h>
> +#include <linux/cred.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/memfd.h>
> +
> +#include <uapi/linux/udmabuf.h>
> +
> +struct udmabuf {
> + u32 pagecount;
> + struct page **pages;
> +};
> +
> +static int udmabuf_vm_fault(struct vm_fault *vmf)
> +{
> + struct vm_area_struct *vma = vmf->vma;
> + struct udmabuf *ubuf = vma->vm_private_data;
> +
> + if (WARN_ON(vmf->pgoff >= ubuf->pagecount))
> + return VM_FAULT_SIGBUS;
> +
> + vmf->page = ubuf->pages[vmf->pgoff];
> + get_page(vmf->page);
> + return 0;
> +}
> +
> +static const struct vm_operations_struct udmabuf_vm_ops = {
> + .fault = udmabuf_vm_fault,
> +};
> +
> +static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
> +{
> + struct udmabuf *ubuf = buf->priv;
> +
> + if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
> + return -EINVAL;
> +
> + vma->vm_ops = &udmabuf_vm_ops;
> + vma->vm_private_data = ubuf;
> + return 0;
> +}
> +
> +static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
> + enum dma_data_direction direction)
> +{
> + struct udmabuf *ubuf = at->dmabuf->priv;
> + struct sg_table *sg;
> +
> + sg = kzalloc(sizeof(*sg), GFP_KERNEL);
> + if (!sg)
> + goto err1;
> + if (sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->pagecount,
> + 0, ubuf->pagecount << PAGE_SHIFT,
> + GFP_KERNEL) < 0)
> + goto err2;
> + if (!dma_map_sg(at->dev, sg->sgl, sg->nents, direction))
> + goto err3;
> +
> + return sg;
> +
> +err3:
> + sg_free_table(sg);
> +err2:
> + kfree(sg);
> +err1:
> + return ERR_PTR(-ENOMEM);
> +}
> +
> +static void unmap_udmabuf(struct dma_buf_attachment *at,
> + struct sg_table *sg,
> + enum dma_data_direction direction)
> +{
> + sg_free_table(sg);
> + kfree(sg);
> +}
> +
> +static void release_udmabuf(struct dma_buf *buf)
> +{
> + struct udmabuf *ubuf = buf->priv;
> + pgoff_t pg;
> +
> + for (pg = 0; pg < ubuf->pagecount; pg++)
> + put_page(ubuf->pages[pg]);
> + kfree(ubuf->pages);
> + kfree(ubuf);
> +}
> +
> +static void *kmap_udmabuf(struct dma_buf *buf, unsigned long page_num)
> +{
> + struct udmabuf *ubuf = buf->priv;
> + struct page *page = ubuf->pages[page_num];
> +
> + return kmap(page);
> +}
> +
> +static void kunmap_udmabuf(struct dma_buf *buf, unsigned long page_num,
> + void *vaddr)
> +{
> + kunmap(vaddr);
> +}
> +
> +static struct dma_buf_ops udmabuf_ops = {
> + .map_dma_buf = map_udmabuf,
> + .unmap_dma_buf = unmap_udmabuf,
> + .release = release_udmabuf,
> + .map = kmap_udmabuf,
> + .unmap = kunmap_udmabuf,
> + .mmap = mmap_udmabuf,
> +};
> +
> +#define SEALS_WANTED (F_SEAL_SHRINK)
> +#define SEALS_DENIED (F_SEAL_WRITE)
> +
> +static long udmabuf_create(struct udmabuf_create_list *head,
> + struct udmabuf_create_item *list)
> +{
> + DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
> + struct file *memfd = NULL;
> + struct udmabuf *ubuf;
> + struct dma_buf *buf;
> + pgoff_t pgoff, pgcnt, pgidx, pgbuf;
> + struct page *page;
> + int seals, ret = -EINVAL;
> + u32 i, flags;
> +
> + ubuf = kzalloc(sizeof(struct udmabuf), GFP_KERNEL);
> + if (!ubuf)
> + return -ENOMEM;
> +
> + for (i = 0; i < head->count; i++) {
> + if (!IS_ALIGNED(list[i].offset, PAGE_SIZE))
> + goto err_free_ubuf;
> + if (!IS_ALIGNED(list[i].size, PAGE_SIZE))
> + goto err_free_ubuf;
> + ubuf->pagecount += list[i].size >> PAGE_SHIFT;
> + }
> + ubuf->pages = kmalloc_array(ubuf->pagecount, sizeof(struct page *),
> + GFP_KERNEL);
> + if (!ubuf->pages) {
> + ret = -ENOMEM;
> + goto err_free_ubuf;
> + }
> +
> + pgbuf = 0;
> + for (i = 0; i < head->count; i++) {
> + memfd = fget(list[i].memfd);
> + if (!memfd)
> + goto err_put_pages;
> + if (!shmem_mapping(file_inode(memfd)->i_mapping))
> + goto err_put_pages;
> + seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
> + if (seals == -EINVAL ||
> + (seals & SEALS_WANTED) != SEALS_WANTED ||
> + (seals & SEALS_DENIED) != 0)
> + goto err_put_pages;
> + pgoff = list[i].offset >> PAGE_SHIFT;
> + pgcnt = list[i].size >> PAGE_SHIFT;
> + for (pgidx = 0; pgidx < pgcnt; pgidx++) {
> + page = shmem_read_mapping_page(
> + file_inode(memfd)->i_mapping, pgoff + pgidx);
> + if (IS_ERR(page)) {
> + ret = PTR_ERR(page);
> + goto err_put_pages;
> + }
> + ubuf->pages[pgbuf++] = page;
> + }
> + fput(memfd);
> + }
> + memfd = NULL;
> +
> + exp_info.ops = &udmabuf_ops;
> + exp_info.size = ubuf->pagecount << PAGE_SHIFT;
> + exp_info.priv = ubuf;
> +
> + buf = dma_buf_export(&exp_info);
> + if (IS_ERR(buf)) {
> + ret = PTR_ERR(buf);
> + goto err_put_pages;
> + }
> +
> + flags = 0;
> + if (head->flags & UDMABUF_FLAGS_CLOEXEC)
> + flags |= O_CLOEXEC;
> + return dma_buf_fd(buf, flags);
> +
> +err_put_pages:
> + while (pgbuf > 0)
> + put_page(ubuf->pages[--pgbuf]);
> +err_free_ubuf:
> + fput(memfd);
> + kfree(ubuf->pages);
> + kfree(ubuf);
> + return ret;
> +}
> +
> +static long udmabuf_ioctl_create(struct file *filp, unsigned long arg)
> +{
> + struct udmabuf_create create;
> + struct udmabuf_create_list head;
> + struct udmabuf_create_item list;
> +
> + if (copy_from_user(&create, (void __user *)arg,
> + sizeof(struct udmabuf_create)))
> + return -EFAULT;
> +
> + head.flags = create.flags;
> + head.count = 1;
> + list.memfd = create.memfd;
> + list.offset = create.offset;
> + list.size = create.size;
> +
> + return udmabuf_create(&head, &list);
> +}
> +
> +static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
> +{
> + struct udmabuf_create_list head;
> + struct udmabuf_create_item *list;
> + int ret = -EINVAL;
> + u32 lsize;
> +
> + if (copy_from_user(&head, (void __user *)arg, sizeof(head)))
> + return -EFAULT;
> + if (head.count > 1024)
> + return -EINVAL;
> + lsize = sizeof(struct udmabuf_create_item) * head.count;
> + list = memdup_user((void __user *)(arg + sizeof(head)), lsize);
> + if (IS_ERR(list))
> + return PTR_ERR(list);
> +
> + ret = udmabuf_create(&head, list);
> + kfree(list);
> + return ret;
> +}
> +
> +static long udmabuf_ioctl(struct file *filp, unsigned int ioctl,
> + unsigned long arg)
> +{
> + long ret;
> +
> + switch (ioctl) {
> + case UDMABUF_CREATE:
> + ret = udmabuf_ioctl_create(filp, arg);
> + break;
> + case UDMABUF_CREATE_LIST:
> + ret = udmabuf_ioctl_create_list(filp, arg);
> + break;
> + default:
> + ret = -EINVAL;
> + break;
> + }
> + return ret;
> +}
> +
> +static const struct file_operations udmabuf_fops = {
> + .owner = THIS_MODULE,
> + .unlocked_ioctl = udmabuf_ioctl,
> +};
> +
> +static struct miscdevice udmabuf_misc = {
> + .minor = MISC_DYNAMIC_MINOR,
> + .name = "udmabuf",
> + .fops = &udmabuf_fops,
> +};
> +
> +static int __init udmabuf_dev_init(void)
> +{
> + return misc_register(&udmabuf_misc);
> +}
> +
> +static void __exit udmabuf_dev_exit(void)
> +{
> + misc_deregister(&udmabuf_misc);
> +}
> +
> +module_init(udmabuf_dev_init)
> +module_exit(udmabuf_dev_exit)
> +
> +MODULE_AUTHOR("Gerd Hoffmann <[email protected]>");
> +MODULE_LICENSE("GPL v2");
> diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
> new file mode 100644
> index 0000000000..376b1d6730
> --- /dev/null
> +++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
> @@ -0,0 +1,96 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <malloc.h>
> +
> +#include <sys/ioctl.h>
> +#include <sys/syscall.h>
> +#include <linux/memfd.h>
> +#include <linux/udmabuf.h>
> +
> +#define TEST_PREFIX "drivers/dma-buf/udmabuf"
> +#define NUM_PAGES 4
> +
> +static int memfd_create(const char *name, unsigned int flags)
> +{
> + return syscall(__NR_memfd_create, name, flags);
> +}
> +
> +int main(int argc, char *argv[])
> +{
> + struct udmabuf_create create;
> + int devfd, memfd, buf, ret;
> + off_t size;
> + void *mem;
> +
> + devfd = open("/dev/udmabuf", O_RDWR);
> + if (devfd < 0) {
> + printf("%s: [skip,no-udmabuf]\n", TEST_PREFIX);
> + exit(77);
> + }
> +
> + memfd = memfd_create("udmabuf-test", MFD_CLOEXEC);
> + if (memfd < 0) {
> + printf("%s: [skip,no-memfd]\n", TEST_PREFIX);
> + exit(77);
> + }
> +
> + size = getpagesize() * NUM_PAGES;
> + ret = ftruncate(memfd, size);
> + if (ret == -1) {
> + printf("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
> + exit(1);
> + }
> +
> + memset(&create, 0, sizeof(create));
> +
> + /* should fail (offset not page aligned) */
> + create.memfd = memfd;
> + create.offset = getpagesize()/2;
> + create.size = getpagesize();
> + buf = ioctl(devfd, UDMABUF_CREATE, &create);
> + if (buf >= 0) {
> + printf("%s: [FAIL,test-1]\n", TEST_PREFIX);
> + exit(1);
> + }
> +
> + /* should fail (size not multiple of page) */
> + create.memfd = memfd;
> + create.offset = 0;
> + create.size = getpagesize()/2;
> + buf = ioctl(devfd, UDMABUF_CREATE, &create);
> + if (buf >= 0) {
> + printf("%s: [FAIL,test-2]\n", TEST_PREFIX);
> + exit(1);
> + }
> +
> + /* should fail (not memfd) */
> + create.memfd = 0; /* stdin */
> + create.offset = 0;
> + create.size = size;
> + buf = ioctl(devfd, UDMABUF_CREATE, &create);
> + if (buf >= 0) {
> + printf("%s: [FAIL,test-3]\n", TEST_PREFIX);
> + exit(1);
> + }
> +
> + /* should work */
> + create.memfd = memfd;
> + create.offset = 0;
> + create.size = size;
> + buf = ioctl(devfd, UDMABUF_CREATE, &create);
> + if (buf < 0) {
> + printf("%s: [FAIL,test-4]\n", TEST_PREFIX);
> + exit(1);
> + }
> +
> + fprintf(stderr, "%s: ok\n", TEST_PREFIX);
> + close(buf);
> + close(memfd);
> + close(devfd);
> + return 0;
> +}
> diff --git a/MAINTAINERS b/MAINTAINERS
> index a5b256b259..11a9b04277 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -14934,6 +14934,14 @@ S: Maintained
> F: Documentation/filesystems/udf.txt
> F: fs/udf/
>
> +UDMABUF DRIVER
> +M: Gerd Hoffmann <[email protected]>
> +L: [email protected]
> +S: Maintained
> +F: drivers/dma-buf/udmabuf.c
> +F: include/uapi/linux/udmabuf.h
> +F: tools/testing/selftests/drivers/dma-buf/udmabuf.c

Dupe, remove this one (since only the 2nd one has the drm-misc git repo
entry).

Acked-by: Daniel Vetter <[email protected]>

> +
> UDRAW TABLET
> M: Bastien Nocera <[email protected]>
> L: [email protected]
> @@ -15343,6 +15351,14 @@ F: arch/x86/um/
> F: fs/hostfs/
> F: fs/hppfs/
>
> +USERSPACE DMA BUFFER DRIVER
> +M: Gerd Hoffmann <[email protected]>
> +S: Maintained
> +L: [email protected]
> +F: drivers/dma-buf/udmabuf.c
> +F: include/uapi/linux/udmabuf.h
> +T: git git://anongit.freedesktop.org/drm/drm-misc
> +
> USERSPACE I/O (UIO)
> M: Greg Kroah-Hartman <[email protected]>
> S: Maintained
> diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig
> index ed3b785bae..338129eb12 100644
> --- a/drivers/dma-buf/Kconfig
> +++ b/drivers/dma-buf/Kconfig
> @@ -30,4 +30,12 @@ config SW_SYNC
> WARNING: improper use of this can result in deadlocking kernel
> drivers from userspace. Intended for test and debug only.
>
> +config UDMABUF
> + bool "userspace dmabuf misc driver"
> + default n
> + depends on DMA_SHARED_BUFFER
> + help
> + A driver to let userspace turn memfd regions into dma-bufs.
> + Qemu can use this to create host dmabufs for guest framebuffers.
> +
> endmenu
> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
> index c33bf88631..0913a6ccab 100644
> --- a/drivers/dma-buf/Makefile
> +++ b/drivers/dma-buf/Makefile
> @@ -1,3 +1,4 @@
> obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o seqno-fence.o
> obj-$(CONFIG_SYNC_FILE) += sync_file.o
> obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o
> +obj-$(CONFIG_UDMABUF) += udmabuf.o
> diff --git a/tools/testing/selftests/drivers/dma-buf/Makefile b/tools/testing/selftests/drivers/dma-buf/Makefile
> new file mode 100644
> index 0000000000..4154c3d7aa
> --- /dev/null
> +++ b/tools/testing/selftests/drivers/dma-buf/Makefile
> @@ -0,0 +1,5 @@
> +CFLAGS += -I../../../../../usr/include/
> +
> +TEST_GEN_PROGS := udmabuf
> +
> +include ../../lib.mk
> --
> 2.9.3
>

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

2018-09-09 11:13:47

by Gert Wollny

[permalink] [raw]
Subject: Re: [PATCH v7] Add udmabuf misc device

I am by no means a kernel expert, but Tomeu asked whether I could
review this. Generally the patch looks okay, it applied (on top of
4.18.5-gentoo), and compiled without problems.

However, running the experimental qemu branch I get a kernel bug:

BUG: unable to handle kernel NULL pointer dereference at
0000000000000038
[12100.605908] PGD 0 P4D 0
[12100.605918] Oops: 0002 [#1] SMP NOPTI
[12100.605926] CPU: 5 PID: 7247 Comm: qemu-system-x86 Not tainted
4.18.5-gentoo #1
[12100.605930] Hardware name: To be filled by O.E.M. To be filled by
O.E.M./M5A97 R2.0, BIOS 1302 11/14/2012
[12100.605942] RIP: 0010:fput+0x5/0x80
[12100.605945] Code: e4 ff 48 8b 3d b4 23 10 01 48 8b 34 24 48 83 c4 08
e9 6f 6c fd ff 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00
00 <f0> 48 ff 4f 38 75 51 53 48 89 fb 65 8b 05 e9 fe 9c 5f 65 48 8b 3c
[12100.606003] RSP: 0018:ffffbf00c94ffd90 EFLAGS: 00010246
[12100.606007] RAX: 0000000000000000 RBX: fffffffffffffff8 RCX:
ffff98336e000b00
[12100.606011] RDX: 0000000000000000 RSI: 0000000000004000 RDI:
0000000000000000
[12100.606014] RBP: 0000000000000000 R08: ffff983366504bc0 R09:
00000000ffffffff
[12100.606018] R10: 0000000000000000 R11: 0000000000000000 R12:
ffffffffffffffea
[12100.606021] R13: 0000000000000000 R14: ffff983100c5ca20 R15:
0000000000000000
[12100.606026] FS: 00007fe49ea8fbc0(0000) GS:ffff98338ed40000(0000)
knlGS:0000000000000000
[12100.606029] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[12100.606032] CR2: 0000000000000038 CR3: 00000003df65e000 CR4:
00000000000406e0
[12100.606036] Call Trace:
[12100.606046] udmabuf_create+0x220/0x310
[12100.606057] udmabuf_ioctl+0xf3/0x120
[12100.606064] do_vfs_ioctl+0xb0/0x630
[12100.606071] ? handle_mm_fault+0x108/0x200
[12100.606077] ksys_ioctl+0x92/0xa0
[12100.606082] __x64_sys_ioctl+0x16/0x20
[12100.606089] do_syscall_64+0x4f/0xe0
[12100.606096] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[12100.606103] RIP: 0033:0x7fe4974b43b7

The offending code is in the error exit (0x220 = 544):

0xffffffff816085db <+539>: callq 0xffffffff81240350 <fput>
0xffffffff816085e0 <+544>: mov 0x8(%r14),%rdi
0xffffffff816085e4 <+548>: callq 0xffffffff81217ad0 <kfree>
0xffffffff816085e9 <+553>: mov %r14,%rdi
0xffffffff816085ec <+556>: callq 0xffffffff81217ad0 <kfree>
0xffffffff816085f1 <+561>: mov 0x50(%rsp),%rbx
0xffffffff816085f6 <+566>: xor %gs:0x28,%rbx
0xffffffff816085ff <+575>: mov %r12,%rax
0xffffffff81608602 <+578>: jne 0xffffffff816086c8
<udmabuf_create+776>

err_free_ubuf:
fput(memfd);
kfree(ubuf->pages);
kfree(ubuf);
return ret;

further commenst see below.

+
> +static long udmabuf_create(struct udmabuf_create_list *head,
> + struct udmabuf_create_item *list)
> +{
> + DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
> + struct file *memfd = NULL;
> + struct udmabuf *ubuf;
> + struct dma_buf *buf;
> + pgoff_t pgoff, pgcnt, pgidx, pgbuf;
> + struct page *page;
> + int seals, ret = -EINVAL;
> + u32 i, flags;
> +
> + ubuf = kzalloc(sizeof(struct udmabuf), GFP_KERNEL);
> + if (!ubuf)
> + return -ENOMEM;
> +
> + for (i = 0; i < head->count; i++) {
> + if (!IS_ALIGNED(list[i].offset, PAGE_SIZE))
> + goto err_free_ubuf;
> + if (!IS_ALIGNED(list[i].size, PAGE_SIZE))
> + goto err_free_ubuf;
> + ubuf->pagecount += list[i].size >> PAGE_SHIFT;
> + }
> + ubuf->pages = kmalloc_array(ubuf->pagecount, sizeof(struct
> page *),
> + GFP_KERNEL);
> + if (!ubuf->pages) {
> + ret = -ENOMEM;
> + goto err_free_ubuf;
> + }
> +
> + pgbuf = 0;
> + for (i = 0; i < head->count; i++) {
> + memfd = fget(list[i].memfd);
> + if (!memfd)
> + goto err_put_pages;
> + if (!shmem_mapping(file_inode(memfd)->i_mapping))
> + goto err_put_pages;
> + seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
> + if (seals == -EINVAL ||
> + (seals & SEALS_WANTED) != SEALS_WANTED ||
> + (seals & SEALS_DENIED) != 0)
> + goto err_put_pages;
> + pgoff = list[i].offset >> PAGE_SHIFT;
> + pgcnt = list[i].size >> PAGE_SHIFT;
> + for (pgidx = 0; pgidx < pgcnt; pgidx++) {
> + page = shmem_read_mapping_page(
> + file_inode(memfd)->i_mapping, pgoff
> + pgidx);
> + if (IS_ERR(page)) {
> + ret = PTR_ERR(page);
> + goto err_put_pages;
> + }
> + ubuf->pages[pgbuf++] = page;
> + }
> + fput(memfd);
> + }
> + memfd = NULL;
Now memfd is NULL

> +
> + exp_info.ops = &udmabuf_ops;
> + exp_info.size = ubuf->pagecount << PAGE_SHIFT;
> + exp_info.priv = ubuf;
> +
> + buf = dma_buf_export(&exp_info);
> + if (IS_ERR(buf)) {
> + ret = PTR_ERR(buf);
> + goto err_put_pages;
Assume an error occured

> + }
> +
> + flags = 0;
> + if (head->flags & UDMABUF_FLAGS_CLOEXEC)
> + flags |= O_CLOEXEC;
> + return dma_buf_fd(buf, flags);
> +
> +err_put_pages:
> + while (pgbuf > 0)
> + put_page(ubuf->pages[--pgbuf]);
> +err_free_ubuf:
> + fput(memfd);
Now fput it called with NULL, and in fput this is used in

0xffffffff81240350 <+0>: callq 0xffffffff81a015c0 <__fentry__>
0xffffffff81240355 <+5>: lock decq 0x38(%rdi)

where the bug is signalled, so I guess fput doesn't accept a null
pointer.

I'm not sure why you set memfd to NULL at all, because on the following
non-error path it is not used.

The other question is of course, why did dma_buf_export fail for me ...

Best,
Gert

PS: Please CC me since I'm not on lkml,

> + kfree(ubuf->pages);
> + kfree(ubuf);
> + return ret;
> +}
> +
> +static long udmabuf_ioctl_create(struct file *filp, unsigned long
> arg)
> +{
> + struct udmabuf_create create;
> + struct udmabuf_create_list head;
> + struct udmabuf_create_item list;
> +
> + if (copy_from_user(&create, (void __user *)arg,
> + sizeof(struct udmabuf_create)))
> + return -EFAULT;
> +
> + head.flags = create.flags;
> + head.count = 1;
> + list.memfd = create.memfd;
> + list.offset = create.offset;
> + list.size = create.size;
> +
> + return udmabuf_create(&head, &list);
> +}
> +
> +static long udmabuf_ioctl_create_list(struct file *filp, unsigned
> long arg)
> +{
> + struct udmabuf_create_list head;
> + struct udmabuf_create_item *list;
> + int ret = -EINVAL;
> + u32 lsize;
> +
> + if (copy_from_user(&head, (void __user *)arg, sizeof(head)))
> + return -EFAULT;
> + if (head.count > 1024)
> + return -EINVAL;
> + lsize = sizeof(struct udmabuf_create_item) * head.count;
> + list = memdup_user((void __user *)(arg + sizeof(head)),
> lsize);
> + if (IS_ERR(list))
> + return PTR_ERR(list);
> +
> + ret = udmabuf_create(&head, list);
> + kfree(list);
> + return ret;
> +}
> +
> +static long udmabuf_ioctl(struct file *filp, unsigned int ioctl,
> + unsigned long arg)
> +{
> + long ret;
> +
> + switch (ioctl) {
> + case UDMABUF_CREATE:
> + ret = udmabuf_ioctl_create(filp, arg);
> + break;
> + case UDMABUF_CREATE_LIST:
> + ret = udmabuf_ioctl_create_list(filp, arg);
> + break;
> + default:
> + ret = -EINVAL;
> + break;
> + }
> + return ret;
> +}
> +
> +static const struct file_operations udmabuf_fops = {
> + .owner = THIS_MODULE,
> + .unlocked_ioctl = udmabuf_ioctl,
> +};
> +
> +static struct miscdevice udmabuf_misc = {
> + .minor = MISC_DYNAMIC_MINOR,
> + .name = "udmabuf",
> + .fops = &udmabuf_fops,
> +};
> +
> +static int __init udmabuf_dev_init(void)
> +{
> + return misc_register(&udmabuf_misc);
> +}
> +
> +static void __exit udmabuf_dev_exit(void)
> +{
> + misc_deregister(&udmabuf_misc);
> +}
> +
> +module_init(udmabuf_dev_init)
> +module_exit(udmabuf_dev_exit)
> +
> +MODULE_AUTHOR("Gerd Hoffmann <[email protected]>");
> +MODULE_LICENSE("GPL v2");
> diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c
> b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
> new file mode 100644
> index 0000000000..376b1d6730
> --- /dev/null
> +++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
> @@ -0,0 +1,96 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <malloc.h>
> +
> +#include <sys/ioctl.h>
> +#include <sys/syscall.h>
> +#include <linux/memfd.h>
> +#include <linux/udmabuf.h>
> +
> +#define TEST_PREFIX "drivers/dma-buf/udmabuf"
> +#define NUM_PAGES 4
> +
> +static int memfd_create(const char *name, unsigned int flags)
> +{
> + return syscall(__NR_memfd_create, name, flags);
> +}
> +
> +int main(int argc, char *argv[])
> +{
> + struct udmabuf_create create;
> + int devfd, memfd, buf, ret;
> + off_t size;
> + void *mem;
> +
> + devfd = open("/dev/udmabuf", O_RDWR);
> + if (devfd < 0) {
> + printf("%s: [skip,no-udmabuf]\n", TEST_PREFIX);
> + exit(77);
> + }
> +
> + memfd = memfd_create("udmabuf-test", MFD_CLOEXEC);
> + if (memfd < 0) {
> + printf("%s: [skip,no-memfd]\n", TEST_PREFIX);
> + exit(77);
> + }
> +
> + size = getpagesize() * NUM_PAGES;
> + ret = ftruncate(memfd, size);
> + if (ret == -1) {
> + printf("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
> + exit(1);
> + }
> +
> + memset(&create, 0, sizeof(create));
> +
> + /* should fail (offset not page aligned) */
> + create.memfd = memfd;
> + create.offset = getpagesize()/2;
> + create.size = getpagesize();
> + buf = ioctl(devfd, UDMABUF_CREATE, &create);
> + if (buf >= 0) {
> + printf("%s: [FAIL,test-1]\n", TEST_PREFIX);
> + exit(1);
> + }
> +
> + /* should fail (size not multiple of page) */
> + create.memfd = memfd;
> + create.offset = 0;
> + create.size = getpagesize()/2;
> + buf = ioctl(devfd, UDMABUF_CREATE, &create);
> + if (buf >= 0) {
> + printf("%s: [FAIL,test-2]\n", TEST_PREFIX);
> + exit(1);
> + }
> +
> + /* should fail (not memfd) */
> + create.memfd = 0; /* stdin */
> + create.offset = 0;
> + create.size = size;
> + buf = ioctl(devfd, UDMABUF_CREATE, &create);
> + if (buf >= 0) {
> + printf("%s: [FAIL,test-3]\n", TEST_PREFIX);
> + exit(1);
> + }
> +
> + /* should work */
> + create.memfd = memfd;
> + create.offset = 0;
> + create.size = size;
> + buf = ioctl(devfd, UDMABUF_CREATE, &create);
> + if (buf < 0) {
> + printf("%s: [FAIL,test-4]\n", TEST_PREFIX);
> + exit(1);
> + }
> +
> + fprintf(stderr, "%s: ok\n", TEST_PREFIX);
> + close(buf);
> + close(memfd);
> + close(devfd);
> + return 0;
> +}
> diff --git a/MAINTAINERS b/MAINTAINERS
> index a5b256b259..11a9b04277 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -14934,6 +14934,14 @@ S: Maintained
> F: Documentation/filesystems/udf.txt
> F: fs/udf/
>
> +UDMABUF DRIVER
> +M: Gerd Hoffmann <[email protected]>
> +L: [email protected]
> +S: Maintained
> +F: drivers/dma-buf/udmabuf.c
> +F: include/uapi/linux/udmabuf.h
> +F: tools/testing/selftests/drivers/dma-buf/udmabuf.c
> +
> UDRAW TABLET
> M: Bastien Nocera <[email protected]>
> L: [email protected]
> @@ -15343,6 +15351,14 @@ F: arch/x86/um/
> F: fs/hostfs/
> F: fs/hppfs/
>
> +USERSPACE DMA BUFFER DRIVER
> +M: Gerd Hoffmann <[email protected]>
> +S: Maintained
> +L: [email protected]
> +F: drivers/dma-buf/udmabuf.c
> +F: include/uapi/linux/udmabuf.h
> +T: git git://anongit.freedesktop.org/drm/drm-misc
> +
> USERSPACE I/O (UIO)
> M: Greg Kroah-Hartman <[email protected]>
> S: Maintained
> diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig
> index ed3b785bae..338129eb12 100644
> --- a/drivers/dma-buf/Kconfig
> +++ b/drivers/dma-buf/Kconfig
> @@ -30,4 +30,12 @@ config SW_SYNC
> WARNING: improper use of this can result in deadlocking
> kernel
> drivers from userspace. Intended for test and debug only.
>
> +config UDMABUF
> + bool "userspace dmabuf misc driver"
> + default n
> + depends on DMA_SHARED_BUFFER
> + help
> + A driver to let userspace turn memfd regions into dma-
> bufs.
> + Qemu can use this to create host dmabufs for guest
> framebuffers.
> +
> endmenu
> diff --git a/drivers/dma-buf/Makefile b/drivers/dma-buf/Makefile
> index c33bf88631..0913a6ccab 100644
> --- a/drivers/dma-buf/Makefile
> +++ b/drivers/dma-buf/Makefile
> @@ -1,3 +1,4 @@
> obj-y := dma-buf.o dma-fence.o dma-fence-array.o reservation.o
> seqno-fence.o
> obj-$(CONFIG_SYNC_FILE) += sync_file.o
> obj-$(CONFIG_SW_SYNC) += sw_sync.o sync_debug.o
> +obj-$(CONFIG_UDMABUF) += udmabuf.o
> diff --git a/tools/testing/selftests/drivers/dma-buf/Makefile
> b/tools/testing/selftests/drivers/dma-buf/Makefile
> new file mode 100644
> index 0000000000..4154c3d7aa
> --- /dev/null
> +++ b/tools/testing/selftests/drivers/dma-buf/Makefile
> @@ -0,0 +1,5 @@
> +CFLAGS += -I../../../../../usr/include/
> +
> +TEST_GEN_PROGS := udmabuf
> +
> +include ../../lib.mk
> --
> 2.9.3

2018-09-10 08:38:53

by Gerd Hoffmann

[permalink] [raw]
Subject: Re: [PATCH v7] Add udmabuf misc device

Hi,

> > + fput(memfd);
> > + }
> > + memfd = NULL;
> Now memfd is NULL

> > + buf = dma_buf_export(&exp_info);
> > + if (IS_ERR(buf)) {
> > + ret = PTR_ERR(buf);
> > + goto err_put_pages;
> Assume an error occured

> > +err_put_pages:
> > + while (pgbuf > 0)
> > + put_page(ubuf->pages[--pgbuf]);
> > +err_free_ubuf:
> > + fput(memfd);
> Now fput it called with NULL, and in fput this is used in

> where the bug is signalled, so I guess fput doesn't accept a null
> pointer.

Indeed. Already fixed in drm-misc-next.

> I'm not sure why you set memfd to NULL at all, because on the following
> non-error path it is not used.

And in the error path it should not be user either, at least when the
error happens *after* exiting the loop, otherwise we would fput twice in
the error case.

> The other question is of course, why did dma_buf_export fail for me ...

What exactly did you try?

cheers,
Gerd


2018-09-10 09:21:25

by Gert Wollny

[permalink] [raw]
Subject: Re: [PATCH v7] Add udmabuf misc device

Am Montag, den 10.09.2018, 10:37 +0200 schrieb Gerd Hoffmann:
...

> > The other question is of course, why did dma_buf_export fail for me
> > ...
>
> What exactly did you try?

I ran

qemu-system-x86_64 -enable-kvm -smp 5 -M q35 -m 8G \
-drive format=raw,file=ubuntu.raw,if=virtio \
-net nic,model=virtio -net user,hostfwd=tcp::2224-:22 \
-vga virtio

ubuntu.raw = Ubuntu 18.04 (bionic) + 4.17 kernel (from Ubuntu cosmic).

The VM boots up to a certain point, the last line from the boot log is
varying, as far as I can see it is a few lines after virtio_blk detects
the disks.

When I add '-display sdl' the Xsession freezes. I was still able to log
in remotely and kill -9 qemu, then X11 became usable again.

In all cases I get on the host:
qemu-system-x86_64: info: udmabuf: init ok

and there it stays, certainly because of the kernel bug reported.

Best,
Gert



2018-09-10 10:54:41

by Gerd Hoffmann

[permalink] [raw]
Subject: Re: [PATCH v7] Add udmabuf misc device

On Mon, Sep 10, 2018 at 11:18:38AM +0200, Gert Wollny wrote:
> Am Montag, den 10.09.2018, 10:37 +0200 schrieb Gerd Hoffmann:
> ...
>
> > > The other question is of course, why did dma_buf_export fail for me
> > > ...
> >
> > What exactly did you try?
>
> I ran
>
> qemu-system-x86_64 -enable-kvm -smp 5 -M q35 -m 8G \
> -drive format=raw,file=ubuntu.raw,if=virtio \
> -net nic,model=virtio -net user,hostfwd=tcp::2224-:22 \
> -vga virtio

By default qemu doesn't use memfd for backing storage, you have to
explicitly configure qemu that way (see qemu commit log of the test
branch):

qemu-system-x86_64 -m 2G
-object memory-backend-memfd,id=ram,size=2G
-numa node,memdev=ram"

HTH,
Gerd


2018-09-10 11:33:51

by Gert Wollny

[permalink] [raw]
Subject: Re: [PATCH v7] Add udmabuf misc device

Am Montag, den 10.09.2018, 12:53 +0200 schrieb Gerd Hoffmann:
>
> By default qemu doesn't use memfd for backing storage, you have to
> explicitly configure qemu that way (see qemu commit log of the test
> branch):
>
> qemu-system-x86_64 -m 2G
> -object memory-backend-memfd,id=ram,size=2G
> -numa node,memdev=ram"

Thanks, but that doesn't seem to help. Are there any host kernel
configuration parameters or features that I should set (apart from
enabling udmabuf)?

Best,
Gert




2018-09-10 12:14:17

by Laurent Pinchart

[permalink] [raw]
Subject: Re: [PATCH v7] Add udmabuf misc device

Hi Gerd,

Thank you for the patch.

CC'ing the linux-api mailing list as this creates a new userspace API.

On Monday, 27 August 2018 12:34:44 EEST Gerd Hoffmann wrote:
> A driver to let userspace turn memfd regions into dma-bufs.
>
> Use case: Allows qemu create dmabufs for the vga framebuffer or
> virtio-gpu ressources. Then they can be passed around to display
> those guest things on the host. To spice client for classic full
> framebuffer display, and hopefully some day to wayland server for
> seamless guest window display.
>
> qemu test branch:
> https://git.kraxel.org/cgit/qemu/log/?h=sirius/udmabuf
>
> Cc: David Airlie <[email protected]>
> Cc: Tomeu Vizoso <[email protected]>
> Cc: Laurent Pinchart <[email protected]>
> Cc: Daniel Vetter <[email protected]>
> Signed-off-by: Gerd Hoffmann <[email protected]>
> ---
> Documentation/ioctl/ioctl-number.txt | 1 +
> include/uapi/linux/udmabuf.h | 33 +++
> drivers/dma-buf/udmabuf.c | 287 +++++++++++++++++++
> tools/testing/selftests/drivers/dma-buf/udmabuf.c | 96 ++++++++
> MAINTAINERS | 16 ++
> drivers/dma-buf/Kconfig | 8 +
> drivers/dma-buf/Makefile | 1 +
> tools/testing/selftests/drivers/dma-buf/Makefile | 5 +
> 8 files changed, 447 insertions(+)
> create mode 100644 include/uapi/linux/udmabuf.h
> create mode 100644 drivers/dma-buf/udmabuf.c
> create mode 100644 tools/testing/selftests/drivers/dma-buf/udmabuf.c
> create mode 100644 tools/testing/selftests/drivers/dma-buf/Makefile

[snip]

> diff --git a/include/uapi/linux/udmabuf.h b/include/uapi/linux/udmabuf.h
> new file mode 100644
> index 0000000000..46b6532ed8
> --- /dev/null
> +++ b/include/uapi/linux/udmabuf.h
> @@ -0,0 +1,33 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +#ifndef _UAPI_LINUX_UDMABUF_H
> +#define _UAPI_LINUX_UDMABUF_H
> +
> +#include <linux/types.h>
> +#include <linux/ioctl.h>
> +
> +#define UDMABUF_FLAGS_CLOEXEC 0x01
> +
> +struct udmabuf_create {
> + __u32 memfd;
> + __u32 flags;
> + __u64 offset;
> + __u64 size;
> +};
> +
> +struct udmabuf_create_item {
> + __u32 memfd;
> + __u32 __pad;
> + __u64 offset;
> + __u64 size;
> +};
> +
> +struct udmabuf_create_list {
> + __u32 flags;
> + __u32 count;
> + struct udmabuf_create_item list[];
> +};
> +
> +#define UDMABUF_CREATE _IOW('u', 0x42, struct udmabuf_create)

Why do you start at 0x42 if you reserve the 0x40-0x4f range ?

> +#define UDMABUF_CREATE_LIST _IOW('u', 0x43, struct udmabuf_create_list)

Where's the documentation ? :-)

> +#endif /* _UAPI_LINUX_UDMABUF_H */
> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
> new file mode 100644
> index 0000000000..8e24204526
> --- /dev/null
> +++ b/drivers/dma-buf/udmabuf.c
> @@ -0,0 +1,287 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/device.h>
> +#include <linux/kernel.h>
> +#include <linux/slab.h>
> +#include <linux/miscdevice.h>
> +#include <linux/dma-buf.h>
> +#include <linux/highmem.h>
> +#include <linux/cred.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/memfd.h>

Could you please keep the #include alphabetically sorted ? It helps locating
duplicates.

> +#include <uapi/linux/udmabuf.h>

I think you can just #include <linux/udmabuf.h>, no need to use the uapi/
prefix.

> +struct udmabuf {
> + u32 pagecount;
> + struct page **pages;
> +};
> +
> +static int udmabuf_vm_fault(struct vm_fault *vmf)
> +{
> + struct vm_area_struct *vma = vmf->vma;
> + struct udmabuf *ubuf = vma->vm_private_data;
> +
> + if (WARN_ON(vmf->pgoff >= ubuf->pagecount))
> + return VM_FAULT_SIGBUS;

Just curious, when do you expect this to happen ?

> + vmf->page = ubuf->pages[vmf->pgoff];
> + get_page(vmf->page);
> + return 0;
> +}
> +
> +static const struct vm_operations_struct udmabuf_vm_ops = {
> + .fault = udmabuf_vm_fault,
> +};
> +
> +static int mmap_udmabuf(struct dma_buf *buf, struct vm_area_struct *vma)
> +{
> + struct udmabuf *ubuf = buf->priv;
> +
> + if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
> + return -EINVAL;
> +
> + vma->vm_ops = &udmabuf_vm_ops;
> + vma->vm_private_data = ubuf;
> + return 0;
> +}
> +
> +static struct sg_table *map_udmabuf(struct dma_buf_attachment *at,
> + enum dma_data_direction direction)
> +{
> + struct udmabuf *ubuf = at->dmabuf->priv;
> + struct sg_table *sg;
> +
> + sg = kzalloc(sizeof(*sg), GFP_KERNEL);
> + if (!sg)
> + goto err1;

You can return ERR_PTR(-ENOMEM) directly.

> + if (sg_alloc_table_from_pages(sg, ubuf->pages, ubuf->pagecount,
> + 0, ubuf->pagecount << PAGE_SHIFT,
> + GFP_KERNEL) < 0)

Shouldn't you propagate the return value from sg_alloc_table_from_pages() ?

> + goto err2;
> + if (!dma_map_sg(at->dev, sg->sgl, sg->nents, direction))
> + goto err3;
> +
> + return sg;
> +
> +err3:
> + sg_free_table(sg);
> +err2:
> + kfree(sg);
> +err1:
> + return ERR_PTR(-ENOMEM);

You can merge all these labels with

error:
if (sg) {
sg_free_table(sg);
kfree(sg);
}

return ERR_PTR(-ENOMEM);

> +}
> +
> +static void unmap_udmabuf(struct dma_buf_attachment *at,
> + struct sg_table *sg,
> + enum dma_data_direction direction)
> +{
> + sg_free_table(sg);
> + kfree(sg);
> +}
> +
> +static void release_udmabuf(struct dma_buf *buf)
> +{
> + struct udmabuf *ubuf = buf->priv;
> + pgoff_t pg;
> +
> + for (pg = 0; pg < ubuf->pagecount; pg++)

Shouldn't both pg and pagecount have the same type ? Why does the loop counter
qualify for a pgoff_t and not the pagecount field ? Granted, the pgoff_t is
documented as "The type of an index in the page cache", so pagecount doesn't
really quality, but the fact that one is an unsigned long and the other a u32
makes me think that something is wrong.

> + put_page(ubuf->pages[pg]);
> + kfree(ubuf->pages);
> + kfree(ubuf);
> +}
> +
> +static void *kmap_udmabuf(struct dma_buf *buf, unsigned long page_num)
> +{
> + struct udmabuf *ubuf = buf->priv;
> + struct page *page = ubuf->pages[page_num];
> +
> + return kmap(page);
> +}
> +
> +static void kunmap_udmabuf(struct dma_buf *buf, unsigned long page_num,
> + void *vaddr)
> +{
> + kunmap(vaddr);
> +}
> +
> +static struct dma_buf_ops udmabuf_ops = {

static const struct

> + .map_dma_buf = map_udmabuf,
> + .unmap_dma_buf = unmap_udmabuf,
> + .release = release_udmabuf,
> + .map = kmap_udmabuf,
> + .unmap = kunmap_udmabuf,
> + .mmap = mmap_udmabuf,
> +};
> +
> +#define SEALS_WANTED (F_SEAL_SHRINK)
> +#define SEALS_DENIED (F_SEAL_WRITE)
> +
> +static long udmabuf_create(struct udmabuf_create_list *head,
> + struct udmabuf_create_item *list)

Those two structures are not modified by the function, you can make them
const.

> +{
> + DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
> + struct file *memfd = NULL;
> + struct udmabuf *ubuf;
> + struct dma_buf *buf;
> + pgoff_t pgoff, pgcnt, pgidx, pgbuf;
> + struct page *page;
> + int seals, ret = -EINVAL;
> + u32 i, flags;
> +
> + ubuf = kzalloc(sizeof(struct udmabuf), GFP_KERNEL);

sizeof(*ubuf)

> + if (!ubuf)
> + return -ENOMEM;
> +
> + for (i = 0; i < head->count; i++) {
> + if (!IS_ALIGNED(list[i].offset, PAGE_SIZE))
> + goto err_free_ubuf;
> + if (!IS_ALIGNED(list[i].size, PAGE_SIZE))
> + goto err_free_ubuf;
> + ubuf->pagecount += list[i].size >> PAGE_SHIFT;

Is there a risk of overflowing pagecount ?

> + }
> + ubuf->pages = kmalloc_array(ubuf->pagecount, sizeof(struct page *),
> + GFP_KERNEL);

sizeof(*ubuf->pages)

> + if (!ubuf->pages) {
> + ret = -ENOMEM;
> + goto err_free_ubuf;
> + }
> +
> + pgbuf = 0;
> + for (i = 0; i < head->count; i++) {
> + memfd = fget(list[i].memfd);
> + if (!memfd)
> + goto err_put_pages;
> + if (!shmem_mapping(file_inode(memfd)->i_mapping))
> + goto err_put_pages;
> + seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
> + if (seals == -EINVAL ||
> + (seals & SEALS_WANTED) != SEALS_WANTED ||
> + (seals & SEALS_DENIED) != 0)
> + goto err_put_pages;

All these conditions will return -EINVAL. I'm not familiar with the memfd API,
should some error conditions return a different error code to make them
distinguishable by userspace ?

> + pgoff = list[i].offset >> PAGE_SHIFT;
> + pgcnt = list[i].size >> PAGE_SHIFT;
> + for (pgidx = 0; pgidx < pgcnt; pgidx++) {
> + page = shmem_read_mapping_page(
> + file_inode(memfd)->i_mapping, pgoff + pgidx);

Can't pgoff + pgcnt overflow the total number of avialble pages ?

> + if (IS_ERR(page)) {
> + ret = PTR_ERR(page);
> + goto err_put_pages;
> + }
> + ubuf->pages[pgbuf++] = page;
> + }
> + fput(memfd);
> + }
> + memfd = NULL;

I'd move this line just after fput(memfd) inside the loop to avoid
introduction bugs in the future if we add code that can break from the loop
before the fget() call.

> + exp_info.ops = &udmabuf_ops;
> + exp_info.size = ubuf->pagecount << PAGE_SHIFT;
> + exp_info.priv = ubuf;
> +
> + buf = dma_buf_export(&exp_info);
> + if (IS_ERR(buf)) {
> + ret = PTR_ERR(buf);
> + goto err_put_pages;
> + }
> +
> + flags = 0;
> + if (head->flags & UDMABUF_FLAGS_CLOEXEC)
> + flags |= O_CLOEXEC;
> + return dma_buf_fd(buf, flags);
> +
> +err_put_pages:
> + while (pgbuf > 0)
> + put_page(ubuf->pages[--pgbuf]);

If you initialize pgbuf to 0 you can merge the two error labels.

> +err_free_ubuf:
> + fput(memfd);
> + kfree(ubuf->pages);
> + kfree(ubuf);
> + return ret;
> +}
> +
> +static long udmabuf_ioctl_create(struct file *filp, unsigned long arg)
> +{
> + struct udmabuf_create create;
> + struct udmabuf_create_list head;
> + struct udmabuf_create_item list;
> +
> + if (copy_from_user(&create, (void __user *)arg,
> + sizeof(struct udmabuf_create)))

sizeof(create)

> + return -EFAULT;
> +
> + head.flags = create.flags;
> + head.count = 1;
> + list.memfd = create.memfd;
> + list.offset = create.offset;
> + list.size = create.size;
> +
> + return udmabuf_create(&head, &list);
> +}
> +
> +static long udmabuf_ioctl_create_list(struct file *filp, unsigned long arg)
> +{
> + struct udmabuf_create_list head;
> + struct udmabuf_create_item *list;
> + int ret = -EINVAL;
> + u32 lsize;
> +
> + if (copy_from_user(&head, (void __user *)arg, sizeof(head)))
> + return -EFAULT;
> + if (head.count > 1024)
> + return -EINVAL;
> + lsize = sizeof(struct udmabuf_create_item) * head.count;
> + list = memdup_user((void __user *)(arg + sizeof(head)), lsize);
> + if (IS_ERR(list))
> + return PTR_ERR(list);
> +
> + ret = udmabuf_create(&head, list);
> + kfree(list);
> + return ret;
> +}
> +
> +static long udmabuf_ioctl(struct file *filp, unsigned int ioctl,
> + unsigned long arg)
> +{
> + long ret;
> +
> + switch (ioctl) {
> + case UDMABUF_CREATE:
> + ret = udmabuf_ioctl_create(filp, arg);
> + break;
> + case UDMABUF_CREATE_LIST:
> + ret = udmabuf_ioctl_create_list(filp, arg);
> + break;
> + default:
> + ret = -EINVAL;

The proper error code for invalid ioctls is -ENOTTY.

> + break;
> + }
> + return ret;
> +}
> +
> +static const struct file_operations udmabuf_fops = {
> + .owner = THIS_MODULE,
> + .unlocked_ioctl = udmabuf_ioctl,
> +};
> +
> +static struct miscdevice udmabuf_misc = {
> + .minor = MISC_DYNAMIC_MINOR,
> + .name = "udmabuf",
> + .fops = &udmabuf_fops,
> +};
> +
> +static int __init udmabuf_dev_init(void)
> +{
> + return misc_register(&udmabuf_misc);
> +}
> +
> +static void __exit udmabuf_dev_exit(void)
> +{
> + misc_deregister(&udmabuf_misc);
> +}
> +
> +module_init(udmabuf_dev_init)
> +module_exit(udmabuf_dev_exit)
> +
> +MODULE_AUTHOR("Gerd Hoffmann <[email protected]>");
> +MODULE_LICENSE("GPL v2");
> diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c
> b/tools/testing/selftests/drivers/dma-buf/udmabuf.c new file mode 100644
> index 0000000000..376b1d6730
> --- /dev/null
> +++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
> @@ -0,0 +1,96 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <malloc.h>
> +
> +#include <sys/ioctl.h>
> +#include <sys/syscall.h>
> +#include <linux/memfd.h>
> +#include <linux/udmabuf.h>
> +
> +#define TEST_PREFIX "drivers/dma-buf/udmabuf"
> +#define NUM_PAGES 4
> +
> +static int memfd_create(const char *name, unsigned int flags)
> +{
> + return syscall(__NR_memfd_create, name, flags);
> +}
> +
> +int main(int argc, char *argv[])
> +{
> + struct udmabuf_create create;
> + int devfd, memfd, buf, ret;
> + off_t size;
> + void *mem;
> +
> + devfd = open("/dev/udmabuf", O_RDWR);
> + if (devfd < 0) {
> + printf("%s: [skip,no-udmabuf]\n", TEST_PREFIX);
> + exit(77);
> + }
> +
> + memfd = memfd_create("udmabuf-test", MFD_CLOEXEC);
> + if (memfd < 0) {
> + printf("%s: [skip,no-memfd]\n", TEST_PREFIX);
> + exit(77);
> + }
> +
> + size = getpagesize() * NUM_PAGES;
> + ret = ftruncate(memfd, size);
> + if (ret == -1) {
> + printf("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
> + exit(1);
> + }
> +
> + memset(&create, 0, sizeof(create));
> +
> + /* should fail (offset not page aligned) */
> + create.memfd = memfd;
> + create.offset = getpagesize()/2;
> + create.size = getpagesize();
> + buf = ioctl(devfd, UDMABUF_CREATE, &create);
> + if (buf >= 0) {
> + printf("%s: [FAIL,test-1]\n", TEST_PREFIX);
> + exit(1);
> + }
> +
> + /* should fail (size not multiple of page) */
> + create.memfd = memfd;
> + create.offset = 0;
> + create.size = getpagesize()/2;
> + buf = ioctl(devfd, UDMABUF_CREATE, &create);
> + if (buf >= 0) {
> + printf("%s: [FAIL,test-2]\n", TEST_PREFIX);
> + exit(1);
> + }
> +
> + /* should fail (not memfd) */
> + create.memfd = 0; /* stdin */
> + create.offset = 0;
> + create.size = size;
> + buf = ioctl(devfd, UDMABUF_CREATE, &create);
> + if (buf >= 0) {
> + printf("%s: [FAIL,test-3]\n", TEST_PREFIX);
> + exit(1);
> + }
> +
> + /* should work */
> + create.memfd = memfd;
> + create.offset = 0;
> + create.size = size;
> + buf = ioctl(devfd, UDMABUF_CREATE, &create);
> + if (buf < 0) {
> + printf("%s: [FAIL,test-4]\n", TEST_PREFIX);
> + exit(1);
> + }
> +
> + fprintf(stderr, "%s: ok\n", TEST_PREFIX);
> + close(buf);
> + close(memfd);
> + close(devfd);
> + return 0;
> +}
> diff --git a/MAINTAINERS b/MAINTAINERS
> index a5b256b259..11a9b04277 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -14934,6 +14934,14 @@ S: Maintained
> F: Documentation/filesystems/udf.txt
> F: fs/udf/
>
> +UDMABUF DRIVER
> +M: Gerd Hoffmann <[email protected]>
> +L: [email protected]
> +S: Maintained
> +F: drivers/dma-buf/udmabuf.c
> +F: include/uapi/linux/udmabuf.h
> +F: tools/testing/selftests/drivers/dma-buf/udmabuf.c
> +
> UDRAW TABLET
> M: Bastien Nocera <[email protected]>
> L: [email protected]
> @@ -15343,6 +15351,14 @@ F: arch/x86/um/
> F: fs/hostfs/
> F: fs/hppfs/
>
> +USERSPACE DMA BUFFER DRIVER
> +M: Gerd Hoffmann <[email protected]>
> +S: Maintained
> +L: [email protected]
> +F: drivers/dma-buf/udmabuf.c
> +F: include/uapi/linux/udmabuf.h
> +T: git git://anongit.freedesktop.org/drm/drm-misc

One entry should be enough.

> USERSPACE I/O (UIO)
> M: Greg Kroah-Hartman <[email protected]>
> S: Maintained

[snip]

--
Regards,

Laurent Pinchart




2018-09-10 13:38:13

by Gerd Hoffmann

[permalink] [raw]
Subject: Re: [PATCH v7] Add udmabuf misc device

On Mon, Sep 10, 2018 at 01:31:08PM +0200, Gert Wollny wrote:
> Am Montag, den 10.09.2018, 12:53 +0200 schrieb Gerd Hoffmann:
> >
> > By default qemu doesn't use memfd for backing storage, you have to
> > explicitly configure qemu that way (see qemu commit log of the test
> > branch):
> >
> > qemu-system-x86_64 -m 2G
> > -object memory-backend-memfd,id=ram,size=2G
> > -numa node,memdev=ram"
>
> Thanks, but that doesn't seem to help.

Note the memory is specified twice, once with -m, once for the
memory-backend-memfd object. The two must match of course (or, to be
exact, the sum of all memory backends must match the total memory, but
unless you want create a virtual numa machine there is no reason to have
multiple memory-backends).

> Are there any host kernel
> configuration parameters or features that I should set (apart from
> enabling udmabuf)?

I'm not aware of any unusual parameters required. My config is based on
rhel-7, then stripped down with localmodconfig, then enabled the bits I
want in my test kernel.

Oh, wait, CONFIG_MEMFD_CREATE could be the one. I guess I should add a
Kconfig dependency. Even though udmabuf will build just fine without
that (because memfd is basically shmem with some features on top) it
doesn't buy you much if the memfd_create syscall is not available ...

cheers,
Gerd


2018-09-10 14:20:06

by Gert Wollny

[permalink] [raw]
Subject: Re: [PATCH v7] Add udmabuf misc device

Am Montag, den 10.09.2018, 15:30 +0200 schrieb Gerd Hoffmann:
> On Mon, Sep 10, 2018 at 01:31:08PM +0200, Gert Wollny wrote:
> > Am Montag, den 10.09.2018, 12:53 +0200 schrieb Gerd Hoffmann:
> > >
> > > By default qemu doesn't use memfd for backing storage, you have
> > > to
> > > explicitly configure qemu that way (see qemu commit log of the
> > > test
> > > branch):
> > >
> > > qemu-system-x86_64 -m 2G
> > > -object memory-backend-memfd,id=ram,size=2G
> > > -numa node,memdev=ram"
> >
> > Thanks, but that doesn't seem to help.
>
> Note the memory is specified twice, once with -m, once for the
> memory-backend-memfd object. The two must match of course (or, to be
> exact, the sum of all memory backends must match the total memory,
> but unless you want create a virtual numa machine there is no reason
> to have multiple memory-backends).
Qemu actually complained when it was not the same.

>
> > Are there any host kernel configuration parameters or features that
> > I should set (apart from enabling udmabuf)?
>
> I'm not aware of any unusual parameters required. My config is based
> on rhel-7, then stripped down with localmodconfig, then enabled the
> bits I want in my test kernel.
I use Gentoo and my kernel is rather stripped down ...

>
> Oh, wait, CONFIG_MEMFD_CREATE could be the one. I guess I should add
> a Kconfig dependency.
This was enabled, so not sure what else could be missing, I guess I'll
have to add some logging to get an idea.

Best,
Gert


2018-09-10 16:28:45

by Gert Wollny

[permalink] [raw]
Subject: Re: [PATCH v7] Add udmabuf misc device

Am Montag, den 10.09.2018, 15:30 +0200 schrieb Gerd Hoffmann:
> On Mon, Sep 10, 2018 at 01:31:08PM +0200, Gert Wollny wrote:
> > Am Montag, den 10.09.2018, 12:53 +0200 schrieb Gerd Hoffmann:
> > >
> > > By default qemu doesn't use memfd for backing storage, you have
> > > to
> > > explicitly configure qemu that way (see qemu commit log of the
> > > test
> > > branch):
> > >
> > > qemu-system-x86_64 -m 2G
> > > -object memory-backend-memfd,id=ram,size=2G
> > > -numa node,memdev=ram"
> >
> > Thanks, but that doesn't seem to help.
>
With some more debugging I found

WARNING: CPU: 2 PID: 14658 at drivers/dma-buf/dma-buf.c:410
dma_buf_export+0x85/0x270 which refers to dma_buf_export:414

if (WARN_ON(!exp_info->priv
|| !exp_info->ops
|| !exp_info->ops->map_dma_buf
|| !exp_info->ops->unmap_dma_buf
|| !exp_info->ops->release
|| !exp_info->ops->map_atomic
|| !exp_info->ops->map
|| !exp_info->ops->mmap)) {
return ERR_PTR(-EINVAL);
}

Specifically, map_atomic is NULL, but as I can see from the git
version, this was removed some time ago, going to test the 4.19 rc2
kernel now,

Best,
Gert





2018-09-10 17:42:03

by Gert Wollny

[permalink] [raw]
Subject: Re: [PATCH v7] Add udmabuf misc device

Using the kernel 4.19.0-rc2 it works now, so With the fix for not
calling fput when memfd == NULL the patch is
Reviewed-By: Gert Wollny <[email protected]>

best,
Gert

Am Montag, den 10.09.2018, 15:30 +0200 schrieb Gerd Hoffmann:
> On Mon, Sep 10, 2018 at 01:31:08PM +0200, Gert Wollny wrote:
> > Am Montag, den 10.09.2018, 12:53 +0200 schrieb Gerd Hoffmann:
> > >
> > > By default qemu doesn't use memfd for backing storage, you have
> > > to
> > > explicitly configure qemu that way (see qemu commit log of the
> > > test
> > > branch):
> > >
> > > qemu-system-x86_64 -m 2G
> > > -object memory-backend-memfd,id=ram,size=2G
> > > -numa node,memdev=ram"
> >
> > Thanks, but that doesn't seem to help.
>
> Note the memory is specified twice, once with -m, once for the
> memory-backend-memfd object. The two must match of course (or, to be
> exact, the sum of all memory backends must match the total memory,
> but
> unless you want create a virtual numa machine there is no reason to
> have
> multiple memory-backends).
>
> > Are there any host kernel
> > configuration parameters or features that I should set (apart from
> > enabling udmabuf)?
>
> I'm not aware of any unusual parameters required. My config is based
> on
> rhel-7, then stripped down with localmodconfig, then enabled the bits
> I
> want in my test kernel.
>
> Oh, wait, CONFIG_MEMFD_CREATE could be the one. I guess I should add
> a
> Kconfig dependency. Even though udmabuf will build just fine without
> that (because memfd is basically shmem with some features on top) it
> doesn't buy you much if the memfd_create syscall is not available ...
>
> cheers,
> Gerd
>

2018-09-11 06:51:43

by Gerd Hoffmann

[permalink] [raw]
Subject: Re: [PATCH v7] Add udmabuf misc device

Hi,

> > +#define UDMABUF_CREATE _IOW('u', 0x42, struct udmabuf_create)
>
> Why do you start at 0x42 if you reserve the 0x40-0x4f range ?

No particular strong reason, just that using 42 was less boring than
starting with 0x40.

> > +#define UDMABUF_CREATE_LIST _IOW('u', 0x43, struct udmabuf_create_list)
>
> Where's the documentation ? :-)

Isn't it simple enough?

But, well, yes, I guess I can add some kerneldoc comments.

> > +static int udmabuf_vm_fault(struct vm_fault *vmf)
> > +{
> > + struct vm_area_struct *vma = vmf->vma;
> > + struct udmabuf *ubuf = vma->vm_private_data;
> > +
> > + if (WARN_ON(vmf->pgoff >= ubuf->pagecount))
> > + return VM_FAULT_SIGBUS;
>
> Just curious, when do you expect this to happen ?

It should not. If it actually happens it would be a bug somewhere,
thats why the WARN_ON.

> > + struct udmabuf *ubuf;

> > + ubuf = kzalloc(sizeof(struct udmabuf), GFP_KERNEL);
>
> sizeof(*ubuf)

Why? Should not make a difference ...

> > + memfd = fget(list[i].memfd);
> > + if (!memfd)
> > + goto err_put_pages;
> > + if (!shmem_mapping(file_inode(memfd)->i_mapping))
> > + goto err_put_pages;
> > + seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
> > + if (seals == -EINVAL ||
> > + (seals & SEALS_WANTED) != SEALS_WANTED ||
> > + (seals & SEALS_DENIED) != 0)
> > + goto err_put_pages;
>
> All these conditions will return -EINVAL. I'm not familiar with the memfd API,
> should some error conditions return a different error code to make them
> distinguishable by userspace ?

Hmm, I guess EBADFD would be reasonable in case the file handle isn't a
memfd. Other suggestions?

I'll prepare a fixup patch series addressing most of the other
review comments.

cheers,
Gerd


2018-09-11 09:55:36

by Laurent Pinchart

[permalink] [raw]
Subject: Re: [PATCH v7] Add udmabuf misc device

Hi Gerd,

On Tuesday, 11 September 2018 09:50:14 EEST Gerd Hoffmann wrote:
> Hi,
>
> >> +#define UDMABUF_CREATE _IOW('u', 0x42, struct udmabuf_create)
> >
> > Why do you start at 0x42 if you reserve the 0x40-0x4f range ?
>
> No particular strong reason, just that using 42 was less boring than
> starting with 0x40.
>
> >> +#define UDMABUF_CREATE_LIST _IOW('u', 0x43, struct
> >> udmabuf_create_list)
> >
> > Where's the documentation ? :-)
>
> Isn't it simple enough?

No kernel UAPI is simple enough to get away without documenting it.

> But, well, yes, I guess I can add some kerneldoc comments.
>
> >> +static int udmabuf_vm_fault(struct vm_fault *vmf)
> >> +{
> >> + struct vm_area_struct *vma = vmf->vma;
> >> + struct udmabuf *ubuf = vma->vm_private_data;
> >> +
> >> + if (WARN_ON(vmf->pgoff >= ubuf->pagecount))
> >> + return VM_FAULT_SIGBUS;
> >
> > Just curious, when do you expect this to happen ?
>
> It should not. If it actually happens it would be a bug somewhere,
> thats why the WARN_ON.

But you seem to consider that this condition that should never happen still
has a high enough chance of happening that it's worth a WARN_ON(). I was
wondering why this one in particular, and not other conditions that also can't
happen and are not checked through the code.

> >> + struct udmabuf *ubuf;
> >>
> >> + ubuf = kzalloc(sizeof(struct udmabuf), GFP_KERNEL);
> >
> > sizeof(*ubuf)
>
> Why? Should not make a difference ...

Because the day we replace

struct udmabuf *ubuf;

with

struct udmabuf_ext *ubuf;

and forget to change the next line, we'll introduce a bug. That's why
sizeof(variable) is preferred over sizeof(type). Another reason is that I can
easily see that

ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);

is correct, while using sizeof(type) requires me to go and look up the
declaration of the variable.

> >> + memfd = fget(list[i].memfd);
> >> + if (!memfd)
> >> + goto err_put_pages;
> >> + if (!shmem_mapping(file_inode(memfd)->i_mapping))
> >> + goto err_put_pages;
> >> + seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
> >> + if (seals == -EINVAL ||
> >> + (seals & SEALS_WANTED) != SEALS_WANTED ||
> >> + (seals & SEALS_DENIED) != 0)
> >> + goto err_put_pages;
> >
> > All these conditions will return -EINVAL. I'm not familiar with the memfd
> > API, should some error conditions return a different error code to make
> > them distinguishable by userspace ?
>
> Hmm, I guess EBADFD would be reasonable in case the file handle isn't a
> memfd. Other suggestions?

I'll let others comment on this as I don't feel qualified to pick proper error
codes, not being familiar with the memfd API.

> I'll prepare a fixup patch series addressing most of the other
> review comments.

--
Regards,

Laurent Pinchart




2018-09-11 10:08:03

by Daniel Vetter

[permalink] [raw]
Subject: Re: [PATCH v7] Add udmabuf misc device

On Tue, Sep 11, 2018 at 11:50 AM, Laurent Pinchart
<[email protected]> wrote:
> Hi Gerd,
>
> On Tuesday, 11 September 2018 09:50:14 EEST Gerd Hoffmann wrote:
>> Hi,
>>
>> >> +#define UDMABUF_CREATE _IOW('u', 0x42, struct udmabuf_create)
>> >
>> > Why do you start at 0x42 if you reserve the 0x40-0x4f range ?
>>
>> No particular strong reason, just that using 42 was less boring than
>> starting with 0x40.
>>
>> >> +#define UDMABUF_CREATE_LIST _IOW('u', 0x43, struct
>> >> udmabuf_create_list)
>> >
>> > Where's the documentation ? :-)
>>
>> Isn't it simple enough?
>
> No kernel UAPI is simple enough to get away without documenting it.

Simplest option would be to throw a bit of kerneldoc into the uapi
header, add Documentation/driver-api/dma-buf.rst.
-Daniel

>
>> But, well, yes, I guess I can add some kerneldoc comments.
>>
>> >> +static int udmabuf_vm_fault(struct vm_fault *vmf)
>> >> +{
>> >> + struct vm_area_struct *vma = vmf->vma;
>> >> + struct udmabuf *ubuf = vma->vm_private_data;
>> >> +
>> >> + if (WARN_ON(vmf->pgoff >= ubuf->pagecount))
>> >> + return VM_FAULT_SIGBUS;
>> >
>> > Just curious, when do you expect this to happen ?
>>
>> It should not. If it actually happens it would be a bug somewhere,
>> thats why the WARN_ON.
>
> But you seem to consider that this condition that should never happen still
> has a high enough chance of happening that it's worth a WARN_ON(). I was
> wondering why this one in particular, and not other conditions that also can't
> happen and are not checked through the code.
>
>> >> + struct udmabuf *ubuf;
>> >>
>> >> + ubuf = kzalloc(sizeof(struct udmabuf), GFP_KERNEL);
>> >
>> > sizeof(*ubuf)
>>
>> Why? Should not make a difference ...
>
> Because the day we replace
>
> struct udmabuf *ubuf;
>
> with
>
> struct udmabuf_ext *ubuf;
>
> and forget to change the next line, we'll introduce a bug. That's why
> sizeof(variable) is preferred over sizeof(type). Another reason is that I can
> easily see that
>
> ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
>
> is correct, while using sizeof(type) requires me to go and look up the
> declaration of the variable.
>
>> >> + memfd = fget(list[i].memfd);
>> >> + if (!memfd)
>> >> + goto err_put_pages;
>> >> + if (!shmem_mapping(file_inode(memfd)->i_mapping))
>> >> + goto err_put_pages;
>> >> + seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
>> >> + if (seals == -EINVAL ||
>> >> + (seals & SEALS_WANTED) != SEALS_WANTED ||
>> >> + (seals & SEALS_DENIED) != 0)
>> >> + goto err_put_pages;
>> >
>> > All these conditions will return -EINVAL. I'm not familiar with the memfd
>> > API, should some error conditions return a different error code to make
>> > them distinguishable by userspace ?
>>
>> Hmm, I guess EBADFD would be reasonable in case the file handle isn't a
>> memfd. Other suggestions?
>
> I'll let others comment on this as I don't feel qualified to pick proper error
> codes, not being familiar with the memfd API.
>
>> I'll prepare a fixup patch series addressing most of the other
>> review comments.
>
> --
> Regards,
>
> Laurent Pinchart
>
>
>



--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

2018-09-11 12:04:35

by Gerd Hoffmann

[permalink] [raw]
Subject: Re: [PATCH v7] Add udmabuf misc device

> > >> + if (WARN_ON(vmf->pgoff >= ubuf->pagecount))
> > >> + return VM_FAULT_SIGBUS;
> > >
> > > Just curious, when do you expect this to happen ?
> >
> > It should not. If it actually happens it would be a bug somewhere,
> > thats why the WARN_ON.
>
> But you seem to consider that this condition that should never happen still
> has a high enough chance of happening that it's worth a WARN_ON(). I was
> wondering why this one in particular, and not other conditions that also can't
> happen and are not checked through the code.

Added it while writing the code, to get any coding mistake I make
flagged right away instead of things exploding later on.

I can drop it.

> > >> + ubuf = kzalloc(sizeof(struct udmabuf), GFP_KERNEL);
> > >
> > > sizeof(*ubuf)
> >
> > Why? Should not make a difference ...
>
> Because the day we replace
>
> struct udmabuf *ubuf;
>
> with
>
> struct udmabuf_ext *ubuf;
>
> and forget to change the next line, we'll introduce a bug. That's why
> sizeof(variable) is preferred over sizeof(type). Another reason is that I can
> easily see that
>
> ubuf = kzalloc(sizeof(*ubuf), GFP_KERNEL);
>
> is correct, while using sizeof(type) requires me to go and look up the
> declaration of the variable.

So it simplifies review, ok, will change it.

BTW: Maybe the kernel should pick up a neat trick from glib:

g_new0() is a macro which takes the type instead of the size as first
argument, and it casts the return value to that type. So the compiler
will throw warnings in case of a mismatch. That'll work better than
depending purely on the coder being careful and review catching the
remaining issues.

cheers,
Gerd


2018-09-11 20:48:09

by Yann Droneaud

[permalink] [raw]
Subject: Re: [v7] Add udmabuf misc device

Hi,

Le lundi 27 août 2018 à 11:34 +0200, Gerd Hoffmann a écrit :
> A driver to let userspace turn memfd regions into dma-bufs.
>
> Use case: Allows qemu create dmabufs for the vga framebuffer or
> virtio-gpu ressources. Then they can be passed around to display
> those guest things on the host. To spice client for classic full
> framebuffer display, and hopefully some day to wayland server for
> seamless guest window display.
>
> qemu test branch:
> https://git.kraxel.org/cgit/qemu/log/?h=sirius/udmabuf
>
> Cc: David Airlie <[email protected]>
> Cc: Tomeu Vizoso <[email protected]>
> Cc: Laurent Pinchart <[email protected]>
> Cc: Daniel Vetter <[email protected]>
> Signed-off-by: Gerd Hoffmann <[email protected]>
> Acked-by: Daniel Vetter <[email protected]>
> ---
> Documentation/ioctl/ioctl-number.txt | 1 +
> include/uapi/linux/udmabuf.h | 33 +++
> drivers/dma-buf/udmabuf.c | 287
> ++++++++++++++++++++++
> tools/testing/selftests/drivers/dma-buf/udmabuf.c | 96 ++++++++
> MAINTAINERS | 16 ++
> drivers/dma-buf/Kconfig | 8 +
> drivers/dma-buf/Makefile | 1 +
> tools/testing/selftests/drivers/dma-buf/Makefile | 5 +
> 8 files changed, 447 insertions(+)
> create mode 100644 include/uapi/linux/udmabuf.h
> create mode 100644 drivers/dma-buf/udmabuf.c
> create mode 100644 tools/testing/selftests/drivers/dma-buf/udmabuf.c
> create mode 100644 tools/testing/selftests/drivers/dma-buf/Makefile
>
> diff --git a/include/uapi/linux/udmabuf.h
> b/include/uapi/linux/udmabuf.h
> new file mode 100644
> index 0000000000..46b6532ed8
> --- /dev/null
> +++ b/include/uapi/linux/udmabuf.h
> @@ -0,0 +1,33 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +#ifndef _UAPI_LINUX_UDMABUF_H
> +#define _UAPI_LINUX_UDMABUF_H
> +
> +#include <linux/types.h>
> +#include <linux/ioctl.h>
> +
> +#define UDMABUF_FLAGS_CLOEXEC 0x01
> +
> +struct udmabuf_create {
> + __u32 memfd;
> + __u32 flags;
> + __u64 offset;
> + __u64 size;
> +};
> +
> +struct udmabuf_create_item {
> + __u32 memfd;
> + __u32 __pad;
> + __u64 offset;
> + __u64 size;
> +};
> +
> +struct udmabuf_create_list {
> + __u32 flags;
> + __u32 count;
> + struct udmabuf_create_item list[];
> +};
> +
> +#define UDMABUF_CREATE _IOW('u', 0x42, struct udmabuf_create)
> +#define UDMABUF_CREATE_LIST _IOW('u', 0x43, struct
> udmabuf_create_list)
> +
> +#endif /* _UAPI_LINUX_UDMABUF_H */
> diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
> new file mode 100644
> index 0000000000..8e24204526
> --- /dev/null
> +++ b/drivers/dma-buf/udmabuf.c
> +static long udmabuf_create(struct udmabuf_create_list *head,
> + struct udmabuf_create_item *list)
> +{
> + DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
> + struct file *memfd = NULL;
> + struct udmabuf *ubuf;
> + struct dma_buf *buf;
> + pgoff_t pgoff, pgcnt, pgidx, pgbuf;
> + struct page *page;
> + int seals, ret = -EINVAL;
> + u32 i, flags;
> +
> + ubuf = kzalloc(sizeof(struct udmabuf), GFP_KERNEL);
> + if (!ubuf)
> + return -ENOMEM;
> +
> + for (i = 0; i < head->count; i++) {

You need to check .__pad for unsupported value:

if (list[i].__pad) {
ret = -EINVAL;
goto err_free_ubuf;
}

> + if (!IS_ALIGNED(list[i].offset, PAGE_SIZE))
> + goto err_free_ubuf;
> + if (!IS_ALIGNED(list[i].size, PAGE_SIZE))
> + goto err_free_ubuf;
> + ubuf->pagecount += list[i].size >> PAGE_SHIFT;
> + }
> + ubuf->pages = kmalloc_array(ubuf->pagecount, sizeof(struct page
> *),
> + GFP_KERNEL);
> + if (!ubuf->pages) {
> + ret = -ENOMEM;
> + goto err_free_ubuf;
> + }
> +
> + pgbuf = 0;
> + for (i = 0; i < head->count; i++) {
> + memfd = fget(list[i].memfd);
> + if (!memfd)
> + goto err_put_pages;
> + if (!shmem_mapping(file_inode(memfd)->i_mapping))
> + goto err_put_pages;
> + seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
> + if (seals == -EINVAL ||
> + (seals & SEALS_WANTED) != SEALS_WANTED ||
> + (seals & SEALS_DENIED) != 0)
> + goto err_put_pages;
> + pgoff = list[i].offset >> PAGE_SHIFT;
> + pgcnt = list[i].size >> PAGE_SHIFT;
> + for (pgidx = 0; pgidx < pgcnt; pgidx++) {
> + page = shmem_read_mapping_page(
> + file_inode(memfd)->i_mapping, pgoff +
> pgidx);
> + if (IS_ERR(page)) {
> + ret = PTR_ERR(page);
> + goto err_put_pages;
> + }
> + ubuf->pages[pgbuf++] = page;
> + }
> + fput(memfd);
> + }
> + memfd = NULL;
> +
> + exp_info.ops = &udmabuf_ops;
> + exp_info.size = ubuf->pagecount << PAGE_SHIFT;
> + exp_info.priv = ubuf;
> +
> + buf = dma_buf_export(&exp_info);
> + if (IS_ERR(buf)) {
> + ret = PTR_ERR(buf);
> + goto err_put_pages;
> + }
> +
> + flags = 0;

You need to check .flags for unsupported value:

if (head->flags & ~UDMABUF_FLAGS_CLOEXEC)
return -EINVAL;

(at the beginning of the function, of course).

> + if (head->flags & UDMABUF_FLAGS_CLOEXEC)
> + flags |= O_CLOEXEC;
> + return dma_buf_fd(buf, flags);
> +
> +err_put_pages:
> + while (pgbuf > 0)
> + put_page(ubuf->pages[--pgbuf]);
> +err_free_ubuf:
> + fput(memfd);
> + kfree(ubuf->pages);
> + kfree(ubuf);
> + return ret;
> +}
> +

Regards

--
Yann Droneaud
OPTEYA