2008-12-04 01:51:44

by Edward Estabrook

[permalink] [raw]
Subject: [PATCH 1/1] Userspace I/O (UIO): Add support for userspace DMA (corrected)

From: Edward Estabrook <[email protected]>

Here is a patch that adds the ability to dynamically allocate and use
coherent DMA
from userspace by extending the Userspace IO driver. This patch applies against
2.6.28-rc6.

The gist of this implementation is to overload uio's mmap
functionality to allocate
and map a new DMA region on demand. The bus-specific DMA address as returned by
dma_alloc_coherent is made available to userspace in the 1st long word
of the newly
created region (as well as through the conventional 'addr' file in sysfs).

The kernel-api change is that passing an offset value of 0xFFFFF000UL
to the a uio
device's mmap operation will dynamically allocate a DMA region. This
cannot change/
break existing behavior as the previous UIO code only allowed a maximum of 5
mappings.

To allocate a DMA region you use the following:
/* Pass this magic number to mmap as offset to dynamically allocate a
chunk of memory */
#define DMA_MEM_ALLOCATE_MMAP_OFFSET 0xFFFFF000UL

void* memory = mmap (NULL, size, PROT_READ | PROT_WRITE , MAP_SHARED,
fd, DMA_MEM_ALLOCATE_MMAP_OFFSET);
u_int64_t *addr = *(u_int64_t *) memory;

where 'size' is the size in bytes of the region you want and fd is the
opened /dev/uioN file.

Allocation occurs in page sized pieces by design to ensure that
buffers are page-aligned.

Memory is released when the uio_unregister_device() is called.

Signed-off-by: Edward Estabrook <[email protected]>
---
--- linux-2.6.28-rc6/include/linux/uio_driver.h.orig 2008-12-02
09:39:34.000000000 -0800
+++ linux-2.6.28-rc6/include/linux/uio_driver.h 2008-12-02
10:55:01.000000000 -0800
@@ -5,6 +5,7 @@
* Copyright(C) 2005, Thomas Gleixner <[email protected]>
* Copyright(C) 2006, Hans J. Koch <[email protected]>
* Copyright(C) 2006, Greg Kroah-Hartman <[email protected]>
+ * Copyright(C) 2008, Edward Estabrook <[email protected]>
*
* Userspace IO driver.
*
@@ -26,7 +27,11 @@ struct uio_map;
* @size: size of IO
* @memtype: type of memory addr points to
* @internal_addr: ioremap-ped version of addr, for driver internal use
- * @map: for use by the UIO core only.
+ * (for DMA memory regions the dma_handle is stored here)
+ * @map: for use by the UIO core only
+ * @list: DMA-capable buffers ; undefined if not a DMA buffer
+ * @index: index number associated with this DMA-capable mapping
+ * (undefined if not a DMA buffer)
*/
struct uio_mem {
unsigned long addr;
@@ -34,9 +39,12 @@ struct uio_mem {
int memtype;
void __iomem *internal_addr;
struct uio_map *map;
+ struct list_head list;
+ unsigned index;
};

-#define MAX_UIO_MAPS 5
+/* Increased to accomodate many logical memory regions per BAR. */
+#define MAX_UIO_MAPS 100

struct uio_device;

@@ -46,6 +54,8 @@ struct uio_device;
* @name: device name
* @version: device driver version
* @mem: list of mappable memory regions, size==0 for end of list
+ * @dma_mem_head list of dynamically allocated DMA-capable memory regions
+ * @dma_mem_size number of entries in dma_mem_head (used to speed up insertion)
* @irq: interrupt number or UIO_IRQ_CUSTOM
* @irq_flags: flags for request_irq()
* @priv: optional private data
@@ -60,6 +70,8 @@ struct uio_info {
char *name;
char *version;
struct uio_mem mem[MAX_UIO_MAPS];
+ struct list_head dma_mem;
+ unsigned dma_mem_size;
long irq;
unsigned long irq_flags;
void *priv;
@@ -82,6 +94,16 @@ static inline int __must_check
extern void uio_unregister_device(struct uio_info *info);
extern void uio_event_notify(struct uio_info *info);

+/* uio_dev_get_name - return the name associated with this uio device */
+extern char *uio_dev_get_name(struct uio_device *idev);
+
+/* Starting index assigned to dynamically allocated regions. */
+#define UIO_DMA_MEM_BASE_INDEX 1000
+
+/* mmap dynamically allocates a DMA memory block if
+ * its offset parameter matches this value. */
+#define UIO_DMA_MEM_ALLOCATE_VM_PGOFF_MAGIC 0xFFFFFUL
+
/* defines for uio_info->irq */
#define UIO_IRQ_CUSTOM -1
#define UIO_IRQ_NONE -2
--- linux-2.6.28-rc6/drivers/uio/uio.c.orig 2008-12-03 02:01:16.000000000 -0800
+++ linux-2.6.28-rc6/drivers/uio/uio.c 2008-12-03 02:14:08.000000000 -0800
@@ -5,6 +5,7 @@
* Copyright(C) 2005, Thomas Gleixner <[email protected]>
* Copyright(C) 2006, Hans J. Koch <[email protected]>
* Copyright(C) 2006, Greg Kroah-Hartman <[email protected]>
+ * Copyright(C) 2008, Edward Estabrook <[email protected]>
*
* Userspace IO
*
@@ -21,6 +22,7 @@
#include <linux/idr.h>
#include <linux/string.h>
#include <linux/kobject.h>
+#include <linux/dma-mapping.h>
#include <linux/uio_driver.h>

#define UIO_MAX_DEVICES 255
@@ -62,7 +64,10 @@ struct uio_map {

static ssize_t map_addr_show(struct uio_mem *mem, char *buf)
{
- return sprintf(buf, "0x%lx\n", mem->addr);
+ if (mem->index < UIO_DMA_MEM_BASE_INDEX)
+ return sprintf(buf, "0x%lx\n", mem->addr);
+ else
+ return sprintf(buf, "0x%p\n", mem->internal_addr);
}

static ssize_t map_size_show(struct uio_mem *mem, char *buf)
@@ -171,14 +176,38 @@ static struct attribute_group uio_attr_g
.attrs = uio_attrs,
};

+static int uio_mem_add_attribute(struct uio_device *idev, struct uio_mem *umem)
+{
+ struct uio_map *map = kzalloc(sizeof(*map), GFP_KERNEL);
+ int ret = 0;
+ if (!map)
+ return -ENOMEM;
+
+ kobject_init(&map->kobj, &map_attr_type);
+ map->mem = umem;
+ umem->map = map;
+ ret = kobject_add(&map->kobj, idev->map_dir, "map%d", umem->index);
+ if (ret)
+ return ret;
+ return kobject_uevent(&map->kobj, KOBJ_ADD);
+}
+
+
/*
* device functions
*/
+
+/* uio_dev_get_name - return the name associated with this uio device */
+char *uio_dev_get_name(struct uio_device *idev)
+{
+ return idev->dev->bus_id;
+}
+EXPORT_SYMBOL_GPL(uio_dev_get_name);
+
static int uio_dev_add_attributes(struct uio_device *idev)
{
int ret;
int mi;
- int map_found = 0;
struct uio_mem *mem;
struct uio_map *map;

@@ -186,27 +215,21 @@ static int uio_dev_add_attributes(struct
if (ret)
goto err_group;

+ /*
+ * Always register 'maps' even if there are no static
+ * memory regions so it's ready for dynamic DMA maps
+ */
+ idev->map_dir = kobject_create_and_add("maps", &idev->dev->kobj);
+ if (!idev->map_dir)
+ goto err_remove_group;
+
for (mi = 0; mi < MAX_UIO_MAPS; mi++) {
mem = &idev->info->mem[mi];
if (mem->size == 0)
break;
- if (!map_found) {
- map_found = 1;
- idev->map_dir = kobject_create_and_add("maps",
- &idev->dev->kobj);
- if (!idev->map_dir)
- goto err;
- }
- map = kzalloc(sizeof(*map), GFP_KERNEL);
- if (!map)
- goto err;
- kobject_init(&map->kobj, &map_attr_type);
- map->mem = mem;
- mem->map = map;
- ret = kobject_add(&map->kobj, idev->map_dir, "map%d", mi);
- if (ret)
- goto err;
- ret = kobject_uevent(&map->kobj, KOBJ_ADD);
+ mem->index = mi;
+
+ ret = uio_mem_add_attribute(idev, mem);
if (ret)
goto err;
}
@@ -214,12 +237,13 @@ static int uio_dev_add_attributes(struct
return 0;

err:
- for (mi--; mi>=0; mi--) {
+ for (mi--; mi >= 0; mi--) {
mem = &idev->info->mem[mi];
map = mem->map;
kobject_put(&map->kobj);
}
kobject_put(idev->map_dir);
+err_remove_group:
sysfs_remove_group(&idev->dev->kobj, &uio_attr_grp);
err_group:
dev_err(idev->dev, "error creating sysfs files (%d)\n", ret);
@@ -230,12 +254,16 @@ static void uio_dev_del_attributes(struc
{
int mi;
struct uio_mem *mem;
+
+ /* Unregister static maps */
for (mi = 0; mi < MAX_UIO_MAPS; mi++) {
mem = &idev->info->mem[mi];
if (mem->size == 0)
break;
kobject_put(&mem->map->kobj);
}
+
+ /* Unregister containers */
kobject_put(idev->map_dir);
sysfs_remove_group(&idev->dev->kobj, &uio_attr_grp);
}
@@ -459,18 +487,39 @@ static ssize_t uio_write(struct file *fi
return retval ? retval : sizeof(s32);
}

-static int uio_find_mem_index(struct vm_area_struct *vma)
+
+/**
+ * uio_find_mem_region - return the memory region identified by vm->vm_pgoff
+ * @vma: vma descriptor as passed to mmap.
+ *
+ * returns memory region from either mem array or dma_mem list.
+ */
+static struct uio_mem *uio_find_mem_region(struct vm_area_struct *vma)
{
- int mi;
struct uio_device *idev = vma->vm_private_data;
+ struct uio_mem *umem;

- for (mi = 0; mi < MAX_UIO_MAPS; mi++) {
- if (idev->info->mem[mi].size == 0)
- return -1;
- if (vma->vm_pgoff == mi)
- return mi;
+
+ if (vma->vm_pgoff < UIO_DMA_MEM_BASE_INDEX) {
+ if (vma->vm_pgoff < MAX_UIO_MAPS
+ && idev->info->mem[vma->vm_pgoff].size != 0)
+ return &(idev->info->mem[vma->vm_pgoff]);
+ } else {
+ if (vma->vm_pgoff >= idev->info->dma_mem_size
+ + UIO_DMA_MEM_BASE_INDEX)
+ return 0; /* Too big: fail automatically */
+
+ /* Iterate thru looking for entry in which vm_pgoff matches
+ * index. Yes, this is slow, but it's only for test & debug
+ * access anyway.
+ */
+ list_for_each_entry(umem, &idev->info->dma_mem, list) {
+ if (umem->index == vma->vm_pgoff)
+ return umem;
+ }
}
- return -1;
+
+ return 0;
}

static void uio_vma_open(struct vm_area_struct *vma)
@@ -487,25 +536,24 @@ static void uio_vma_close(struct vm_area

static int uio_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- struct uio_device *idev = vma->vm_private_data;
struct page *page;
unsigned long offset;

- int mi = uio_find_mem_index(vma);
- if (mi < 0)
+ struct uio_mem *umem = uio_find_mem_region(vma);
+ if (!umem)
return VM_FAULT_SIGBUS;

/*
* We need to subtract mi because userspace uses offset = N*PAGE_SIZE
* to use mem[N].
*/
- offset = (vmf->pgoff - mi) << PAGE_SHIFT;
+ offset = (vmf->pgoff - umem->index) << PAGE_SHIFT;

- if (idev->info->mem[mi].memtype == UIO_MEM_LOGICAL)
- page = virt_to_page(idev->info->mem[mi].addr + offset);
+ if (umem->memtype == UIO_MEM_LOGICAL)
+ page = virt_to_page(umem->addr + offset);
else
- page = vmalloc_to_page((void *)idev->info->mem[mi].addr
- + offset);
+ page = vmalloc_to_page((void *)umem->addr + offset);
+
get_page(page);
vmf->page = page;
return 0;
@@ -519,9 +567,8 @@ static struct vm_operations_struct uio_v

static int uio_mmap_physical(struct vm_area_struct *vma)
{
- struct uio_device *idev = vma->vm_private_data;
- int mi = uio_find_mem_index(vma);
- if (mi < 0)
+ struct uio_mem *umem = uio_find_mem_region(vma);
+ if (!umem)
return -EINVAL;

vma->vm_flags |= VM_IO | VM_RESERVED;
@@ -530,7 +577,7 @@ static int uio_mmap_physical(struct vm_a

return remap_pfn_range(vma,
vma->vm_start,
- idev->info->mem[mi].addr >> PAGE_SHIFT,
+ umem->addr >> PAGE_SHIFT,
vma->vm_end - vma->vm_start,
vma->vm_page_prot);
}
@@ -543,12 +590,99 @@ static int uio_mmap_logical(struct vm_ar
return 0;
}

+/**
+ * uio_mem_create_dma_region - allocate a DMA region of size bytes
+ * @umem: the newly allocated uio_mem structure is returned here
+ * @size: the number of bytes to allocate
+ * @idev: the UIO device this region belongs to
+ *
+ * return 0 if success or negative error code on failure
+ */
+static int uio_mem_create_dma_region(struct uio_mem **umem,
+ unsigned long size,
+ struct uio_device *idev)
+{
+ int ret = 0;
+ unsigned long *addr;
+
+ *umem = kzalloc(sizeof(struct uio_mem), GFP_KERNEL);
+ if (!*umem) {
+ dev_err(idev->dev,
+ "Unable to allocate uio_mem structure.\n");
+ return -ENOMEM;
+ }
+
+ /* Allocate DMA-capable buffer */
+ addr = dma_alloc_coherent(idev->dev->parent, size,
+ (dma_addr_t *) &(*umem)->internal_addr,
+ GFP_KERNEL);
+ if (!addr) {
+ dev_warn(idev->dev, "Unable to allocate requested DMA-capable"
+ " block of size 0x%lx (%lu) during mmap in uio.\n",
+ size, size);
+ ret = -ENOMEM;
+ goto err_free_umem;
+ }
+
+ /* Store the physical address and index as the
+ * first two long words for userspace access */
+ (*umem)->addr = (unsigned long) addr;
+ addr[0] = (unsigned long) (*umem)->internal_addr;
+ (*umem)->memtype = UIO_MEM_LOGICAL;
+ (*umem)->size = size;
+ (*umem)->index = idev->info->dma_mem_size + UIO_DMA_MEM_BASE_INDEX;
+ addr[1] = (unsigned long) (*umem)->index;
+
+ /* Register the mapping in sysfs */
+ ret = uio_mem_add_attribute(idev, *umem);
+ if (ret) {
+ dev_err(idev->dev, "Unable to register sysfs entry for "
+ "DMA mapping (%d) in uio.\n", ret);
+ goto err_free_addr;
+ }
+
+ idev->info->dma_mem_size++;
+ list_add_tail(&((*umem)->list), &(idev->info->dma_mem));
+
+ return 0;
+
+err_free_addr:
+ dma_free_coherent(idev->dev->parent, size, addr,
+ (dma_addr_t) (*umem)->internal_addr);
+err_free_umem:
+ kfree(*umem);
+ *umem = 0;
+ return ret;
+}
+
+/**
+ * uio_mem_destroy_dma_region - destroy dynamically created DMA region
+ * @umem: UIO memory region to destroy
+ * @idev: the UIO device this region belongs to
+ *
+ */
+static void uio_mem_destroy_dma_region(struct uio_mem *umem,
+ struct uio_device *idev)
+{
+ /* Remove sysfs attributes */
+ struct uio_map *map = umem->map;
+ kobject_put(&map->kobj);
+
+ /* Free the buffer itself */
+ dma_free_coherent(idev->dev->parent, umem->size, (void *) umem->addr,
+ (dma_addr_t) umem->internal_addr);
+ list_del(&(umem->list));
+
+ kfree(umem);
+}
+
static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
{
struct uio_listener *listener = filep->private_data;
struct uio_device *idev = listener->dev;
- int mi;
+ struct uio_mem *umem = 0;
unsigned long requested_pages, actual_pages;
+ unsigned long requested_size;
int ret = 0;

if (vma->vm_end < vma->vm_start)
@@ -556,12 +690,24 @@ static int uio_mmap(struct file *filep,

vma->vm_private_data = idev;

- mi = uio_find_mem_index(vma);
- if (mi < 0)
- return -EINVAL;
+ requested_size = vma->vm_end - vma->vm_start;
+ requested_pages = requested_size >> PAGE_SHIFT;
+
+ if (vma->vm_pgoff == UIO_DMA_MEM_ALLOCATE_VM_PGOFF_MAGIC) {
+ /* Create a memory region and change
+ * vm_pgoff to the newly created index */
+ ret = uio_mem_create_dma_region(&umem, requested_size, idev);
+ if (ret)
+ return ret;
+
+ vma->vm_pgoff = umem->index;
+ } else {
+ umem = uio_find_mem_region(vma);
+ if (!umem)
+ return -EINVAL;
+ }

- requested_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
- actual_pages = (idev->info->mem[mi].size + PAGE_SIZE -1) >> PAGE_SHIFT;
+ actual_pages = (umem->size + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (requested_pages > actual_pages)
return -EINVAL;

@@ -570,14 +716,14 @@ static int uio_mmap(struct file *filep,
return ret;
}

- switch (idev->info->mem[mi].memtype) {
- case UIO_MEM_PHYS:
- return uio_mmap_physical(vma);
- case UIO_MEM_LOGICAL:
- case UIO_MEM_VIRTUAL:
- return uio_mmap_logical(vma);
- default:
- return -EINVAL;
+ switch (umem->memtype) {
+ case UIO_MEM_PHYS:
+ return uio_mmap_physical(vma);
+ case UIO_MEM_LOGICAL:
+ case UIO_MEM_VIRTUAL:
+ return uio_mmap_logical(vma);
+ default:
+ return -EINVAL;
}
}

@@ -658,6 +804,50 @@ static void uio_class_destroy(void)
kref_put(&uio_class->kref, release_uio_class);
}

+void uio_remove_all_dma_regions(struct uio_info *info)
+{
+ struct uio_mem *mem, *pos;
+
+ list_for_each_entry_safe(pos, mem, &info->dma_mem, list) {
+ uio_mem_destroy_dma_region(pos, info->uio_dev);
+ }
+}
+
+/**
+ * uio_validate_mem - ensure memory regions are page-aligned
+ * and unused regions are zeroed
+ * @idev: The UIO device containing memory regions to check
+ *
+ * returns 0 on success or -EINVAL if misaligned.
+ */
+static int uio_validate_mem(struct uio_device *idev)
+{
+ int ret;
+ int mi;
+ struct uio_mem *mem;
+
+ for (mi = 0; mi < MAX_UIO_MAPS && idev->info->mem[mi].size != 0; mi++) {
+ mem = &idev->info->mem[mi];
+
+ if ((mem->addr & PAGE_MASK) != mem->addr) {
+ ret = -EINVAL;
+ goto err_misaligned;
+ }
+ }
+
+ /* zero out mem->size for unused regions for faster/safer lookup */
+ for (; mi < MAX_UIO_MAPS; mi++)
+ idev->info->mem[mi].size = 0;
+
+ return 0;
+err_misaligned:
+ dev_err(
+ idev->dev,
+ "mapable memory regions must be page-aligned (%d)\n",
+ ret);
+ return ret;
+}
+
/**
* uio_register_device - register a new userspace IO device
* @owner: module that creates the new device
@@ -677,6 +867,8 @@ int __uio_register_device(struct module
return -EINVAL;

info->uio_dev = NULL;
+ INIT_LIST_HEAD(&info->dma_mem);
+ info->dma_mem_size = 0;

ret = init_uio_class();
if (ret)
@@ -706,6 +898,10 @@ int __uio_register_device(struct module
goto err_device_create;
}

+ ret = uio_validate_mem(idev);
+ if (ret)
+ goto err_validate_mem;
+
ret = uio_dev_add_attributes(idev);
if (ret)
goto err_uio_dev_add_attributes;
@@ -714,7 +910,8 @@ int __uio_register_device(struct module

if (idev->info->irq >= 0) {
ret = request_irq(idev->info->irq, uio_interrupt,
- idev->info->irq_flags, idev->info->name, idev);
+ idev->info->irq_flags, idev->info->name,
+ idev);
if (ret)
goto err_request_irq;
}
@@ -723,6 +920,7 @@ int __uio_register_device(struct module

err_request_irq:
uio_dev_del_attributes(idev);
+err_validate_mem:
err_uio_dev_add_attributes:
device_destroy(uio_class->class, MKDEV(uio_major, idev->minor));
err_device_create:
@@ -754,6 +952,8 @@ void uio_unregister_device(struct uio_in
if (info->irq >= 0)
free_irq(info->irq, idev);

+ uio_remove_all_dma_regions(info);
+
uio_dev_del_attributes(idev);

dev_set_drvdata(idev->dev, NULL);


2008-12-04 04:47:17

by Greg KH

[permalink] [raw]
Subject: Re: [PATCH 1/1] Userspace I/O (UIO): Add support for userspace DMA (corrected)

On Wed, Dec 03, 2008 at 05:51:30PM -0800, Edward Estabrook wrote:
> From: Edward Estabrook <[email protected]>
>
> Here is a patch that adds the ability to dynamically allocate and use
> coherent DMA
> from userspace by extending the Userspace IO driver. This patch applies against
> 2.6.28-rc6.
>
> The gist of this implementation is to overload uio's mmap
> functionality to allocate
> and map a new DMA region on demand. The bus-specific DMA address as returned by
> dma_alloc_coherent is made available to userspace in the 1st long word
> of the newly
> created region (as well as through the conventional 'addr' file in sysfs).
>
> The kernel-api change is that passing an offset value of 0xFFFFF000UL
> to the a uio
> device's mmap operation will dynamically allocate a DMA region. This
> cannot change/
> break existing behavior as the previous UIO code only allowed a maximum of 5
> mappings.

Odd formatting of your paragraphs :(

Anyway, what about 64bit processors? What happens if they try to use a
valid address in this range?

Is this value always an "invalid" value for all arches that Linux runs
on?

thanks,

greg k-h

2008-12-04 05:34:53

by Edward Estabrook

[permalink] [raw]
Subject: Re: [PATCH 1/1] Userspace I/O (UIO): Add support for userspace DMA (corrected)

> Anyway, what about 64bit processors? What happens if they try to use a
> valid address in this range?
>
> Is this value always an "invalid" value for all arches that Linux runs
> on?

Yes. I have run this on 64-bit systems.

The offset parameter in UIO is used to specify the memory-mapping index
of interest, where offset = N * getpagesize().

In include/linux/uio_driver.h we had:
#define MAX_UIO_MAPS 5

so assuming a page size of 4k the maximum legal value was 0x5000.

This logic is equally true on in any architecture.

With the dynamic DMA allocation one can now have an arbitrarily large
number of mappings. This magic number simply reduces the theoretical limit
to the number of distinct mappings to of 1,048,574.

Note that even if the in-kernel driver-stub choose to overload the mmap method
both the original code and these modifications would error out on any value
which did not correlate with a valid memory-mapping index number before
handing control to the overloaded implementation.

2008-12-04 19:41:23

by Hans J. Koch

[permalink] [raw]
Subject: Re: [PATCH 1/1] Userspace I/O (UIO): Add support for userspace DMA (corrected)

On Wed, Dec 03, 2008 at 05:51:30PM -0800, Edward Estabrook wrote:
> From: Edward Estabrook <[email protected]>
>
> Here is a patch that adds the ability to dynamically allocate and use
> coherent DMA
> from userspace by extending the Userspace IO driver. This patch applies against
> 2.6.28-rc6.
>
> The gist of this implementation is to overload uio's mmap
> functionality to allocate
> and map a new DMA region on demand.

What I don't understand is why you want to allocate memory in the UIO core.
In the current UIO concept, this is the driver's task. Of course we can have
(exported) functions in the core that can help a driver doing this.

Can you explain why you designed it this way? Can you post a driver that
uses this feature?

My comments below are mostly related to this design decision.

Thanks,
Hans

> The bus-specific DMA address as returned by
> dma_alloc_coherent is made available to userspace in the 1st long word
> of the newly
> created region (as well as through the conventional 'addr' file in sysfs).
>
> The kernel-api change is that passing an offset value of 0xFFFFF000UL
> to the a uio
> device's mmap operation will dynamically allocate a DMA region. This
> cannot change/
> break existing behavior as the previous UIO code only allowed a maximum of 5
> mappings.
>
> To allocate a DMA region you use the following:
> /* Pass this magic number to mmap as offset to dynamically allocate a
> chunk of memory */
> #define DMA_MEM_ALLOCATE_MMAP_OFFSET 0xFFFFF000UL
>
> void* memory = mmap (NULL, size, PROT_READ | PROT_WRITE , MAP_SHARED,
> fd, DMA_MEM_ALLOCATE_MMAP_OFFSET);
> u_int64_t *addr = *(u_int64_t *) memory;
>
> where 'size' is the size in bytes of the region you want and fd is the
> opened /dev/uioN file.
>
> Allocation occurs in page sized pieces by design to ensure that
> buffers are page-aligned.
>
> Memory is released when the uio_unregister_device() is called.
>
> Signed-off-by: Edward Estabrook <[email protected]>
> ---
> --- linux-2.6.28-rc6/include/linux/uio_driver.h.orig 2008-12-02
> 09:39:34.000000000 -0800
> +++ linux-2.6.28-rc6/include/linux/uio_driver.h 2008-12-02
> 10:55:01.000000000 -0800
> @@ -5,6 +5,7 @@
> * Copyright(C) 2005, Thomas Gleixner <[email protected]>
> * Copyright(C) 2006, Hans J. Koch <[email protected]>
> * Copyright(C) 2006, Greg Kroah-Hartman <[email protected]>
> + * Copyright(C) 2008, Edward Estabrook <[email protected]>
> *
> * Userspace IO driver.
> *
> @@ -26,7 +27,11 @@ struct uio_map;
> * @size: size of IO
> * @memtype: type of memory addr points to
> * @internal_addr: ioremap-ped version of addr, for driver internal use
> - * @map: for use by the UIO core only.
> + * (for DMA memory regions the dma_handle is stored here)
> + * @map: for use by the UIO core only
> + * @list: DMA-capable buffers ; undefined if not a DMA buffer
> + * @index: index number associated with this DMA-capable mapping
> + * (undefined if not a DMA buffer)
> */
> struct uio_mem {
> unsigned long addr;
> @@ -34,9 +39,12 @@ struct uio_mem {
> int memtype;
> void __iomem *internal_addr;
> struct uio_map *map;
> + struct list_head list;
> + unsigned index;
> };
>
> -#define MAX_UIO_MAPS 5
> +/* Increased to accomodate many logical memory regions per BAR. */
> +#define MAX_UIO_MAPS 100

Isn't 100 a bit much? Note that uio_info.mem[] is an array with this size.

>
> struct uio_device;
>
> @@ -46,6 +54,8 @@ struct uio_device;
> * @name: device name
> * @version: device driver version
> * @mem: list of mappable memory regions, size==0 for end of list
> + * @dma_mem_head list of dynamically allocated DMA-capable memory regions
> + * @dma_mem_size number of entries in dma_mem_head (used to speed up insertion)
> * @irq: interrupt number or UIO_IRQ_CUSTOM
> * @irq_flags: flags for request_irq()
> * @priv: optional private data
> @@ -60,6 +70,8 @@ struct uio_info {
> char *name;
> char *version;
> struct uio_mem mem[MAX_UIO_MAPS];
> + struct list_head dma_mem;
> + unsigned dma_mem_size;
> long irq;
> unsigned long irq_flags;
> void *priv;
> @@ -82,6 +94,16 @@ static inline int __must_check
> extern void uio_unregister_device(struct uio_info *info);
> extern void uio_event_notify(struct uio_info *info);
>
> +/* uio_dev_get_name - return the name associated with this uio device */
> +extern char *uio_dev_get_name(struct uio_device *idev);
> +
> +/* Starting index assigned to dynamically allocated regions. */
> +#define UIO_DMA_MEM_BASE_INDEX 1000
> +
> +/* mmap dynamically allocates a DMA memory block if
> + * its offset parameter matches this value. */
> +#define UIO_DMA_MEM_ALLOCATE_VM_PGOFF_MAGIC 0xFFFFFUL

I'd prefer to make this a flag, using one of the bits 16..31 of the value
passed to mmap() in userspace. This would leave the lower 16 bits for the
number of the mapping and some more flag bits on top for future extensions.
The flag can then be or'ed to the map number in userspace.

> +
> /* defines for uio_info->irq */
> #define UIO_IRQ_CUSTOM -1
> #define UIO_IRQ_NONE -2
> --- linux-2.6.28-rc6/drivers/uio/uio.c.orig 2008-12-03 02:01:16.000000000 -0800
> +++ linux-2.6.28-rc6/drivers/uio/uio.c 2008-12-03 02:14:08.000000000 -0800
> @@ -5,6 +5,7 @@
> * Copyright(C) 2005, Thomas Gleixner <[email protected]>
> * Copyright(C) 2006, Hans J. Koch <[email protected]>
> * Copyright(C) 2006, Greg Kroah-Hartman <[email protected]>
> + * Copyright(C) 2008, Edward Estabrook <[email protected]>
> *
> * Userspace IO
> *
> @@ -21,6 +22,7 @@
> #include <linux/idr.h>
> #include <linux/string.h>
> #include <linux/kobject.h>
> +#include <linux/dma-mapping.h>
> #include <linux/uio_driver.h>
>
> #define UIO_MAX_DEVICES 255
> @@ -62,7 +64,10 @@ struct uio_map {
>
> static ssize_t map_addr_show(struct uio_mem *mem, char *buf)
> {
> - return sprintf(buf, "0x%lx\n", mem->addr);
> + if (mem->index < UIO_DMA_MEM_BASE_INDEX)
> + return sprintf(buf, "0x%lx\n", mem->addr);
> + else
> + return sprintf(buf, "0x%p\n", mem->internal_addr);
> }
>
> static ssize_t map_size_show(struct uio_mem *mem, char *buf)
> @@ -171,14 +176,38 @@ static struct attribute_group uio_attr_g
> .attrs = uio_attrs,
> };
>
> +static int uio_mem_add_attribute(struct uio_device *idev, struct uio_mem *umem)
> +{
> + struct uio_map *map = kzalloc(sizeof(*map), GFP_KERNEL);
> + int ret = 0;
> + if (!map)
> + return -ENOMEM;
> +
> + kobject_init(&map->kobj, &map_attr_type);
> + map->mem = umem;
> + umem->map = map;
> + ret = kobject_add(&map->kobj, idev->map_dir, "map%d", umem->index);
> + if (ret)
> + return ret;
> + return kobject_uevent(&map->kobj, KOBJ_ADD);
> +}
> +
> +
> /*
> * device functions
> */
> +
> +/* uio_dev_get_name - return the name associated with this uio device */
> +char *uio_dev_get_name(struct uio_device *idev)
> +{
> + return idev->dev->bus_id;
> +}
> +EXPORT_SYMBOL_GPL(uio_dev_get_name);
> +
> static int uio_dev_add_attributes(struct uio_device *idev)
> {
> int ret;
> int mi;
> - int map_found = 0;
> struct uio_mem *mem;
> struct uio_map *map;
>
> @@ -186,27 +215,21 @@ static int uio_dev_add_attributes(struct
> if (ret)
> goto err_group;
>
> + /*
> + * Always register 'maps' even if there are no static
> + * memory regions so it's ready for dynamic DMA maps
> + */

That's a change to an existing userspace API. It would be better if the
driver already fills a mem[] struct and maybe marks it with a flag.
After all, each driver has to decide whether it wants to offer DMA
memory or not.

> + idev->map_dir = kobject_create_and_add("maps", &idev->dev->kobj);
> + if (!idev->map_dir)
> + goto err_remove_group;
> +
> for (mi = 0; mi < MAX_UIO_MAPS; mi++) {
> mem = &idev->info->mem[mi];
> if (mem->size == 0)
> break;
> - if (!map_found) {
> - map_found = 1;
> - idev->map_dir = kobject_create_and_add("maps",
> - &idev->dev->kobj);
> - if (!idev->map_dir)
> - goto err;
> - }
> - map = kzalloc(sizeof(*map), GFP_KERNEL);
> - if (!map)
> - goto err;
> - kobject_init(&map->kobj, &map_attr_type);
> - map->mem = mem;
> - mem->map = map;
> - ret = kobject_add(&map->kobj, idev->map_dir, "map%d", mi);
> - if (ret)
> - goto err;
> - ret = kobject_uevent(&map->kobj, KOBJ_ADD);
> + mem->index = mi;
> +
> + ret = uio_mem_add_attribute(idev, mem);
> if (ret)
> goto err;
> }
> @@ -214,12 +237,13 @@ static int uio_dev_add_attributes(struct
> return 0;
>
> err:
> - for (mi--; mi>=0; mi--) {
> + for (mi--; mi >= 0; mi--) {
> mem = &idev->info->mem[mi];
> map = mem->map;
> kobject_put(&map->kobj);
> }
> kobject_put(idev->map_dir);
> +err_remove_group:
> sysfs_remove_group(&idev->dev->kobj, &uio_attr_grp);
> err_group:
> dev_err(idev->dev, "error creating sysfs files (%d)\n", ret);
> @@ -230,12 +254,16 @@ static void uio_dev_del_attributes(struc
> {
> int mi;
> struct uio_mem *mem;
> +
> + /* Unregister static maps */
> for (mi = 0; mi < MAX_UIO_MAPS; mi++) {
> mem = &idev->info->mem[mi];
> if (mem->size == 0)
> break;
> kobject_put(&mem->map->kobj);
> }
> +
> + /* Unregister containers */
> kobject_put(idev->map_dir);
> sysfs_remove_group(&idev->dev->kobj, &uio_attr_grp);
> }
> @@ -459,18 +487,39 @@ static ssize_t uio_write(struct file *fi
> return retval ? retval : sizeof(s32);
> }
>
> -static int uio_find_mem_index(struct vm_area_struct *vma)
> +
> +/**
> + * uio_find_mem_region - return the memory region identified by vm->vm_pgoff
> + * @vma: vma descriptor as passed to mmap.
> + *
> + * returns memory region from either mem array or dma_mem list.
> + */
> +static struct uio_mem *uio_find_mem_region(struct vm_area_struct *vma)
> {
> - int mi;
> struct uio_device *idev = vma->vm_private_data;
> + struct uio_mem *umem;
>
> - for (mi = 0; mi < MAX_UIO_MAPS; mi++) {
> - if (idev->info->mem[mi].size == 0)
> - return -1;
> - if (vma->vm_pgoff == mi)
> - return mi;
> +
> + if (vma->vm_pgoff < UIO_DMA_MEM_BASE_INDEX) {
> + if (vma->vm_pgoff < MAX_UIO_MAPS
> + && idev->info->mem[vma->vm_pgoff].size != 0)
> + return &(idev->info->mem[vma->vm_pgoff]);
> + } else {
> + if (vma->vm_pgoff >= idev->info->dma_mem_size
> + + UIO_DMA_MEM_BASE_INDEX)
> + return 0; /* Too big: fail automatically */
> +
> + /* Iterate thru looking for entry in which vm_pgoff matches
> + * index. Yes, this is slow, but it's only for test & debug
> + * access anyway.
> + */

Huh? It's for any access to DMA memory, isn't it?

> + list_for_each_entry(umem, &idev->info->dma_mem, list) {
> + if (umem->index == vma->vm_pgoff)
> + return umem;
> + }
> }
> - return -1;
> +
> + return 0;
> }
>
> static void uio_vma_open(struct vm_area_struct *vma)
> @@ -487,25 +536,24 @@ static void uio_vma_close(struct vm_area
>
> static int uio_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> {
> - struct uio_device *idev = vma->vm_private_data;
> struct page *page;
> unsigned long offset;
>
> - int mi = uio_find_mem_index(vma);
> - if (mi < 0)
> + struct uio_mem *umem = uio_find_mem_region(vma);
> + if (!umem)
> return VM_FAULT_SIGBUS;
>
> /*
> * We need to subtract mi because userspace uses offset = N*PAGE_SIZE
> * to use mem[N].
> */
> - offset = (vmf->pgoff - mi) << PAGE_SHIFT;
> + offset = (vmf->pgoff - umem->index) << PAGE_SHIFT;
>
> - if (idev->info->mem[mi].memtype == UIO_MEM_LOGICAL)
> - page = virt_to_page(idev->info->mem[mi].addr + offset);
> + if (umem->memtype == UIO_MEM_LOGICAL)
> + page = virt_to_page(umem->addr + offset);
> else
> - page = vmalloc_to_page((void *)idev->info->mem[mi].addr
> - + offset);
> + page = vmalloc_to_page((void *)umem->addr + offset);
> +
> get_page(page);
> vmf->page = page;
> return 0;
> @@ -519,9 +567,8 @@ static struct vm_operations_struct uio_v
>
> static int uio_mmap_physical(struct vm_area_struct *vma)
> {
> - struct uio_device *idev = vma->vm_private_data;
> - int mi = uio_find_mem_index(vma);
> - if (mi < 0)
> + struct uio_mem *umem = uio_find_mem_region(vma);
> + if (!umem)
> return -EINVAL;
>
> vma->vm_flags |= VM_IO | VM_RESERVED;
> @@ -530,7 +577,7 @@ static int uio_mmap_physical(struct vm_a
>
> return remap_pfn_range(vma,
> vma->vm_start,
> - idev->info->mem[mi].addr >> PAGE_SHIFT,
> + umem->addr >> PAGE_SHIFT,
> vma->vm_end - vma->vm_start,
> vma->vm_page_prot);
> }
> @@ -543,12 +590,99 @@ static int uio_mmap_logical(struct vm_ar
> return 0;
> }
>
> +/**
> + * uio_mem_create_dma_region - allocate a DMA region of size bytes
> + * @umem: the newly allocated uio_mem structure is returned here
> + * @size: the number of bytes to allocate
> + * @idev: the UIO device this region belongs to
> + *
> + * return 0 if success or negative error code on failure
> + */
> +static int uio_mem_create_dma_region(struct uio_mem **umem,
> + unsigned long size,
> + struct uio_device *idev)

If you exported this function, a UIO driver could use it to setup
its mem struct. Looks cleaner to me.

> +{
> + int ret = 0;
> + unsigned long *addr;
> +
> + *umem = kzalloc(sizeof(struct uio_mem), GFP_KERNEL);
> + if (!*umem) {
> + dev_err(idev->dev,
> + "Unable to allocate uio_mem structure.\n");
> + return -ENOMEM;
> + }
> +
> + /* Allocate DMA-capable buffer */
> + addr = dma_alloc_coherent(idev->dev->parent, size,
> + (dma_addr_t *) &(*umem)->internal_addr,
> + GFP_KERNEL);
> + if (!addr) {
> + dev_warn(idev->dev, "Unable to allocate requested DMA-capable"
> + " block of size 0x%lx (%lu) during mmap in uio.\n",
> + size, size);
> + ret = -ENOMEM;
> + goto err_free_umem;
> + }
> +
> + /* Store the physical address and index as the
> + * first two long words for userspace access */
> + (*umem)->addr = (unsigned long) addr;
> + addr[0] = (unsigned long) (*umem)->internal_addr;
> + (*umem)->memtype = UIO_MEM_LOGICAL;
> + (*umem)->size = size;
> + (*umem)->index = idev->info->dma_mem_size + UIO_DMA_MEM_BASE_INDEX;
> + addr[1] = (unsigned long) (*umem)->index;
> +
> + /* Register the mapping in sysfs */
> + ret = uio_mem_add_attribute(idev, *umem);
> + if (ret) {
> + dev_err(idev->dev, "Unable to register sysfs entry for "
> + "DMA mapping (%d) in uio.\n", ret);
> + goto err_free_addr;
> + }
> +
> + idev->info->dma_mem_size++;
> + list_add_tail(&((*umem)->list), &(idev->info->dma_mem));
> +
> + return 0;
> +
> +err_free_addr:
> + dma_free_coherent(idev->dev->parent, size, addr,
> + (dma_addr_t) (*umem)->internal_addr);
> +err_free_umem:
> + kfree(*umem);
> + *umem = 0;
> + return ret;
> +}
> +
> +/**
> + * uio_mem_destroy_dma_region - destroy dynamically created DMA region
> + * @umem: UIO memory region to destroy
> + * @idev: the UIO device this region belongs to
> + *
> + */
> +static void uio_mem_destroy_dma_region(struct uio_mem *umem,
> + struct uio_device *idev)
> +{
> + /* Remove sysfs attributes */
> + struct uio_map *map = umem->map;
> + kobject_put(&map->kobj);
> +
> + /* Free the buffer itself */
> + dma_free_coherent(idev->dev->parent, umem->size, (void *) umem->addr,
> + (dma_addr_t) umem->internal_addr);
> + list_del(&(umem->list));
> +
> + kfree(umem);
> +}
> +
> static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
> {
> struct uio_listener *listener = filep->private_data;
> struct uio_device *idev = listener->dev;
> - int mi;
> + struct uio_mem *umem = 0;
> unsigned long requested_pages, actual_pages;
> + unsigned long requested_size;
> int ret = 0;
>
> if (vma->vm_end < vma->vm_start)
> @@ -556,12 +690,24 @@ static int uio_mmap(struct file *filep,
>
> vma->vm_private_data = idev;
>
> - mi = uio_find_mem_index(vma);
> - if (mi < 0)
> - return -EINVAL;
> + requested_size = vma->vm_end - vma->vm_start;
> + requested_pages = requested_size >> PAGE_SHIFT;
> +
> + if (vma->vm_pgoff == UIO_DMA_MEM_ALLOCATE_VM_PGOFF_MAGIC) {

If you had a flag like I suggested above, this could be handled much
simpler. Something like

if (vma->vm_pgoff & UIO_DMA_MEM_FLAG)
vma->vm_pgoff &= UIO_MEM_INDEX_MASK;


> + /* Create a memory region and change
> + * vm_pgoff to the newly created index */
> + ret = uio_mem_create_dma_region(&umem, requested_size, idev);
> + if (ret)
> + return ret;
> +
> + vma->vm_pgoff = umem->index;

Then you wouldn't need this index.

> + } else {
> + umem = uio_find_mem_region(vma);
> + if (!umem)
> + return -EINVAL;
> + }
>
> - requested_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
> - actual_pages = (idev->info->mem[mi].size + PAGE_SIZE -1) >> PAGE_SHIFT;
> + actual_pages = (umem->size + PAGE_SIZE - 1) >> PAGE_SHIFT;
> if (requested_pages > actual_pages)
> return -EINVAL;
>
> @@ -570,14 +716,14 @@ static int uio_mmap(struct file *filep,
> return ret;
> }
>
> - switch (idev->info->mem[mi].memtype) {
> - case UIO_MEM_PHYS:
> - return uio_mmap_physical(vma);
> - case UIO_MEM_LOGICAL:
> - case UIO_MEM_VIRTUAL:
> - return uio_mmap_logical(vma);
> - default:
> - return -EINVAL;
> + switch (umem->memtype) {
> + case UIO_MEM_PHYS:
> + return uio_mmap_physical(vma);
> + case UIO_MEM_LOGICAL:
> + case UIO_MEM_VIRTUAL:
> + return uio_mmap_logical(vma);
> + default:
> + return -EINVAL;
> }
> }
>
> @@ -658,6 +804,50 @@ static void uio_class_destroy(void)
> kref_put(&uio_class->kref, release_uio_class);
> }
>
> +void uio_remove_all_dma_regions(struct uio_info *info)
> +{
> + struct uio_mem *mem, *pos;
> +
> + list_for_each_entry_safe(pos, mem, &info->dma_mem, list) {
> + uio_mem_destroy_dma_region(pos, info->uio_dev);
> + }
> +}
> +
> +/**
> + * uio_validate_mem - ensure memory regions are page-aligned
> + * and unused regions are zeroed
> + * @idev: The UIO device containing memory regions to check
> + *
> + * returns 0 on success or -EINVAL if misaligned.
> + */
> +static int uio_validate_mem(struct uio_device *idev)
> +{
> + int ret;
> + int mi;
> + struct uio_mem *mem;
> +
> + for (mi = 0; mi < MAX_UIO_MAPS && idev->info->mem[mi].size != 0; mi++) {
> + mem = &idev->info->mem[mi];
> +
> + if ((mem->addr & PAGE_MASK) != mem->addr) {
> + ret = -EINVAL;
> + goto err_misaligned;
> + }
> + }
> +
> + /* zero out mem->size for unused regions for faster/safer lookup */
> + for (; mi < MAX_UIO_MAPS; mi++)
> + idev->info->mem[mi].size = 0;
> +
> + return 0;
> +err_misaligned:
> + dev_err(
> + idev->dev,
> + "mapable memory regions must be page-aligned (%d)\n",
> + ret);
> + return ret;
> +}
> +
> /**
> * uio_register_device - register a new userspace IO device
> * @owner: module that creates the new device
> @@ -677,6 +867,8 @@ int __uio_register_device(struct module
> return -EINVAL;
>
> info->uio_dev = NULL;
> + INIT_LIST_HEAD(&info->dma_mem);
> + info->dma_mem_size = 0;
>
> ret = init_uio_class();
> if (ret)
> @@ -706,6 +898,10 @@ int __uio_register_device(struct module
> goto err_device_create;
> }
>
> + ret = uio_validate_mem(idev);
> + if (ret)
> + goto err_validate_mem;
> +
> ret = uio_dev_add_attributes(idev);
> if (ret)
> goto err_uio_dev_add_attributes;
> @@ -714,7 +910,8 @@ int __uio_register_device(struct module
>
> if (idev->info->irq >= 0) {
> ret = request_irq(idev->info->irq, uio_interrupt,
> - idev->info->irq_flags, idev->info->name, idev);
> + idev->info->irq_flags, idev->info->name,
> + idev);
> if (ret)
> goto err_request_irq;
> }
> @@ -723,6 +920,7 @@ int __uio_register_device(struct module
>
> err_request_irq:
> uio_dev_del_attributes(idev);
> +err_validate_mem:
> err_uio_dev_add_attributes:
> device_destroy(uio_class->class, MKDEV(uio_major, idev->minor));
> err_device_create:
> @@ -754,6 +952,8 @@ void uio_unregister_device(struct uio_in
> if (info->irq >= 0)
> free_irq(info->irq, idev);
>
> + uio_remove_all_dma_regions(info);
> +
> uio_dev_del_attributes(idev);
>
> dev_set_drvdata(idev->dev, NULL);

2008-12-06 00:16:18

by Edward Estabrook

[permalink] [raw]
Subject: Re: [PATCH 1/1] Userspace I/O (UIO): Add support for userspace DMA (corrected)

> What I don't understand is why you want to allocate memory in the UIO core.
> In the current UIO concept, this is the driver's task. Of course we can have
> (exported) functions in the core that can help a driver doing this.
>

There are several compelling reasons to allocate memory in UIO core.
1)
I have a requirement to *dynamically* allocate the DMA regions.
The statically mapped regions contain a variable-sized ring buffer.
I use UIO driver to allocate several thousand 4k DMA regions.
If I need more during program execution I can grow the ring-buffer.

The existing infrastructure requires that you decide at driver-stub registration
how many mappings needed, period. Yes, we could add a flag to determine that
some mappings are to be allocated for DMA, but this does not facilitate the
*dynamic* allocation that is required.

(You and others are absolutely correct that if I only required static DMA maps
that could be gracefully handled in the driver stub and mapped 'traditionally',
but that is not the case here).

2)
Given that I need to add dynamic DMA allocation it seemed best to reuse as
much of the memory region-finding and mapping code in the UIO core. Had I
written the whole thing in the driver stub I would have to implement another
userspace <-> kernel communication mechanism and I'd have to copy and paste
a lot of the UIO cores mmap functionality and would lose the default handling
of the traditional static mappings.

3)
By implementing this in the UIO core other folks who need such capability in
the future can do so without duplicating this effort.

4)
I'm using the same driver stub to control several different 'devices', not all
of which have DMA. These devices can change configuration on the fly via FPGA
download. I am trying to keep as much of the device logic in userspace
as possible.


>> -#define MAX_UIO_MAPS 5
>> +/* Increased to accomodate many logical memory regions per BAR. */
>> +#define MAX_UIO_MAPS 100
>
> Isn't 100 a bit much? Note that uio_info.mem[] is an array with this size.
>

Yes, 100 is overkill. I'm using 2 bars with potentially 20 mappings
each to logically
separate functionality. I got 100 by scaling this to more 5 BARs.
But 40 would be sufficient for my purposes.

Would you agree the 'best' way to do this would be eliminate the
static list, pass in a size,
and use the linked-list-based search for both DMA and static maps.


>> + /*
>> + * Always register 'maps' even if there are no static
>> + * memory regions so it's ready for dynamic DMA maps
>> + */
>
> That's a change to an existing userspace API. It would be better if the
> driver already fills a mem[] struct and maybe marks it with a flag.
> After all, each driver has to decide whether it wants to offer DMA
> memory or not.

As commented above, the driver stub does not actually determine how much
DMA memory is required.

>> + /* Iterate thru looking for entry in which vm_pgoff matches
>> + * index. Yes, this is slow, but it's only for test & debug
>> + * access anyway.
>> + */
>> + list_for_each_entry(umem, &idev->info->dma_mem, list) {
>> + if (umem->index == vma->vm_pgoff)
>> + return umem;
>
> Huh? It's for any access to DMA memory, isn't it?
>

The 'normal' way to use the the DMA region is to call mmap with the magic number
and just go ahead and use the returned region. The beauty of overloading mmap
is you get immediate O(1) access to the memory. If memory were statically
mapped by the driver stub one would have to search for each region.

You can still use the search-for-index technique but this is O(N^2) and won't
scale well to the thousands of regions I need. You'd only need to access the
sysfs and do the search-by-index approach if you needed multiple processes to
access the same DMA region. (For example, a debug process or command
line tool).


>> +static int uio_mem_create_dma_region(struct uio_mem **umem,
>> + unsigned long size,
>> + struct uio_device *idev)
>
> If you exported this function, a UIO driver could use it to setup
> its mem struct. Looks cleaner to me.
>

Exporting this might prove useful to other driver-stub writers and this is
a valid suggestion, but exporting this isn't sufficient

>> +{
>> + int ret = 0;
>> + unsigned long *addr;
>> +
>> + *umem = kzalloc(sizeof(struct uio_mem), GFP_KERNEL);
>> + if (!*umem) {
>> + dev_err(idev->dev,
>> + "Unable to allocate uio_mem structure.\n");
>> + return -ENOMEM;
>> + }
>> +
>> + /* Allocate DMA-capable buffer */
>> + addr = dma_alloc_coherent(idev->dev->parent, size,
>> + (dma_addr_t *) &(*umem)->internal_addr,
>> + GFP_KERNEL);
>> + if (!addr) {
>> + dev_warn(idev->dev, "Unable to allocate requested DMA-capable"
>> + " block of size 0x%lx (%lu) during mmap in uio.\n",
>> + size, size);
>> + ret = -ENOMEM;
>> + goto err_free_umem;
>> + }
>> +
>> + /* Store the physical address and index as the
>> + * first two long words for userspace access */
>> + (*umem)->addr = (unsigned long) addr;
>> + addr[0] = (unsigned long) (*umem)->internal_addr;
>> + (*umem)->memtype = UIO_MEM_LOGICAL;
>> + (*umem)->size = size;
>> + (*umem)->index = idev->info->dma_mem_size + UIO_DMA_MEM_BASE_INDEX;
>> + addr[1] = (unsigned long) (*umem)->index;
>> +
>> + /* Register the mapping in sysfs */
>> + ret = uio_mem_add_attribute(idev, *umem);
>> + if (ret) {
>> + dev_err(idev->dev, "Unable to register sysfs entry for "
>> + "DMA mapping (%d) in uio.\n", ret);
>> + goto err_free_addr;
>> + }
>> +
>> + idev->info->dma_mem_size++;
>> + list_add_tail(&((*umem)->list), &(idev->info->dma_mem));
>> +
>> + return 0;
>> +
>> +err_free_addr:
>> + dma_free_coherent(idev->dev->parent, size, addr,
>> + (dma_addr_t) (*umem)->internal_addr);
>> +err_free_umem:
>> + kfree(*umem);
>> + *umem = 0;
>> + return ret;
>> +}
>> +
>> +/**
>> + * uio_mem_destroy_dma_region - destroy dynamically created DMA region
>> + * @umem: UIO memory region to destroy
>> + * @idev: the UIO device this region belongs to
>> + *
>> + */
>> +static void uio_mem_destroy_dma_region(struct uio_mem *umem,
>> + struct uio_device *idev)
>> +{
>> + /* Remove sysfs attributes */
>> + struct uio_map *map = umem->map;
>> + kobject_put(&map->kobj);
>> +
>> + /* Free the buffer itself */
>> + dma_free_coherent(idev->dev->parent, umem->size, (void *) umem->addr,
>> + (dma_addr_t) umem->internal_addr);
>> + list_del(&(umem->list));
>> +
>> + kfree(umem);
>> +}
>> +
>> static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
>> {
>> struct uio_listener *listener = filep->private_data;
>> struct uio_device *idev = listener->dev;
>> - int mi;
>> + struct uio_mem *umem = 0;
>> unsigned long requested_pages, actual_pages;
>> + unsigned long requested_size;
>> int ret = 0;
>>
>> if (vma->vm_end < vma->vm_start)
>> @@ -556,12 +690,24 @@ static int uio_mmap(struct file *filep,
>>
>> vma->vm_private_data = idev;
>>
>> - mi = uio_find_mem_index(vma);
>> - if (mi < 0)
>> - return -EINVAL;
>> + requested_size = vma->vm_end - vma->vm_start;
>> + requested_pages = requested_size >> PAGE_SHIFT;
>> +
>> + if (vma->vm_pgoff == UIO_DMA_MEM_ALLOCATE_VM_PGOFF_MAGIC) {
>
> If you had a flag like I suggested above, this could be handled much
> simpler. Something like
>
> if (vma->vm_pgoff & UIO_DMA_MEM_FLAG)
> vma->vm_pgoff &= UIO_MEM_INDEX_MASK;
>
>
>> + /* Create a memory region and change
>> + * vm_pgoff to the newly created index */
>> + ret = uio_mem_create_dma_region(&umem, requested_size, idev);
>> + if (ret)
>> + return ret;
>> +
>> + vma->vm_pgoff = umem->index;
>
> Then you wouldn't need this index.
>
>> + } else {
>> + umem = uio_find_mem_region(vma);
>> + if (!umem)
>> + return -EINVAL;
>> + }
>>

2008-12-11 00:34:19

by Hans J. Koch

[permalink] [raw]
Subject: Re: [PATCH 1/1] Userspace I/O (UIO): Add support for userspace DMA (corrected)

On Wed, Dec 03, 2008 at 05:51:30PM -0800, Edward Estabrook wrote:
> From: Edward Estabrook <[email protected]>
>
> Here is a patch that adds the ability to dynamically allocate and use
> coherent DMA
> from userspace by extending the Userspace IO driver. This patch applies against
> 2.6.28-rc6.

Hi Edward,
sorry I didn't answer sooner. It was not lack of interest in your work, I
just had to think about it (and had quite a lot of other work, too).
I consider your idea of having DMA coherent memory allocatable from userspace
a valid demand. However, the interface should be implemented differently.

As a general requirement, all information you need to pass to userspace has
to go through sysfs. This is a UIO design decision and should not be changed
lightly. Please don't fill information into the allocated memory.

We shouldn't mix dynamically allocated DMA regions with the already existing
static memory mappings. There should be an extra directory, maybe called
/sys/class/uio/uioN/dyn_dma_mem/ containing information about this kind of
DMA memory. This directory would not exist by default, it's only created if
some user actually allocates such memory.

In the UIO core, these mappings should be handled with a dynamic list, not
a static array. The number of possible mappings has to be limited to a
sensible maximum, maybe 50 or 100. The code handling the existing static
mappings could mostly stay as-is.

Userspace can allocate new DMA memory by using some magic number as the offset
parameter of mmap() like you suggested. However, this magic should be just one
of the bits 16..31 (e.g. 0x00010000) to have other bits available for future
extensions.

I think you should also reconsider the way you unmap the memory. It's certainly
right to unmap everything when the device is unregistered, but I think there's
more to think about:
- Should the memory be freed as soon as all users have the device closed?
- What happens if a user calls munmap() on such a mapping?

I'm looking forward to your next version.

Thanks,
Hans

2008-12-25 12:30:20

by Joerg Roedel

[permalink] [raw]
Subject: Re: [PATCH 1/1] Userspace I/O (UIO): Add support for userspace DMA (corrected)

On Thu, Dec 11, 2008 at 01:33:41AM +0100, Hans J. Koch wrote:
> On Wed, Dec 03, 2008 at 05:51:30PM -0800, Edward Estabrook wrote:
> > From: Edward Estabrook <[email protected]>
> >
> > Here is a patch that adds the ability to dynamically allocate and use
> > coherent DMA
> > from userspace by extending the Userspace IO driver. This patch applies against
> > 2.6.28-rc6.
>
> Hi Edward,
> sorry I didn't answer sooner. It was not lack of interest in your work, I
> just had to think about it (and had quite a lot of other work, too).
> I consider your idea of having DMA coherent memory allocatable from userspace
> a valid demand. However, the interface should be implemented differently.
>
> As a general requirement, all information you need to pass to userspace has
> to go through sysfs. This is a UIO design decision and should not be changed
> lightly. Please don't fill information into the allocated memory.
>
> We shouldn't mix dynamically allocated DMA regions with the already existing
> static memory mappings. There should be an extra directory, maybe called
> /sys/class/uio/uioN/dyn_dma_mem/ containing information about this kind of
> DMA memory. This directory would not exist by default, it's only created if
> some user actually allocates such memory.
>
> In the UIO core, these mappings should be handled with a dynamic list, not
> a static array. The number of possible mappings has to be limited to a
> sensible maximum, maybe 50 or 100. The code handling the existing static
> mappings could mostly stay as-is.

I like that idea of a seperate directory in sysfs. How about allocating
DMA memory by creating files in this directory. The memory itself can be
accessed via mmap() and the dma address can be gathered using read().
When a file is closed the dma memory is unmapped.
This would work great as long as /sys/class/uio/uioN/ belongs only to
one physical device.

Joerg

2008-12-25 15:20:39

by Hans J. Koch

[permalink] [raw]
Subject: Re: [PATCH 1/1] Userspace I/O (UIO): Add support for userspace DMA (corrected)

On Thu, Dec 25, 2008 at 01:30:04PM +0100, Joerg Roedel wrote:
> On Thu, Dec 11, 2008 at 01:33:41AM +0100, Hans J. Koch wrote:
> > On Wed, Dec 03, 2008 at 05:51:30PM -0800, Edward Estabrook wrote:
> > > From: Edward Estabrook <[email protected]>
> > >
> > > Here is a patch that adds the ability to dynamically allocate and use
> > > coherent DMA
> > > from userspace by extending the Userspace IO driver. This patch applies against
> > > 2.6.28-rc6.
> >
> > Hi Edward,
> > sorry I didn't answer sooner. It was not lack of interest in your work, I
> > just had to think about it (and had quite a lot of other work, too).
> > I consider your idea of having DMA coherent memory allocatable from userspace
> > a valid demand. However, the interface should be implemented differently.
> >
> > As a general requirement, all information you need to pass to userspace has
> > to go through sysfs. This is a UIO design decision and should not be changed
> > lightly. Please don't fill information into the allocated memory.
> >
> > We shouldn't mix dynamically allocated DMA regions with the already existing
> > static memory mappings. There should be an extra directory, maybe called
> > /sys/class/uio/uioN/dyn_dma_mem/ containing information about this kind of
> > DMA memory. This directory would not exist by default, it's only created if
> > some user actually allocates such memory.
> >
> > In the UIO core, these mappings should be handled with a dynamic list, not
> > a static array. The number of possible mappings has to be limited to a
> > sensible maximum, maybe 50 or 100. The code handling the existing static
> > mappings could mostly stay as-is.
>
> I like that idea of a seperate directory in sysfs. How about allocating
> DMA memory by creating files in this directory.

Nice idea, but AFAICT sysfs doesn't support creating files from userspace.

> The memory itself can be
> accessed via mmap() and the dma address can be gathered using read().
> When a file is closed the dma memory is unmapped.
> This would work great as long as /sys/class/uio/uioN/ belongs only to
> one physical device.

That's guaranteed.

Thanks,
Hans

2008-12-27 00:49:55

by Andreas Bombe

[permalink] [raw]
Subject: Re: [PATCH 1/1] Userspace I/O (UIO): Add support for userspace DMA (corrected)

On Thu, Dec 25, 2008 at 04:20:15PM +0100, Hans J. Koch wrote:
> On Thu, Dec 25, 2008 at 01:30:04PM +0100, Joerg Roedel wrote:
> > I like that idea of a seperate directory in sysfs. How about allocating
> > DMA memory by creating files in this directory.
>
> Nice idea, but AFAICT sysfs doesn't support creating files from userspace.

For that there is configfs.