Hi everybody,
in short: The attached patch adds zerocopy support to the usbfs driver.
I tested it on x86 and on a globalscale mirabox ARM board. Until now it
works
quite nice and I'd love to get some comments and feedback on the patch.
Longer version: The usbfs driver does not support direct data transfers
from the controller
to user-memory. For several use-cases (in my case USB3 Vision cameras
pushing up to 360MB/s)
the copy operation from kernel to user-space results in a lot of
unnecessary CPU overhead
especially on small embedded devices. I measured a decrease in CPU usage
from 44% to 22% on
a the mirabox.
This patch implements zerocopy beside the existing code paths, so it is
easy to disable
zerocopy at runtime and to directly compare the CPU usage.
To disable zerocopy just do an
echo 1 > /sys/module/usbcore/parameters/usbfs_disable_zerocopy
and to reenable it
echo 0 > /sys/module/usbcore/parameters/usbfs_disable_zerocopy
On modern desktop systems the change in CPU usage is quite low so this
mostly affects
smaller embedded devices.
Implementation details:
The patch only touches drivers/usb/core/devio.c.
In procy_do_submiturb(), it is checked if zerocopy is allowed. This is
currently a rough
check which compares the number of required pages to
ps->dev->bus->sg_tablesize.
I don't know if there is more to check there.
Then the user memory provided inside the usbdevfs_urb structure is
pinned to
physical memory using get_user_pages_fast().
All the user pages are added to the scatter-gather list and the logic
continues as before.
In copy_urb_data_to_user() the pages are marked with
set_page_dirty_lock() if the transfer
was a zerocopy one.
In free_async() the pinned pages are released, if the transfer was a
zerocopy one.
Questions/Notes:
- I'm quite unhappy with the added member async::is_user_mem. Is there a
better place
where I could store this information?
- I had a look at some other drivers using the get_user_pages ->
sg_set_page -> page_cache_release
technique and didn't see any special code to handle corner cases.
Are there corner cases? - Like usb controllers not supporting the
whole memory etc. ?
- In the kernel log I see messages like:
xhci_hcd 0000:00:14.0: WARN Event TRB for slot 4 ep 8 with no TDs queued?
after enabling zerocopy. Any idea where this might come from?
This patch should cleanly apply to the current master (3.16-rc2)
Kind regards
Stefan
Signed-off-by: Stefan Klug <[email protected]>
---
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 257876e..c97d1ec 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -4,6 +4,7 @@
* devio.c -- User space communication with USB devices.
*
* Copyright (C) 1999-2000 Thomas Sailer ([email protected])
+ * Copyright (C) 2014 Stefan Klug ([email protected])
*
* This program is free software; you can redistribute it and/or
modify
* it under the terms of the GNU General Public License as
published by
@@ -30,6 +31,7 @@
* 04.01.2000 0.2 Turned into its own filesystem
* 30.09.2005 0.3 Fix user-triggerable oops in async URB delivery
* (CAN-2005-3055)
+ * 07.05.2014 0.4 Added zerocopy support
*/
/*****************************************************************************/
@@ -52,6 +54,7 @@
#include <linux/uaccess.h>
#include <asm/byteorder.h>
#include <linux/moduleparam.h>
+#include <linux/pagemap.h>
#include "usb.h"
@@ -94,6 +97,9 @@ struct async {
u32 secid;
u8 bulk_addr;
u8 bulk_status;
+
+ //set to 1, if the buffer is pinned user-memory
+ u8 is_user_mem;
};
static bool usbfs_snoop;
@@ -118,6 +124,12 @@ module_param(usbfs_memory_mb, uint, 0644);
MODULE_PARM_DESC(usbfs_memory_mb,
"maximum MB allowed for usbfs buffers (0 = no limit)");
+
+static bool usbfs_disable_zerocopy = false;
+module_param(usbfs_disable_zerocopy, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(usbfs_disable_zerocopy, "true to disable zerocopy and
fallback to the old implementation");
+
+
/* Hard limit, necessary to avoid arithmetic overflow */
#define USBFS_XFER_MAX (UINT_MAX / 2 - 1000000)
@@ -289,14 +301,23 @@ static struct async *alloc_async(unsigned int
numisoframes)
static void free_async(struct async *as)
{
int i;
+ struct page* p;
put_pid(as->pid);
if (as->cred)
put_cred(as->cred);
+
for (i = 0; i < as->urb->num_sgs; i++) {
- if (sg_page(&as->urb->sg[i]))
- kfree(sg_virt(&as->urb->sg[i]));
+ p = sg_page(&as->urb->sg[i]);
+ if (p) {
+ if(as->is_user_mem) {
+ page_cache_release(p);
+ } else {
+ kfree(sg_virt(&as->urb->sg[i]));
+ }
+ }
}
+
kfree(as->urb->sg);
kfree(as->urb->transfer_buffer);
kfree(as->urb->setup_packet);
@@ -419,9 +440,11 @@ static void snoop_urb_data(struct urb *urb,
unsigned len)
}
}
-static int copy_urb_data_to_user(u8 __user *userbuffer, struct urb *urb)
+static int copy_urb_data_to_user(u8 __user *userbuffer, struct async *as)
{
unsigned i, len, size;
+ struct urb *urb = as->urb;
+ struct page* page;
if (urb->number_of_packets > 0) /* Isochronous */
len = urb->transfer_buffer_length;
@@ -434,12 +457,20 @@ static int copy_urb_data_to_user(u8 __user
*userbuffer, struct urb *urb)
return 0;
}
- for (i = 0; i < urb->num_sgs && len; i++) {
- size = (len > USB_SG_SIZE) ? USB_SG_SIZE : len;
- if (copy_to_user(userbuffer, sg_virt(&urb->sg[i]), size))
- return -EFAULT;
- userbuffer += size;
- len -= size;
+ if(as->is_user_mem) {
+ for (i = 0; i < urb->num_sgs; i++) {
+ page = sg_page(&urb->sg[i]);
+ if (page)
+ set_page_dirty_lock(page);
+ }
+ } else {
+ for (i = 0; i < urb->num_sgs && len; i++) {
+ size = (len > USB_SG_SIZE) ? USB_SG_SIZE : len;
+ if (copy_to_user(userbuffer, sg_virt(&urb->sg[i]), size))
+ return -EFAULT;
+ userbuffer += size;
+ len -= size;
+ }
}
return 0;
@@ -1294,6 +1325,12 @@ static int proc_do_submiturb(struct usb_dev_state
*ps, struct usbdevfs_urb *uurb
unsigned int stream_id = 0;
void *buf;
+ int num_pages = 0;
+ void *buf_aligned = 0; //the page aligned version of urb->buffer
+ int buf_offset = 0; // the offset nbetween urb->buffer and
buf_alignment
+ int o;
+ struct page **pages = NULL;
+
if (uurb->flags & ~(USBDEVFS_URB_ISO_ASAP |
USBDEVFS_URB_SHORT_NOT_OK |
USBDEVFS_URB_BULK_CONTINUATION |
@@ -1369,9 +1406,21 @@ static int proc_do_submiturb(struct usb_dev_state
*ps, struct usbdevfs_urb *uurb
uurb->type = USBDEVFS_URB_TYPE_INTERRUPT;
goto interrupt_urb;
}
- num_sgs = DIV_ROUND_UP(uurb->buffer_length, USB_SG_SIZE);
- if (num_sgs == 1 || num_sgs > ps->dev->bus->sg_tablesize)
- num_sgs = 0;
+
+
+ buf_aligned = (char*)((unsigned long)uurb->buffer & PAGE_MASK);
+ buf_offset = uurb->buffer - buf_aligned;
+ if(uurb->buffer_length > 0)
+ num_pages = DIV_ROUND_UP(uurb->buffer_length + buf_offset,
PAGE_SIZE);
+
+ if(usbfs_disable_zerocopy || num_pages >
ps->dev->bus->sg_tablesize) {
+ num_pages = 0;
+
+ //fallback to the non-zerocopy path
+ num_sgs = DIV_ROUND_UP(uurb->buffer_length, USB_SG_SIZE);
+ if (num_sgs == 1 || num_sgs > ps->dev->bus->sg_tablesize)
+ num_sgs = 0;
+ }
if (ep->streams)
stream_id = uurb->stream_id;
break;
@@ -1435,8 +1484,14 @@ static int proc_do_submiturb(struct usb_dev_state
*ps, struct usbdevfs_urb *uurb
goto error;
}
- u += sizeof(struct async) + sizeof(struct urb) + uurb->buffer_length +
- num_sgs * sizeof(struct scatterlist);
+ u += sizeof(struct async) + sizeof(struct urb) +
+ num_sgs * sizeof(struct scatterlist) +
+ num_pages * sizeof(struct scatterlist);
+
+ //only add buffer_length if not doing zerocopy
+ if(!num_pages)
+ u += uurb->buffer_length;
+
ret = usbfs_increase_memory_usage(u);
if (ret)
goto error;
@@ -1471,6 +1526,57 @@ static int proc_do_submiturb(struct usb_dev_state
*ps, struct usbdevfs_urb *uurb
}
totlen -= u;
}
+ } else if(num_pages) {
+ pages = kmalloc(num_pages*sizeof(struct page*), GFP_KERNEL);
+ if(!pages) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ //create the scatterlist
+ as->urb->sg = kmalloc(num_pages * sizeof(struct scatterlist),
GFP_KERNEL);
+ if (!as->urb->sg) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ ret = get_user_pages_fast((unsigned long)buf_aligned,
+ num_pages,
+ is_in,
+ pages);
+
+ if(ret < 0) {
+ printk("get_user_pages failed %i\n", ret);
+ goto error;
+ }
+
+ //did we get all pages?
+ if(ret < num_pages) {
+ printk("get_user_pages didn't deliver all pages %i\n", ret);
+ //free the pages and error out
+ for(i=0; i<ret; i++) {
+ page_cache_release(pages[i]);
+ }
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ as->is_user_mem = 1;
+ as->urb->num_sgs = num_pages;
+ sg_init_table(as->urb->sg, as->urb->num_sgs);
+
+ totlen = uurb->buffer_length + buf_offset;
+ o = buf_offset;
+ for (i = 0; i < as->urb->num_sgs; i++) {
+ u = (totlen > PAGE_SIZE) ? PAGE_SIZE : totlen;
+ u-= o;
+ sg_set_page(&as->urb->sg[i], pages[i], u, o);
+ totlen -= u + o;
+ o = 0;
+ }
+
+ kfree(pages);
+ pages = NULL;
} else if (uurb->buffer_length > 0) {
as->urb->transfer_buffer = kmalloc(uurb->buffer_length,
GFP_KERNEL);
@@ -1602,6 +1708,7 @@ static int proc_do_submiturb(struct usb_dev_state
*ps, struct usbdevfs_urb *uurb
error:
kfree(isopkt);
kfree(dr);
+ kfree(pages);
if (as)
free_async(as);
return ret;
@@ -1650,7 +1757,7 @@ static int processcompl(struct async *as, void
__user * __user *arg)
unsigned int i;
if (as->userbuffer && urb->actual_length) {
- if (copy_urb_data_to_user(as->userbuffer, urb))
+ if (copy_urb_data_to_user(as->userbuffer, as))
goto err_out;
}
if (put_user(as->status, &userurb->status))
@@ -1818,7 +1925,7 @@ static int processcompl_compat(struct async *as,
void __user * __user *arg)
unsigned int i;
if (as->userbuffer && urb->actual_length) {
- if (copy_urb_data_to_user(as->userbuffer, urb))
+ if (copy_urb_data_to_user(as->userbuffer, as))
return -EFAULT;
}
if (put_user(as->status, &userurb->status))
---
Stefan Klug
Software Developer
Basler AG
An der Strusbek 60-62
22926 Ahrensburg
Germany
Tel. +49 4102 463 582
Fax +49 4102 463 46 582
[email protected]
http://www.baslerweb.com
Vorstand: Dr.-Ing. Dietmar Ley (Vorsitzender) · John P. Jennings · Arndt Bake · Hardy Mehl
Aufsichtsratsvorsitzender: Norbert Basler
Basler AG · Amtsgericht Lübeck HRB 4090 · Ust-IdNr.: DE 135 098 121 · Steuer-Nr.: 30 292 04497 · WEEE-Reg.-Nr. DE 83888045
On Wed, Jul 02, 2014 at 05:53:56PM +0200, Stefan Klug wrote:
> Hi everybody,
> in short: The attached patch adds zerocopy support to the usbfs driver.
> I tested it on x86 and on a globalscale mirabox ARM board. Until now it
> works
> quite nice and I'd love to get some comments and feedback on the patch.
Very nice.
Minor non-technical issues here, your patch is corrupted (linewrapped
and tabs converted to spaces) and can't be applied at all. Can you fix
your email client and try again? Look at
Documentation/email_clients.txt for hints on how to do this.
> --- a/drivers/usb/core/devio.c
> +++ b/drivers/usb/core/devio.c
> @@ -4,6 +4,7 @@
> * devio.c -- User space communication with USB devices.
> *
> * Copyright (C) 1999-2000 Thomas Sailer ([email protected])
> + * Copyright (C) 2014 Stefan Klug ([email protected])
According to legal advice given to me, this isn't needed / true, so can
you drop it? Or, if you have different legal advice, might I ask for
what the legal justification is that this needs to be added?
> * This program is free software; you can redistribute it and/or
> modify
> * it under the terms of the GNU General Public License as published
> by
> @@ -30,6 +31,7 @@
> * 04.01.2000 0.2 Turned into its own filesystem
> * 30.09.2005 0.3 Fix user-triggerable oops in async URB delivery
> * (CAN-2005-3055)
> + * 07.05.2014 0.4 Added zerocopy support
No need to do this, we use git now and that's where the changelog goes.
We should just delete this "fake changelog" entirely from this file.
thanks,
greg k-h
On Wed, 2 Jul 2014, Stefan Klug wrote:
> Hi everybody,
> in short: The attached patch adds zerocopy support to the usbfs driver.
> I tested it on x86 and on a globalscale mirabox ARM board. Until now it
> works
> quite nice and I'd love to get some comments and feedback on the patch.
>
> Longer version: The usbfs driver does not support direct data transfers
> from the controller
> to user-memory. For several use-cases (in my case USB3 Vision cameras
> pushing up to 360MB/s)
> the copy operation from kernel to user-space results in a lot of
> unnecessary CPU overhead
> especially on small embedded devices. I measured a decrease in CPU usage
> from 44% to 22% on
> a the mirabox.
>
> This patch implements zerocopy beside the existing code paths, so it is
> easy to disable
> zerocopy at runtime and to directly compare the CPU usage.
> To disable zerocopy just do an
> echo 1 > /sys/module/usbcore/parameters/usbfs_disable_zerocopy
> and to reenable it
> echo 0 > /sys/module/usbcore/parameters/usbfs_disable_zerocopy
>
> On modern desktop systems the change in CPU usage is quite low so this
> mostly affects
> smaller embedded devices.
>
> Implementation details:
> The patch only touches drivers/usb/core/devio.c.
> In procy_do_submiturb(), it is checked if zerocopy is allowed. This is
> currently a rough
> check which compares the number of required pages to
> ps->dev->bus->sg_tablesize.
> I don't know if there is more to check there.
> Then the user memory provided inside the usbdevfs_urb structure is
> pinned to
> physical memory using get_user_pages_fast().
> All the user pages are added to the scatter-gather list and the logic
> continues as before.
> In copy_urb_data_to_user() the pages are marked with
> set_page_dirty_lock() if the transfer
> was a zerocopy one.
> In free_async() the pinned pages are released, if the transfer was a
> zerocopy one.
Overall this implementation seems quite straightforward. However, it
appears that your email client has mangled the whitespace in the patch.
The patch contains many style violations; you should run it through
checkpatch.pl. It also has one or two mistakes, such as the
calculation of u for the amount of memory used.
> Questions/Notes:
> - I'm quite unhappy with the added member async::is_user_mem. Is there a
> better place
> where I could store this information?
No, async is the right place. Why are you unhappy about it?
> - I had a look at some other drivers using the get_user_pages ->
> sg_set_page -> page_cache_release
> technique and didn't see any special code to handle corner cases.
> Are there corner cases? - Like usb controllers not supporting the
> whole memory etc. ?
Indeed yes. The page addresses have to be checked against the DMA
mask. Also, many host controllers cannot handle arbitrary alignment.
It would be best to require that the buffer start at a page boundary.
> - In the kernel log I see messages like:
> xhci_hcd 0000:00:14.0: WARN Event TRB for slot 4 ep 8 with no TDs queued?
> after enabling zerocopy. Any idea where this might come from?
Not directly related; that's a bug in xhci-hcd.
Using a global module parameter to control the use of zerocopy (for
anything other than debugging or testing) is a bad idea. If you think
people will have a reason for avoiding zerocopy then you should make it
possible to decide for each URB, by adding a new flag to struct
usbdevfs_urb.
People will want to use zerocopy with isochronous transfers as well as
bulk. Implementing that isn't going to be quite so easy; it will be
necessary for the user to set up a memory mapping. In fact, once
that's done the same mechanism could be used for bulk transfers too.
Alan Stern
Thank you very much for working on this, Stefan.
Alan Stern wrote:
> Also, many host controllers cannot handle arbitrary alignment.
> It would be best to require that the buffer start at a page boundary.
This requires a bit of negotiation with userspace, maybe per-URB but
it seems better to negotiate per-claim or even per-open. What about
large control transfers?
> Using a global module parameter to control the use of zerocopy (for
> anything other than debugging or testing) is a bad idea.
I agree.
> If you think people will have a reason for avoiding zerocopy then
> you should make it possible to decide for each URB, by adding a new
> flag to struct usbdevfs_urb.
People might want to use zerocopy always, but have buffers in
userspace which make that impossible with the given hardware.
It's important that the kernel gives userspace enough information
about the constraints, if userspace wants zerocopy.
> People will want to use zerocopy with isochronous transfers as well as
> bulk. Implementing that isn't going to be quite so easy; it will be
> necessary for the user to set up a memory mapping. In fact, once
> that's done the same mechanism could be used for bulk transfers too.
Indeed I think userspace wants to be involved in choosing memory also
with bulk, in order to ensure that zerocopy will always work when
userspace cares about that.
Is it enough to expose the DMA mask of the host controller?
//Peter
On Wed, 2014-07-02 at 17:53 +0200, Stefan Klug wrote:
> Implementation details:
> The patch only touches drivers/usb/core/devio.c.
> In procy_do_submiturb(), it is checked if zerocopy is allowed. This is
> currently a rough
> check which compares the number of required pages to
> ps->dev->bus->sg_tablesize.
It seems to me that the check is per call, so using
multiple calls one could still pin unlimited amounts
of memory.
> I don't know if there is more to check there.
> Then the user memory provided inside the usbdevfs_urb structure is
> pinned to
> physical memory using get_user_pages_fast().
> All the user pages are added to the scatter-gather list and the logic
> continues as before.
How do you enforce the cache coherency rules?
Also you don't have a fall back if get_user_pages_fast()
returns less than requested. It seems to me that than you
ought to fall back buffered IO.
Regards
Oliver
On Wed, 2 Jul 2014, Peter Stuge wrote:
> Thank you very much for working on this, Stefan.
>
> Alan Stern wrote:
> > Also, many host controllers cannot handle arbitrary alignment.
> > It would be best to require that the buffer start at a page boundary.
>
> This requires a bit of negotiation with userspace, maybe per-URB but
I don't follow. What negotiation is needed? All that needs to happen
is the user program submits a transfer where the buffer is aligned on a
page boundary.
> it seems better to negotiate per-claim or even per-open. What about
> large control transfers?
The kernel doesn't support scatter-gather for control transfers, only
bulk.
> > If you think people will have a reason for avoiding zerocopy then
> > you should make it possible to decide for each URB, by adding a new
> > flag to struct usbdevfs_urb.
>
> People might want to use zerocopy always, but have buffers in
> userspace which make that impossible with the given hardware.
>
> It's important that the kernel gives userspace enough information
> about the constraints, if userspace wants zerocopy.
I don't know of any way for the kernel to give userspace any
information about constraints of this sort. Do you? For example, the
man page for open(2) says: "The O_DIRECT flag may impose alignment
restrictions on the length and address of user-space buffers and the
file offset of I/Os", but it doesn't say how to find out what those
restrictions are.
> > People will want to use zerocopy with isochronous transfers as well as
> > bulk. Implementing that isn't going to be quite so easy; it will be
> > necessary for the user to set up a memory mapping. In fact, once
> > that's done the same mechanism could be used for bulk transfers too.
>
> Indeed I think userspace wants to be involved in choosing memory also
> with bulk, in order to ensure that zerocopy will always work when
> userspace cares about that.
>
> Is it enough to expose the DMA mask of the host controller?
It doesn't need to be exposed, since the mmap(2) call would be handled
by the kernel's USB stack (and besides, the user program can't request
that the mapped memory be located in any particular physical address
region).
Furthermore, the DMA mask already is exposed in sysfs. For example,
the DMA mask for the host controller on bus 2 is given in
/sys/bus/usb/devices/usb2/../dma_mask_bits.
Alan Stern
On Wed, 2 Jul 2014, Oliver Neukum wrote:
> On Wed, 2014-07-02 at 17:53 +0200, Stefan Klug wrote:
>
> > Implementation details:
> > The patch only touches drivers/usb/core/devio.c.
> > In procy_do_submiturb(), it is checked if zerocopy is allowed. This is
> > currently a rough
> > check which compares the number of required pages to
> > ps->dev->bus->sg_tablesize.
>
> It seems to me that the check is per call, so using
> multiple calls one could still pin unlimited amounts
> of memory.
usbfs keeps track of the total amount of pinned memory and enforces an
overall limit. It will be necessary to add the size of the transfer
buffer to that total.
> > I don't know if there is more to check there.
> > Then the user memory provided inside the usbdevfs_urb structure is
> > pinned to
> > physical memory using get_user_pages_fast().
> > All the user pages are added to the scatter-gather list and the logic
> > continues as before.
>
> How do you enforce the cache coherency rules?
There is no way to do this. If the user program accesses memory when
it shouldn't, the transfer might not work right.
> Also you don't have a fall back if get_user_pages_fast()
> returns less than requested. It seems to me that than you
> ought to fall back buffered IO.
Agreed.
Alan Stern
Alan Stern wrote:
> > > Also, many host controllers cannot handle arbitrary alignment.
> > > It would be best to require that the buffer start at a page boundary.
> >
> > This requires a bit of negotiation with userspace, maybe per-URB but
>
> I don't follow. What negotiation is needed? All that needs to happen
> is the user program submits a transfer where the buffer is aligned on a
> page boundary.
The negotiation needed would be for userspace to learn what alignment
is required, so that it can make sure to provide only such buffers.
But see below on mmap..
> > it seems better to negotiate per-claim or even per-open. What about
> > large control transfers?
>
> The kernel doesn't support scatter-gather for control transfers, only
> bulk.
That could possibly change, right, and then it would be nice to have
zerocopy for free there as well?
> > It's important that the kernel gives userspace enough information
> > about the constraints, if userspace wants zerocopy.
>
> I don't know of any way for the kernel to give userspace any
> information about constraints of this sort. Do you?
I don't know of any at the moment, no. It might be done through an
ioctl into usbfs, but if sysfs already has all neccessary information
then no ioctl is needed. Anyway...
> > Indeed I think userspace wants to be involved in choosing memory also
> > with bulk, in order to ensure that zerocopy will always work when
> > userspace cares about that.
> >
> > Is it enough to expose the DMA mask of the host controller?
>
> It doesn't need to be exposed, since the mmap(2) call would be handled
> by the kernel's USB stack (and besides, the user program can't request
> that the mapped memory be located in any particular physical address
> region).
Since alignment isn't the only issue I don't think there's a way to
avoid it. I was just hoping to be able to avoid allocating zerocopy
buffers with mmap().
> Furthermore, the DMA mask already is exposed in sysfs.
> For example, the DMA mask for the host controller on bus 2 is
> given in /sys/bus/usb/devices/usb2/../dma_mask_bits.
I realize that this doesn't help much, since userspace can't get the
physical address for its virtual addresses anyway.
//Peter
On Wed, 2 Jul 2014, Peter Stuge wrote:
> > The kernel doesn't support scatter-gather for control transfers, only
> > bulk.
>
> That could possibly change, right, and then it would be nice to have
> zerocopy for free there as well?
No. ohci-hcd doesn't support control transfers larger than 4 KB
anyway, and ehci-hcd doesn't support control transfers larger than 16
KB. This is not likely to change. Besides, a control transfer can
never be larger than 64 KB, and throughput isn't an issue for them.
> > > Indeed I think userspace wants to be involved in choosing memory also
> > > with bulk, in order to ensure that zerocopy will always work when
> > > userspace cares about that.
> > >
> > > Is it enough to expose the DMA mask of the host controller?
> >
> > It doesn't need to be exposed, since the mmap(2) call would be handled
> > by the kernel's USB stack (and besides, the user program can't request
> > that the mapped memory be located in any particular physical address
> > region).
>
> Since alignment isn't the only issue I don't think there's a way to
> avoid it. I was just hoping to be able to avoid allocating zerocopy
> buffers with mmap().
How else can you guarantee that a buffer is located in the first 4 GB
of memory? Even on a 32-bit system (if the computer has more than 4 GB
of RAM)?
Alan Stern
On 02.07.2014 19:55, Greg KH wrote:
> Very nice.
Thanks.
> Minor non-technical issues here, your patch is corrupted (linewrapped
> and tabs converted to spaces) and can't be applied at all. Can you fix
> your email client and try again? Look at
> Documentation/email_clients.txt for hints on how to do this.
Hm, I thought I checked that one. Sorry. I'll test and resend correctly.
>> --- a/drivers/usb/core/devio.c
>> +++ b/drivers/usb/core/devio.c
>> @@ -4,6 +4,7 @@
>> * devio.c -- User space communication with USB devices.
>> *
>> * Copyright (C) 1999-2000 Thomas Sailer ([email protected])
>> + * Copyright (C) 2014 Stefan Klug ([email protected])
> According to legal advice given to me, this isn't needed / true, so can
> you drop it? Or, if you have different legal advice, might I ask for
> what the legal justification is that this needs to be added?
No problem. I'll remove them.
>
>> * This program is free software; you can redistribute it and/or
>> modify
>> * it under the terms of the GNU General Public License as published
>> by
>> @@ -30,6 +31,7 @@
>> * 04.01.2000 0.2 Turned into its own filesystem
>> * 30.09.2005 0.3 Fix user-triggerable oops in async URB delivery
>> * (CAN-2005-3055)
>> + * 07.05.2014 0.4 Added zerocopy support
> No need to do this, we use git now and that's where the changelog goes.
> We should just delete this "fake changelog" entirely from this file.
I'll remove the line and maybe prepare a cleanup patch later.
Stefan Klug
Software Developer
Basler AG
An der Strusbek 60-62
22926 Ahrensburg
Germany
Tel. +49 4102 463 582
Fax +49 4102 463 46 582
[email protected]
http://www.baslerweb.com
Vorstand: Dr.-Ing. Dietmar Ley (Vorsitzender) · John P. Jennings · Arndt Bake · Hardy Mehl
Aufsichtsratsvorsitzender: Norbert Basler
Basler AG · Amtsgericht Lübeck HRB 4090 · Ust-IdNr.: DE 135 098 121 · Steuer-Nr.: 30 292 04497 · WEEE-Reg.-Nr. DE 83888045
On 02.07.2014 20:24, Alan Stern wrote:
> Overall this implementation seems quite straightforward. However, it
> appears that your email client has mangled the whitespace in the patch.
>
> The patch contains many style violations; you should run it through
> checkpatch.pl. It also has one or two mistakes, such as the
> calculation of u for the amount of memory used.
I'll fix these...
>> Questions/Notes:
>> - I'm quite unhappy with the added member async::is_user_mem. Is there a
>> better place
>> where I could store this information?
> No, async is the right place. Why are you unhappy about it?
A whole byte for this flag felt a bit like an overkill, but I'm fine
with that.
>> - I had a look at some other drivers using the get_user_pages ->
>> sg_set_page -> page_cache_release
>> technique and didn't see any special code to handle corner cases.
>> Are there corner cases? - Like usb controllers not supporting the
>> whole memory etc. ?
> Indeed yes. The page addresses have to be checked against the DMA
> mask. Also, many host controllers cannot handle arbitrary alignment.
> It would be best to require that the buffer start at a page boundary.
Is there any way to check if the host controller supports arbitrary
alignment?
If I read the xhci spec correctly arbitrary alignment is explicitly
permitted. In my use case
the user allocates large amounts of memory, which is passed down to
submiturb in smaller chunks.
So asking the kernel for aligned memory for every chunk would force me
to recombine to urbs on the user side,
adding another copy operation. So I would vote for a solution where the
user can allocate the memory and pass it down
without restrictions.
The system should fallback to copying in kernel space, if the buffer is
not supported by the
host-controller (but this is only the case for USB 2 and 1, right?).
>
>> - In the kernel log I see messages like:
>> xhci_hcd 0000:00:14.0: WARN Event TRB for slot 4 ep 8 with no TDs queued?
>> after enabling zerocopy. Any idea where this might come from?
> Not directly related; that's a bug in xhci-hcd.
Whew. That saves me a lot of trouble :-)
>
> Using a global module parameter to control the use of zerocopy (for
> anything other than debugging or testing) is a bad idea. If you think
> people will have a reason for avoiding zerocopy then you should make it
> possible to decide for each URB, by adding a new flag to struct
> usbdevfs_urb.
I'd like to leave the parameter in for debugging purposes mostly, at
least as long as this is not stable.
And it is a security measure. If anything goes wrong with zerocopy (are
there buggy host-controllers out there?),
the user is able to disable it on a system wide basis.
>
> People will want to use zerocopy with isochronous transfers as well as
> bulk. Implementing that isn't going to be quite so easy; it will be
> necessary for the user to set up a memory mapping. In fact, once
> that's done the same mechanism could be used for bulk transfers too.
>
Yes you are right. I'll look into that. Is this a either both or nothing
decision?
As I don't use isochronous transfers at all, it will be quite difficult
for me to correctly test that.
Thanks a lot for your input.
Stefan
--
Stefan Klug
Software Developer
Basler AG
An der Strusbek 60-62
22926 Ahrensburg
Germany
Tel. +49 4102 463 582
Fax +49 4102 463 46 582
[email protected]
http://www.baslerweb.com
Vorstand: Dr.-Ing. Dietmar Ley (Vorsitzender) · John P. Jennings · Arndt Bake · Hardy Mehl
Aufsichtsratsvorsitzender: Norbert Basler
Basler AG · Amtsgericht Lübeck HRB 4090 · Ust-IdNr.: DE 135 098 121 · Steuer-Nr.: 30 292 04497 · WEEE-Reg.-Nr. DE 83888045
On 02.07.2014 21:38, Alan Stern wrote:
> On Wed, 2 Jul 2014, Oliver Neukum wrote:
>
>>
Stefan Klug
Software Developer
Basler AG
An der Strusbek 60-62
22926 Ahrensburg
Germany
Tel. +49 4102 463 582
Fax +49 4102 463 46 582
[email protected]
http://www.baslerweb.com
Vorstand: Dr.-Ing. Dietmar Ley (Vorsitzender) · John P. Jennings · Arndt Bake · Hardy Mehl
Aufsichtsratsvorsitzender: Norbert Basler
Basler AG · Amtsgericht Lübeck HRB 4090 · Ust-IdNr.: DE 135 098 121 · Steuer-Nr.: 30 292 04497 · WEEE-Reg.-Nr. DE 83888045
On Wed, 2014-07-02 at 17:53 +0200, Stefan Klug wrote:
>>
>>> Implementation details:
>>> The patch only touches drivers/usb/core/devio.c.
>>> In procy_do_submiturb(), it is checked if zerocopy is allowed. This is
>>> currently a rough
>>> check which compares the number of required pages to
>>> ps->dev->bus->sg_tablesize.
>> It seems to me that the check is per call, so using
>> multiple calls one could still pin unlimited amounts
>> of memory.
> usbfs keeps track of the total amount of pinned memory and enforces an
> overall limit. It will be necessary to add the size of the transfer
> buffer to that total.
Leaving the zerocopy transfers out of this limit was intentional. I
thought this is user-memory so we shouldn't add it to the overall limit
as it is not allocated by usbfs.
But I didn't think of the pinning problem. So yes, I can add it to the
overall limit.
>>> I don't know if there is more to check there.
>>> Then the user memory provided inside the usbdevfs_urb structure is
>>> pinned to
>>> physical memory using get_user_pages_fast().
>>> All the user pages are added to the scatter-gather list and the logic
>>> continues as before.
>> How do you enforce the cache coherency rules?
> There is no way to do this. If the user program accesses memory when
> it shouldn't, the transfer might not work right.
So this one is fine, right?
>> Also you don't have a fall back if get_user_pages_fast()
>> returns less than requested. It seems to me that than you
>> ought to fall back buffered IO.
> Agreed.
Good point. I'll add the fallback to the next iteration.
From: Stefan Klug
...
> Is there any way to check if the host controller supports arbitrary
> alignment?
> If I read the xhci spec correctly arbitrary alignment is explicitly
> permitted.
Not entirely.
The xhci spec has a few limits on the alignment of transfer buffer.
They seem to be designed to make life difficult for the kernel!
1) Transfer buffers cannot be longer than 64k.
2) Transfer buffers cannot cross 64k address boundaries.
3) The end of a ring segment must occur on a USB packet boundary.
The current xhci driver doesn't implement check 3 - which causes
certain devices to fail (notable the ax88179_178a usb3 ethernet).
David
????{.n?+???????+%?????ݶ??w??{.n?+????{??G?????{ay?ʇڙ?,j??f???h?????????z_??(?階?ݢj"???m??????G????????????&???~???iO???z??v?^?m????????????I?
On Thu, 3 Jul 2014, Stefan Klug wrote:
> >> Questions/Notes:
> >> - I'm quite unhappy with the added member async::is_user_mem. Is there a
> >> better place
> >> where I could store this information?
> > No, async is the right place. Why are you unhappy about it?
> A whole byte for this flag felt a bit like an overkill, but I'm fine
> with that.
It's not a big deal. Besides, the structure has to be padded to at
least 4-byte alignment anyway.
> >> - I had a look at some other drivers using the get_user_pages ->
> >> sg_set_page -> page_cache_release
> >> technique and didn't see any special code to handle corner cases.
> >> Are there corner cases? - Like usb controllers not supporting the
> >> whole memory etc. ?
> > Indeed yes. The page addresses have to be checked against the DMA
> > mask. Also, many host controllers cannot handle arbitrary alignment.
> > It would be best to require that the buffer start at a page boundary.
>
> Is there any way to check if the host controller supports arbitrary
> alignment?
No. Besides, if there were and you made use of it then users would
find that zerocopy worked correctly on some computers and did not work
(i.e., fell back to copying) mysteriously on others. Not a good
situation.
> If I read the xhci spec correctly arbitrary alignment is explicitly
> permitted.
Not all host controllers are xHCI.
> In my use case
> the user allocates large amounts of memory, which is passed down to
> submiturb in smaller chunks.
The user could allocate a little more memory than needed, so that the
start of each transfer buffer could be rounded up to a page boundary
within the memory region.
> So asking the kernel for aligned memory for every chunk would force me
> to recombine to urbs on the user side,
> adding another copy operation.
Not necessarily. See above.
> So I would vote for a solution where the
> user can allocate the memory and pass it down
> without restrictions.
> The system should fallback to copying in kernel space, if the buffer is
> not supported by the
> host-controller (but this is only the case for USB 2 and 1, right?).
Well, it's the case for non-xHCI. Your choice of words is ambiguous,
because xHCI supports USB-1 and USB-2 as well as USB-3.
> > Using a global module parameter to control the use of zerocopy (for
> > anything other than debugging or testing) is a bad idea. If you think
> > people will have a reason for avoiding zerocopy then you should make it
> > possible to decide for each URB, by adding a new flag to struct
> > usbdevfs_urb.
>
> I'd like to leave the parameter in for debugging purposes mostly, at
> least as long as this is not stable.
Then add a comment and an explanation in the patch description that the
module parameter is meant for debugging.
> And it is a security measure. If anything goes wrong with zerocopy (are
> there buggy host-controllers out there?),
> the user is able to disable it on a system wide basis.
Yes, there are buggy host controllers. But I don't see how disabling
zerocopy would improve security. Buggy hardware can mess things up
even if zerocopy isn't used.
> > People will want to use zerocopy with isochronous transfers as well as
> > bulk. Implementing that isn't going to be quite so easy; it will be
> > necessary for the user to set up a memory mapping. In fact, once
> > that's done the same mechanism could be used for bulk transfers too.
> >
> Yes you are right. I'll look into that. Is this a either both or nothing
> decision?
It doesn't have to be. If both mechanisms are present, users could get
the benefit of zerocopy without the need for setting up a memory
mapping.
Some patches along this line were posted last fall by Markus
Rechberger. See
http://marc.info/?l=linux-usb&m=138046339714340&w=2
and the following messages in that thread.
> As I don't use isochronous transfers at all, it will be quite difficult
> for me to correctly test that.
That is a problem. By far the most common uses for isochronous are
audio and video, and customizing user programs for those applications
isn't easy.
Alan Stern
On Wed, 2014-07-02 at 17:53 +0200, Stefan Klug wrote:
> @@ -1471,6 +1526,57 @@ static int proc_do_submiturb(struct
> usb_dev_state
> *ps, struct usbdevfs_urb *uurb
> }
> totlen -= u;
> }
> + } else if(num_pages) {
> + pages = kmalloc(num_pages*sizeof(struct page*), GFP_KERNEL);
> + if(!pages) {
> + ret = -ENOMEM;
> + goto error;
> + }
> +
> + //create the scatterlist
> + as->urb->sg = kmalloc(num_pages * sizeof(struct
> scatterlist),
> GFP_KERNEL);
> + if (!as->urb->sg) {
> + ret = -ENOMEM;
> + goto error;
> + }
> +
> + ret = get_user_pages_fast((unsigned long)buf_aligned,
> + num_pages,
> + is_in,
> + pages);
> +
> + if(ret < 0) {
> + printk("get_user_pages failed %i\n", ret);
> + goto error;
> + }
> +
> + //did we get all pages?
> + if(ret < num_pages) {
> + printk("get_user_pages didn't deliver all pages %i\n",
> ret);
> + //free the pages and error out
> + for(i=0; i<ret; i++) {
> + page_cache_release(pages[i]);
> + }
> + ret = -ENOMEM;
> + goto error;
> + }
> +
> + as->is_user_mem = 1;
> + as->urb->num_sgs = num_pages;
> + sg_init_table(as->urb->sg, as->urb->num_sgs);
> +
> + totlen = uurb->buffer_length + buf_offset;
> + o = buf_offset;
> + for (i = 0; i < as->urb->num_sgs; i++) {
> + u = (totlen > PAGE_SIZE) ? PAGE_SIZE : totlen;
> + u-= o;
> + sg_set_page(&as->urb->sg[i], pages[i], u, o);
> + totlen -= u + o;
> + o = 0;
> + }
> +
> + kfree(pages);
> + pages = NULL;
> } else if (uurb->buffer_length > 0) {
> as->urb->transfer_buffer = kmalloc(uurb->buffer_length,
> GFP_KERNEL)
One more thing: Where do you check that the memory the user has passed
a pointer to is actually writable? It seems to me that for zerocopy you
must do the check before you submit the URB to the HCD.
Regards
Oliver