When IO page fault happens, DMA performance will be affected. Pin user page
can avoid IO page fault, this patch introduces a new char device named
/dev/uacce_ctrl to help to maintain pin/unpin pages. User space can do
pin/unpin pages by ioctls of an open file of /dev/uacce_ctrl, all pinned
pages under one file will be unpinned in file release process.
Signed-off-by: Zhou Wang <[email protected]>
Signed-off-by: Sihang Chen <[email protected]>
Suggested-by: Barry Song <[email protected]>
---
drivers/misc/uacce/uacce.c | 172 +++++++++++++++++++++++++++++++++++++++-
include/uapi/misc/uacce/uacce.h | 16 ++++
2 files changed, 187 insertions(+), 1 deletion(-)
diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
index d07af4e..b8ac408 100644
--- a/drivers/misc/uacce/uacce.c
+++ b/drivers/misc/uacce/uacce.c
@@ -2,6 +2,7 @@
#include <linux/compat.h>
#include <linux/dma-mapping.h>
#include <linux/iommu.h>
+#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/slab.h>
@@ -12,6 +13,16 @@ static dev_t uacce_devt;
static DEFINE_MUTEX(uacce_mutex);
static DEFINE_XARRAY_ALLOC(uacce_xa);
+struct uacce_pin_container {
+ struct xarray array;
+};
+
+struct pin_pages {
+ unsigned long first;
+ unsigned long nr_pages;
+ struct page **pages;
+};
+
static int uacce_start_queue(struct uacce_queue *q)
{
int ret = 0;
@@ -497,6 +508,152 @@ void uacce_remove(struct uacce_device *uacce)
}
EXPORT_SYMBOL_GPL(uacce_remove);
+int uacce_ctrl_open(struct inode *inode, struct file *file)
+{
+ struct uacce_pin_container *p;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ file->private_data = p;
+
+ xa_init(&p->array);
+
+ return 0;
+}
+
+int uacce_ctrl_release(struct inode *inode, struct file *file)
+{
+ struct uacce_pin_container *priv = file->private_data;
+ struct pin_pages *p;
+ unsigned long idx;
+
+ xa_for_each(&priv->array, idx, p) {
+ unpin_user_pages(p->pages, p->nr_pages);
+ xa_erase(&priv->array, p->first);
+ vfree(p->pages);
+ kfree(p);
+ }
+
+ xa_destroy(&priv->array);
+ kfree(priv);
+
+ return 0;
+}
+
+static int uacce_pin_page(struct uacce_pin_container *priv,
+ struct uacce_pin_address *addr)
+{
+ unsigned int flags = FOLL_FORCE | FOLL_WRITE;
+ unsigned long first, last, nr_pages;
+ struct page **pages;
+ struct pin_pages *p;
+ int ret;
+
+ first = (addr->addr & PAGE_MASK) >> PAGE_SHIFT;
+ last = ((addr->addr + addr->size - 1) & PAGE_MASK) >> PAGE_SHIFT;
+ nr_pages = last - first + 1;
+
+ pages = vmalloc(nr_pages * sizeof(struct page *));
+ if (!pages)
+ return -ENOMEM;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p) {
+ ret = -ENOMEM;
+ goto free;
+ }
+
+ ret = pin_user_pages_fast(addr->addr & PAGE_MASK, nr_pages,
+ flags | FOLL_LONGTERM, pages);
+ if (ret != nr_pages) {
+ pr_err("uacce: Failed to pin page\n");
+ goto free_p;
+ }
+ p->first = first;
+ p->nr_pages = nr_pages;
+ p->pages = pages;
+
+ ret = xa_err(xa_store(&priv->array, p->first, p, GFP_KERNEL));
+ if (ret)
+ goto unpin_pages;
+
+ return 0;
+
+unpin_pages:
+ unpin_user_pages(pages, nr_pages);
+free_p:
+ kfree(p);
+free:
+ vfree(pages);
+ return ret;
+}
+
+static int uacce_unpin_page(struct uacce_pin_container *priv,
+ struct uacce_pin_address *addr)
+{
+ unsigned long first, last, nr_pages;
+ struct pin_pages *p;
+
+ first = (addr->addr & PAGE_MASK) >> PAGE_SHIFT;
+ last = ((addr->addr + addr->size - 1) & PAGE_MASK) >> PAGE_SHIFT;
+ nr_pages = last - first + 1;
+
+ /* find pin_pages */
+ p = xa_load(&priv->array, first);
+ if (!p)
+ return -ENODEV;
+
+ if (p->nr_pages != nr_pages)
+ return -EINVAL;
+
+ /* unpin */
+ unpin_user_pages(p->pages, p->nr_pages);
+
+ /* release resource */
+ xa_erase(&priv->array, first);
+ vfree(p->pages);
+ kfree(p);
+
+ return 0;
+}
+
+static long uacce_ctrl_unl_ioctl(struct file *filep, unsigned int cmd,
+ unsigned long arg)
+{
+ struct uacce_pin_container *p = filep->private_data;
+ struct uacce_pin_address addr;
+ int ret;
+
+ if (copy_from_user(&addr, (void __user *)arg,
+ sizeof(struct uacce_pin_address)))
+ return -EFAULT;
+
+ switch (cmd) {
+ case UACCE_CMD_PIN:
+ return uacce_pin_page(p, &addr);
+
+ case UACCE_CMD_UNPIN:
+ return uacce_unpin_page(p, &addr);
+
+ default:
+ return -EINVAL;
+ }
+}
+
+static const struct file_operations uacce_ctrl_fops = {
+ .owner = THIS_MODULE,
+ .open = uacce_ctrl_open,
+ .release = uacce_ctrl_release,
+ .unlocked_ioctl = uacce_ctrl_unl_ioctl,
+};
+
+static struct miscdevice uacce_ctrl_miscdev = {
+ .name = "uacce_ctrl",
+ .minor = MISC_DYNAMIC_MINOR,
+ .fops = &uacce_ctrl_fops,
+};
+
static int __init uacce_init(void)
{
int ret;
@@ -507,13 +664,26 @@ static int __init uacce_init(void)
ret = alloc_chrdev_region(&uacce_devt, 0, MINORMASK, UACCE_NAME);
if (ret)
- class_destroy(uacce_class);
+ goto destroy_class;
+
+ ret = misc_register(&uacce_ctrl_miscdev);
+ if (ret) {
+ pr_err("uacce: ctrl dev registration failed\n");
+ goto unregister_cdev;
+ }
+ return 0;
+
+unregister_cdev:
+ unregister_chrdev_region(uacce_devt, MINORMASK);
+destroy_class:
+ class_destroy(uacce_class);
return ret;
}
static __exit void uacce_exit(void)
{
+ misc_deregister(&uacce_ctrl_miscdev);
unregister_chrdev_region(uacce_devt, MINORMASK);
class_destroy(uacce_class);
}
diff --git a/include/uapi/misc/uacce/uacce.h b/include/uapi/misc/uacce/uacce.h
index cc71856..f9e326f 100644
--- a/include/uapi/misc/uacce/uacce.h
+++ b/include/uapi/misc/uacce/uacce.h
@@ -35,4 +35,20 @@ enum uacce_qfrt {
UACCE_QFRT_DUS = 1,
};
+/**
+ * struct uacce_pin_address - Expected pin user space address and size
+ * @addr: Address to pin
+ * @size: Size of pin address
+ */
+struct uacce_pin_address {
+ unsigned long addr;
+ unsigned long size;
+};
+
+/* UACCE_CMD_PIN: Pin a range of memory */
+#define UACCE_CMD_PIN _IOW('W', 2, struct uacce_pin_address)
+
+/* UACCE_CMD_UNPIN: Unpin a range of memory */
+#define UACCE_CMD_UNPIN _IOW('W', 3, struct uacce_pin_address)
+
#endif
--
2.8.1
On Thu, Jan 21, 2021 at 05:09:14PM +0800, Zhou Wang wrote:
> When IO page fault happens, DMA performance will be affected. Pin user page
> can avoid IO page fault, this patch introduces a new char device named
> /dev/uacce_ctrl to help to maintain pin/unpin pages. User space can do
> pin/unpin pages by ioctls of an open file of /dev/uacce_ctrl, all pinned
> pages under one file will be unpinned in file release process.
>
> Signed-off-by: Zhou Wang <[email protected]>
> Signed-off-by: Sihang Chen <[email protected]>
> Suggested-by: Barry Song <[email protected]>
> ---
> drivers/misc/uacce/uacce.c | 172 +++++++++++++++++++++++++++++++++++++++-
> include/uapi/misc/uacce/uacce.h | 16 ++++
> 2 files changed, 187 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
> index d07af4e..b8ac408 100644
> --- a/drivers/misc/uacce/uacce.c
> +++ b/drivers/misc/uacce/uacce.c
> @@ -2,6 +2,7 @@
> #include <linux/compat.h>
> #include <linux/dma-mapping.h>
> #include <linux/iommu.h>
> +#include <linux/miscdevice.h>
> #include <linux/module.h>
> #include <linux/poll.h>
> #include <linux/slab.h>
> @@ -12,6 +13,16 @@ static dev_t uacce_devt;
> static DEFINE_MUTEX(uacce_mutex);
> static DEFINE_XARRAY_ALLOC(uacce_xa);
>
> +struct uacce_pin_container {
> + struct xarray array;
> +};
> +
> +struct pin_pages {
> + unsigned long first;
> + unsigned long nr_pages;
> + struct page **pages;
> +};
> +
> static int uacce_start_queue(struct uacce_queue *q)
> {
> int ret = 0;
> @@ -497,6 +508,152 @@ void uacce_remove(struct uacce_device *uacce)
> }
> EXPORT_SYMBOL_GPL(uacce_remove);
>
> +int uacce_ctrl_open(struct inode *inode, struct file *file)
> +{
> + struct uacce_pin_container *p;
> +
> + p = kzalloc(sizeof(*p), GFP_KERNEL);
> + if (!p)
> + return -ENOMEM;
> + file->private_data = p;
> +
> + xa_init(&p->array);
> +
> + return 0;
> +}
> +
> +int uacce_ctrl_release(struct inode *inode, struct file *file)
> +{
> + struct uacce_pin_container *priv = file->private_data;
> + struct pin_pages *p;
> + unsigned long idx;
> +
> + xa_for_each(&priv->array, idx, p) {
> + unpin_user_pages(p->pages, p->nr_pages);
> + xa_erase(&priv->array, p->first);
> + vfree(p->pages);
> + kfree(p);
> + }
> +
> + xa_destroy(&priv->array);
> + kfree(priv);
> +
> + return 0;
> +}
> +
> +static int uacce_pin_page(struct uacce_pin_container *priv,
> + struct uacce_pin_address *addr)
> +{
> + unsigned int flags = FOLL_FORCE | FOLL_WRITE;
> + unsigned long first, last, nr_pages;
> + struct page **pages;
> + struct pin_pages *p;
> + int ret;
> +
> + first = (addr->addr & PAGE_MASK) >> PAGE_SHIFT;
> + last = ((addr->addr + addr->size - 1) & PAGE_MASK) >> PAGE_SHIFT;
> + nr_pages = last - first + 1;
> +
> + pages = vmalloc(nr_pages * sizeof(struct page *));
> + if (!pages)
> + return -ENOMEM;
> +
> + p = kzalloc(sizeof(*p), GFP_KERNEL);
> + if (!p) {
> + ret = -ENOMEM;
> + goto free;
> + }
> +
> + ret = pin_user_pages_fast(addr->addr & PAGE_MASK, nr_pages,
> + flags | FOLL_LONGTERM, pages);
> + if (ret != nr_pages) {
> + pr_err("uacce: Failed to pin page\n");
> + goto free_p;
> + }
> + p->first = first;
> + p->nr_pages = nr_pages;
> + p->pages = pages;
> +
> + ret = xa_err(xa_store(&priv->array, p->first, p, GFP_KERNEL));
> + if (ret)
> + goto unpin_pages;
> +
> + return 0;
> +
> +unpin_pages:
> + unpin_user_pages(pages, nr_pages);
> +free_p:
> + kfree(p);
> +free:
> + vfree(pages);
> + return ret;
> +}
> +
> +static int uacce_unpin_page(struct uacce_pin_container *priv,
> + struct uacce_pin_address *addr)
> +{
> + unsigned long first, last, nr_pages;
> + struct pin_pages *p;
> +
> + first = (addr->addr & PAGE_MASK) >> PAGE_SHIFT;
> + last = ((addr->addr + addr->size - 1) & PAGE_MASK) >> PAGE_SHIFT;
> + nr_pages = last - first + 1;
> +
> + /* find pin_pages */
> + p = xa_load(&priv->array, first);
> + if (!p)
> + return -ENODEV;
> +
> + if (p->nr_pages != nr_pages)
> + return -EINVAL;
> +
> + /* unpin */
> + unpin_user_pages(p->pages, p->nr_pages);
> +
> + /* release resource */
> + xa_erase(&priv->array, first);
> + vfree(p->pages);
> + kfree(p);
> +
> + return 0;
> +}
> +
> +static long uacce_ctrl_unl_ioctl(struct file *filep, unsigned int cmd,
> + unsigned long arg)
> +{
> + struct uacce_pin_container *p = filep->private_data;
> + struct uacce_pin_address addr;
> + int ret;
> +
> + if (copy_from_user(&addr, (void __user *)arg,
> + sizeof(struct uacce_pin_address)))
> + return -EFAULT;
> +
> + switch (cmd) {
> + case UACCE_CMD_PIN:
> + return uacce_pin_page(p, &addr);
> +
> + case UACCE_CMD_UNPIN:
> + return uacce_unpin_page(p, &addr);
> +
> + default:
> + return -EINVAL;
> + }
> +}
> +
> +static const struct file_operations uacce_ctrl_fops = {
> + .owner = THIS_MODULE,
> + .open = uacce_ctrl_open,
> + .release = uacce_ctrl_release,
> + .unlocked_ioctl = uacce_ctrl_unl_ioctl,
> +};
> +
> +static struct miscdevice uacce_ctrl_miscdev = {
> + .name = "uacce_ctrl",
> + .minor = MISC_DYNAMIC_MINOR,
> + .fops = &uacce_ctrl_fops,
> +};
> +
> static int __init uacce_init(void)
> {
> int ret;
> @@ -507,13 +664,26 @@ static int __init uacce_init(void)
>
> ret = alloc_chrdev_region(&uacce_devt, 0, MINORMASK, UACCE_NAME);
> if (ret)
> - class_destroy(uacce_class);
> + goto destroy_class;
> +
> + ret = misc_register(&uacce_ctrl_miscdev);
> + if (ret) {
> + pr_err("uacce: ctrl dev registration failed\n");
> + goto unregister_cdev;
> + }
>
> + return 0;
> +
> +unregister_cdev:
> + unregister_chrdev_region(uacce_devt, MINORMASK);
> +destroy_class:
> + class_destroy(uacce_class);
> return ret;
> }
>
> static __exit void uacce_exit(void)
> {
> + misc_deregister(&uacce_ctrl_miscdev);
> unregister_chrdev_region(uacce_devt, MINORMASK);
> class_destroy(uacce_class);
> }
> diff --git a/include/uapi/misc/uacce/uacce.h b/include/uapi/misc/uacce/uacce.h
> index cc71856..f9e326f 100644
> --- a/include/uapi/misc/uacce/uacce.h
> +++ b/include/uapi/misc/uacce/uacce.h
> @@ -35,4 +35,20 @@ enum uacce_qfrt {
> UACCE_QFRT_DUS = 1,
> };
>
> +/**
> + * struct uacce_pin_address - Expected pin user space address and size
> + * @addr: Address to pin
> + * @size: Size of pin address
> + */
> +struct uacce_pin_address {
> + unsigned long addr;
> + unsigned long size;
These are not valid ioctl structure members for crossing the user/kernel
boundry. Please make them work properly (i.e. use __u64 and friends).
thanks,
greg k-h
On Thu, Jan 21, 2021 at 05:09:14PM +0800, Zhou Wang wrote:
> When IO page fault happens, DMA performance will be affected. Pin user page
> can avoid IO page fault, this patch introduces a new char device named
> /dev/uacce_ctrl to help to maintain pin/unpin pages. User space can do
> pin/unpin pages by ioctls of an open file of /dev/uacce_ctrl, all pinned
> pages under one file will be unpinned in file release process.
Also, what are you really trying to do here? If you need to mess with
memory pages, why can't the existing memory apis work properly for you?
Please work with the linux-mm developers to resolve the issue using the
standard apis and not creating a one-off char device node for this type
of thing.
thanks,
greg k-h
> -----Original Message-----
> From: Greg Kroah-Hartman [mailto:[email protected]]
> Sent: Thursday, January 21, 2021 10:46 PM
> To: Wangzhou (B) <[email protected]>
> Cc: Zhangfei Gao <[email protected]>; Arnd Bergmann <[email protected]>;
> [email protected]; [email protected];
> chensihang (A) <[email protected]>
> Subject: Re: [PATCH] uacce: Add uacce_ctrl misc device
>
> On Thu, Jan 21, 2021 at 05:09:14PM +0800, Zhou Wang wrote:
> > When IO page fault happens, DMA performance will be affected. Pin user page
> > can avoid IO page fault, this patch introduces a new char device named
> > /dev/uacce_ctrl to help to maintain pin/unpin pages. User space can do
> > pin/unpin pages by ioctls of an open file of /dev/uacce_ctrl, all pinned
> > pages under one file will be unpinned in file release process.
>
> Also, what are you really trying to do here? If you need to mess with
> memory pages, why can't the existing memory apis work properly for you?
> Please work with the linux-mm developers to resolve the issue using the
> standard apis and not creating a one-off char device node for this type
> of thing.
Basically the purpose is implementing a pinned memory poll for userspace
DMA to achieve better performance by removing io page fault.
I really like this can be done in generic mm code. Unfortunately there is no
this standard API in kernel to support userspace pin. Right now, various
subsystems depend on the ioctl of /dev/<name> to implement the pin, for example,
v4l2, gpu, infiniband, media etc.
I feel it is extremely hard to sell a standard mpin() API like mlock()
for this stage as mm could hardly buy this. And it will require
huge changes in kernel.
We need a way to manage what pages are pinned by process and ensure the
pages can be unpinned while the process is killed abnormally. otherwise,
memory gets leaked.
file_operations release() is a good entry for this kind of things. In
this way, we don't have to maintain the pinned page set in task_struct
and unpin them during exit().
If there is anything to make it better by doing this in a driver. I
would believe we could have a generic misc driver for pin like
vms_ballon.c for ballon. The driver doesn't have to bind with uacce.
In this way, the pinned memory pool implementation in userspace doesn't
need to depend on a specific uacce driver any more.
>
> thanks,
>
> greg k-h
Thanks
Barry
Hi Zhou,
I love your patch! Perhaps something to improve:
[auto build test WARNING on char-misc/char-misc-testing]
[also build test WARNING on soc/for-next linux/master linus/master v5.11-rc4 next-20210120]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Zhou-Wang/uacce-Add-uacce_ctrl-misc-device/20210121-172139
base: https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git f6f1f8e6e3eea25f539105d48166e91f0ab46dd1
config: nios2-randconfig-r023-20210121 (attached as .config)
compiler: nios2-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/4dc40d891a7e60ed79e6b9460a38a142d3d1a965
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Zhou-Wang/uacce-Add-uacce_ctrl-misc-device/20210121-172139
git checkout 4dc40d891a7e60ed79e6b9460a38a142d3d1a965
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=nios2
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All warnings (new ones prefixed by >>):
>> drivers/misc/uacce/uacce.c:511:5: warning: no previous prototype for 'uacce_ctrl_open' [-Wmissing-prototypes]
511 | int uacce_ctrl_open(struct inode *inode, struct file *file)
| ^~~~~~~~~~~~~~~
>> drivers/misc/uacce/uacce.c:525:5: warning: no previous prototype for 'uacce_ctrl_release' [-Wmissing-prototypes]
525 | int uacce_ctrl_release(struct inode *inode, struct file *file)
| ^~~~~~~~~~~~~~~~~~
drivers/misc/uacce/uacce.c: In function 'uacce_ctrl_unl_ioctl':
drivers/misc/uacce/uacce.c:626:6: warning: unused variable 'ret' [-Wunused-variable]
626 | int ret;
| ^~~
vim +/uacce_ctrl_open +511 drivers/misc/uacce/uacce.c
510
> 511 int uacce_ctrl_open(struct inode *inode, struct file *file)
512 {
513 struct uacce_pin_container *p;
514
515 p = kzalloc(sizeof(*p), GFP_KERNEL);
516 if (!p)
517 return -ENOMEM;
518 file->private_data = p;
519
520 xa_init(&p->array);
521
522 return 0;
523 }
524
> 525 int uacce_ctrl_release(struct inode *inode, struct file *file)
526 {
527 struct uacce_pin_container *priv = file->private_data;
528 struct pin_pages *p;
529 unsigned long idx;
530
531 xa_for_each(&priv->array, idx, p) {
532 unpin_user_pages(p->pages, p->nr_pages);
533 xa_erase(&priv->array, p->first);
534 vfree(p->pages);
535 kfree(p);
536 }
537
538 xa_destroy(&priv->array);
539 kfree(priv);
540
541 return 0;
542 }
543
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
Reported-by: kernel test robot <[email protected]>
Signed-off-by: kernel test robot <[email protected]>
---
uacce.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
index b8ac4080a7d6f76..11cbb69422221a5 100644
--- a/drivers/misc/uacce/uacce.c
+++ b/drivers/misc/uacce/uacce.c
@@ -508,7 +508,7 @@ void uacce_remove(struct uacce_device *uacce)
}
EXPORT_SYMBOL_GPL(uacce_remove);
-int uacce_ctrl_open(struct inode *inode, struct file *file)
+static int uacce_ctrl_open(struct inode *inode, struct file *file)
{
struct uacce_pin_container *p;
@@ -522,7 +522,7 @@ int uacce_ctrl_open(struct inode *inode, struct file *file)
return 0;
}
-int uacce_ctrl_release(struct inode *inode, struct file *file)
+static int uacce_ctrl_release(struct inode *inode, struct file *file)
{
struct uacce_pin_container *priv = file->private_data;
struct pin_pages *p;
Hi Zhou,
I love your patch! Perhaps something to improve:
[auto build test WARNING on char-misc/char-misc-testing]
[also build test WARNING on soc/for-next linux/master linus/master v5.11-rc4 next-20210120]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Zhou-Wang/uacce-Add-uacce_ctrl-misc-device/20210121-172139
base: https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git f6f1f8e6e3eea25f539105d48166e91f0ab46dd1
config: x86_64-randconfig-s021-20210121 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce:
# apt-get install sparse
# sparse version: v0.6.3-208-g46a52ca4-dirty
# https://github.com/0day-ci/linux/commit/4dc40d891a7e60ed79e6b9460a38a142d3d1a965
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Zhou-Wang/uacce-Add-uacce_ctrl-misc-device/20210121-172139
git checkout 4dc40d891a7e60ed79e6b9460a38a142d3d1a965
# save the attached .config to linux build tree
make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
"sparse warnings: (new ones prefixed by >>)"
>> drivers/misc/uacce/uacce.c:511:5: sparse: sparse: symbol 'uacce_ctrl_open' was not declared. Should it be static?
>> drivers/misc/uacce/uacce.c:525:5: sparse: sparse: symbol 'uacce_ctrl_release' was not declared. Should it be static?
Please review and possibly fold the followup patch.
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
> -----Original Message-----
> From: Greg Kroah-Hartman [mailto:[email protected]]
> Sent: Friday, January 22, 2021 12:19 AM
> To: Song Bao Hua (Barry Song) <[email protected]>
> Cc: Wangzhou (B) <[email protected]>; Zhangfei Gao
> <[email protected]>; Arnd Bergmann <[email protected]>;
> [email protected]; [email protected];
> chensihang (A) <[email protected]>
> Subject: Re: [PATCH] uacce: Add uacce_ctrl misc device
>
> On Thu, Jan 21, 2021 at 10:18:24AM +0000, Song Bao Hua (Barry Song) wrote:
> >
> >
> > > -----Original Message-----
> > > From: Greg Kroah-Hartman [mailto:[email protected]]
> > > Sent: Thursday, January 21, 2021 10:46 PM
> > > To: Wangzhou (B) <[email protected]>
> > > Cc: Zhangfei Gao <[email protected]>; Arnd Bergmann <[email protected]>;
> > > [email protected]; [email protected];
> > > chensihang (A) <[email protected]>
> > > Subject: Re: [PATCH] uacce: Add uacce_ctrl misc device
> > >
> > > On Thu, Jan 21, 2021 at 05:09:14PM +0800, Zhou Wang wrote:
> > > > When IO page fault happens, DMA performance will be affected. Pin user
> page
> > > > can avoid IO page fault, this patch introduces a new char device named
> > > > /dev/uacce_ctrl to help to maintain pin/unpin pages. User space can do
> > > > pin/unpin pages by ioctls of an open file of /dev/uacce_ctrl, all pinned
> > > > pages under one file will be unpinned in file release process.
> > >
> > > Also, what are you really trying to do here? If you need to mess with
> > > memory pages, why can't the existing memory apis work properly for you?
> > > Please work with the linux-mm developers to resolve the issue using the
> > > standard apis and not creating a one-off char device node for this type
> > > of thing.
> >
> > Basically the purpose is implementing a pinned memory poll for userspace
> > DMA to achieve better performance by removing io page fault.
>
> And what could possibly go wrong with that :)
I think we have resolved this concern while uacce came in :-)
Uacce is based on SVA so devices are accessing memory in userspace
by strict permission control.
>
> > I really like this can be done in generic mm code. Unfortunately there is
> no
> > this standard API in kernel to support userspace pin. Right now, various
> > subsystems depend on the ioctl of /dev/<name> to implement the pin, for example,
> > v4l2, gpu, infiniband, media etc.
> >
> > I feel it is extremely hard to sell a standard mpin() API like mlock()
> > for this stage as mm could hardly buy this. And it will require
> > huge changes in kernel.
>
> Why? This is what mlock() is for, why can't you use it?
mlock() can only guarantee memory won't be swapped out, it doesn't
make sure memory won't move. alloc_pages() can cause memory compaction,
cma, numa balance, huge pages etc can move mlock()-ed pages.
We would still see many I/O page faults for mlock() area.
>
> > We need a way to manage what pages are pinned by process and ensure the
> > pages can be unpinned while the process is killed abnormally. otherwise,
> > memory gets leaked.
>
> Can't mlock() handle that? It works on the process that called it.
>
> > file_operations release() is a good entry for this kind of things. In
> > this way, we don't have to maintain the pinned page set in task_struct
> > and unpin them during exit().
> >
> > If there is anything to make it better by doing this in a driver. I
> > would believe we could have a generic misc driver for pin like
> > vms_ballon.c for ballon. The driver doesn't have to bind with uacce.
> >
> > In this way, the pinned memory pool implementation in userspace doesn't
> > need to depend on a specific uacce driver any more.
>
> Please work with the mm developers to get them to agree with this type
> of thing, as well as the dma developers, both of which you didn't cc: on
> this patch :(
Yep.
>
> Remember, you are creating a new api for Linux that goes around existing
> syscalls, but is in reality, a new syscall, so why not just make it a
> new syscall?
The difficulty would be how to record which pages are pinned for a process
if it is done by a new syscall.
For mlock(), it can be much easier as it will change VMA. Hardly we can
change VMA for pin. On the other hand, if the implementation is done in
driver, with file_operations, we can record pinned pages in the private
data of an opened file.
>
> thanks,
>
> greg k-h
Thanks
Barry
On Thu, Jan 21, 2021 at 10:18:24AM +0000, Song Bao Hua (Barry Song) wrote:
>
>
> > -----Original Message-----
> > From: Greg Kroah-Hartman [mailto:[email protected]]
> > Sent: Thursday, January 21, 2021 10:46 PM
> > To: Wangzhou (B) <[email protected]>
> > Cc: Zhangfei Gao <[email protected]>; Arnd Bergmann <[email protected]>;
> > [email protected]; [email protected];
> > chensihang (A) <[email protected]>
> > Subject: Re: [PATCH] uacce: Add uacce_ctrl misc device
> >
> > On Thu, Jan 21, 2021 at 05:09:14PM +0800, Zhou Wang wrote:
> > > When IO page fault happens, DMA performance will be affected. Pin user page
> > > can avoid IO page fault, this patch introduces a new char device named
> > > /dev/uacce_ctrl to help to maintain pin/unpin pages. User space can do
> > > pin/unpin pages by ioctls of an open file of /dev/uacce_ctrl, all pinned
> > > pages under one file will be unpinned in file release process.
> >
> > Also, what are you really trying to do here? If you need to mess with
> > memory pages, why can't the existing memory apis work properly for you?
> > Please work with the linux-mm developers to resolve the issue using the
> > standard apis and not creating a one-off char device node for this type
> > of thing.
>
> Basically the purpose is implementing a pinned memory poll for userspace
> DMA to achieve better performance by removing io page fault.
And what could possibly go wrong with that :)
> I really like this can be done in generic mm code. Unfortunately there is no
> this standard API in kernel to support userspace pin. Right now, various
> subsystems depend on the ioctl of /dev/<name> to implement the pin, for example,
> v4l2, gpu, infiniband, media etc.
>
> I feel it is extremely hard to sell a standard mpin() API like mlock()
> for this stage as mm could hardly buy this. And it will require
> huge changes in kernel.
Why? This is what mlock() is for, why can't you use it?
> We need a way to manage what pages are pinned by process and ensure the
> pages can be unpinned while the process is killed abnormally. otherwise,
> memory gets leaked.
Can't mlock() handle that? It works on the process that called it.
> file_operations release() is a good entry for this kind of things. In
> this way, we don't have to maintain the pinned page set in task_struct
> and unpin them during exit().
>
> If there is anything to make it better by doing this in a driver. I
> would believe we could have a generic misc driver for pin like
> vms_ballon.c for ballon. The driver doesn't have to bind with uacce.
>
> In this way, the pinned memory pool implementation in userspace doesn't
> need to depend on a specific uacce driver any more.
Please work with the mm developers to get them to agree with this type
of thing, as well as the dma developers, both of which you didn't cc: on
this patch :(
Remember, you are creating a new api for Linux that goes around existing
syscalls, but is in reality, a new syscall, so why not just make it a
new syscall?
thanks,
greg k-h
On Thu, Jan 21, 2021 at 11:52:57AM +0000, Song Bao Hua (Barry Song) wrote:
>
>
> > -----Original Message-----
> > From: Greg Kroah-Hartman [mailto:[email protected]]
> > Sent: Friday, January 22, 2021 12:19 AM
> > To: Song Bao Hua (Barry Song) <[email protected]>
> > Cc: Wangzhou (B) <[email protected]>; Zhangfei Gao
> > <[email protected]>; Arnd Bergmann <[email protected]>;
> > [email protected]; [email protected];
> > chensihang (A) <[email protected]>
> > Subject: Re: [PATCH] uacce: Add uacce_ctrl misc device
> >
> > On Thu, Jan 21, 2021 at 10:18:24AM +0000, Song Bao Hua (Barry Song) wrote:
> > >
> > >
> > > > -----Original Message-----
> > > > From: Greg Kroah-Hartman [mailto:[email protected]]
> > > > Sent: Thursday, January 21, 2021 10:46 PM
> > > > To: Wangzhou (B) <[email protected]>
> > > > Cc: Zhangfei Gao <[email protected]>; Arnd Bergmann <[email protected]>;
> > > > [email protected]; [email protected];
> > > > chensihang (A) <[email protected]>
> > > > Subject: Re: [PATCH] uacce: Add uacce_ctrl misc device
> > > >
> > > > On Thu, Jan 21, 2021 at 05:09:14PM +0800, Zhou Wang wrote:
> > > > > When IO page fault happens, DMA performance will be affected. Pin user
> > page
> > > > > can avoid IO page fault, this patch introduces a new char device named
> > > > > /dev/uacce_ctrl to help to maintain pin/unpin pages. User space can do
> > > > > pin/unpin pages by ioctls of an open file of /dev/uacce_ctrl, all pinned
> > > > > pages under one file will be unpinned in file release process.
> > > >
> > > > Also, what are you really trying to do here? If you need to mess with
> > > > memory pages, why can't the existing memory apis work properly for you?
> > > > Please work with the linux-mm developers to resolve the issue using the
> > > > standard apis and not creating a one-off char device node for this type
> > > > of thing.
> > >
> > > Basically the purpose is implementing a pinned memory poll for userspace
> > > DMA to achieve better performance by removing io page fault.
> >
> > And what could possibly go wrong with that :)
>
> I think we have resolved this concern while uacce came in :-)
> Uacce is based on SVA so devices are accessing memory in userspace
> by strict permission control.
>
> >
> > > I really like this can be done in generic mm code. Unfortunately there is
> > no
> > > this standard API in kernel to support userspace pin. Right now, various
> > > subsystems depend on the ioctl of /dev/<name> to implement the pin, for example,
> > > v4l2, gpu, infiniband, media etc.
> > >
> > > I feel it is extremely hard to sell a standard mpin() API like mlock()
> > > for this stage as mm could hardly buy this. And it will require
> > > huge changes in kernel.
> >
> > Why? This is what mlock() is for, why can't you use it?
>
> mlock() can only guarantee memory won't be swapped out, it doesn't
> make sure memory won't move. alloc_pages() can cause memory compaction,
> cma, numa balance, huge pages etc can move mlock()-ed pages.
> We would still see many I/O page faults for mlock() area.
>
> >
> > > We need a way to manage what pages are pinned by process and ensure the
> > > pages can be unpinned while the process is killed abnormally. otherwise,
> > > memory gets leaked.
> >
> > Can't mlock() handle that? It works on the process that called it.
> >
> > > file_operations release() is a good entry for this kind of things. In
> > > this way, we don't have to maintain the pinned page set in task_struct
> > > and unpin them during exit().
> > >
> > > If there is anything to make it better by doing this in a driver. I
> > > would believe we could have a generic misc driver for pin like
> > > vms_ballon.c for ballon. The driver doesn't have to bind with uacce.
> > >
> > > In this way, the pinned memory pool implementation in userspace doesn't
> > > need to depend on a specific uacce driver any more.
> >
> > Please work with the mm developers to get them to agree with this type
> > of thing, as well as the dma developers, both of which you didn't cc: on
> > this patch :(
>
> Yep.
>
> >
> > Remember, you are creating a new api for Linux that goes around existing
> > syscalls, but is in reality, a new syscall, so why not just make it a
> > new syscall?
>
> The difficulty would be how to record which pages are pinned for a process
> if it is done by a new syscall.
>
> For mlock(), it can be much easier as it will change VMA. Hardly we can
> change VMA for pin. On the other hand, if the implementation is done in
> driver, with file_operations, we can record pinned pages in the private
> data of an opened file.
Then work with the mm developers on this, there shouldn't be anything
"special" about how this memory is handled vs. other backing memory
types, right?
Also, be sure this works properly with the dma layer as there has been
lots of work happening there recently. Odds are passing in random
address values in an unknown process context is not going to be correct,
but I could be wrong.
thanks,
greg k-h
Hi Zhou,
I love your patch! Perhaps something to improve:
[auto build test WARNING on char-misc/char-misc-testing]
[also build test WARNING on soc/for-next linux/master linus/master v5.11-rc4 next-20210121]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Zhou-Wang/uacce-Add-uacce_ctrl-misc-device/20210121-172139
base: https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git f6f1f8e6e3eea25f539105d48166e91f0ab46dd1
config: x86_64-randconfig-r014-20210121 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 22b68440e1647e16b5ee24b924986207173c02d1)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install x86_64 cross compiling tool for clang build
# apt-get install binutils-x86-64-linux-gnu
# https://github.com/0day-ci/linux/commit/4dc40d891a7e60ed79e6b9460a38a142d3d1a965
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Zhou-Wang/uacce-Add-uacce_ctrl-misc-device/20210121-172139
git checkout 4dc40d891a7e60ed79e6b9460a38a142d3d1a965
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All warnings (new ones prefixed by >>):
>> drivers/misc/uacce/uacce.c:511:5: warning: no previous prototype for function 'uacce_ctrl_open' [-Wmissing-prototypes]
int uacce_ctrl_open(struct inode *inode, struct file *file)
^
drivers/misc/uacce/uacce.c:511:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
int uacce_ctrl_open(struct inode *inode, struct file *file)
^
static
>> drivers/misc/uacce/uacce.c:525:5: warning: no previous prototype for function 'uacce_ctrl_release' [-Wmissing-prototypes]
int uacce_ctrl_release(struct inode *inode, struct file *file)
^
drivers/misc/uacce/uacce.c:525:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
int uacce_ctrl_release(struct inode *inode, struct file *file)
^
static
drivers/misc/uacce/uacce.c:626:6: warning: unused variable 'ret' [-Wunused-variable]
int ret;
^
3 warnings generated.
vim +/uacce_ctrl_open +511 drivers/misc/uacce/uacce.c
510
> 511 int uacce_ctrl_open(struct inode *inode, struct file *file)
512 {
513 struct uacce_pin_container *p;
514
515 p = kzalloc(sizeof(*p), GFP_KERNEL);
516 if (!p)
517 return -ENOMEM;
518 file->private_data = p;
519
520 xa_init(&p->array);
521
522 return 0;
523 }
524
> 525 int uacce_ctrl_release(struct inode *inode, struct file *file)
526 {
527 struct uacce_pin_container *priv = file->private_data;
528 struct pin_pages *p;
529 unsigned long idx;
530
531 xa_for_each(&priv->array, idx, p) {
532 unpin_user_pages(p->pages, p->nr_pages);
533 xa_erase(&priv->array, p->first);
534 vfree(p->pages);
535 kfree(p);
536 }
537
538 xa_destroy(&priv->array);
539 kfree(priv);
540
541 return 0;
542 }
543
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
Hi Zhou,
I love your patch! Yet something to improve:
[auto build test ERROR on char-misc/char-misc-testing]
[also build test ERROR on soc/for-next linux/master linus/master v5.11-rc4 next-20210121]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Zhou-Wang/uacce-Add-uacce_ctrl-misc-device/20210121-172139
base: https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git f6f1f8e6e3eea25f539105d48166e91f0ab46dd1
config: alpha-randconfig-p002-20210121 (attached as .config)
compiler: alpha-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/4dc40d891a7e60ed79e6b9460a38a142d3d1a965
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Zhou-Wang/uacce-Add-uacce_ctrl-misc-device/20210121-172139
git checkout 4dc40d891a7e60ed79e6b9460a38a142d3d1a965
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=alpha
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>
All error/warnings (new ones prefixed by >>):
drivers/misc/uacce/uacce.c:511:5: warning: no previous prototype for 'uacce_ctrl_open' [-Wmissing-prototypes]
511 | int uacce_ctrl_open(struct inode *inode, struct file *file)
| ^~~~~~~~~~~~~~~
drivers/misc/uacce/uacce.c:525:5: warning: no previous prototype for 'uacce_ctrl_release' [-Wmissing-prototypes]
525 | int uacce_ctrl_release(struct inode *inode, struct file *file)
| ^~~~~~~~~~~~~~~~~~
drivers/misc/uacce/uacce.c: In function 'uacce_ctrl_release':
>> drivers/misc/uacce/uacce.c:534:3: error: implicit declaration of function 'vfree'; did you mean 'kfree'? [-Werror=implicit-function-declaration]
534 | vfree(p->pages);
| ^~~~~
| kfree
drivers/misc/uacce/uacce.c: In function 'uacce_pin_page':
>> drivers/misc/uacce/uacce.c:557:10: error: implicit declaration of function 'vmalloc'; did you mean 'kmalloc'? [-Werror=implicit-function-declaration]
557 | pages = vmalloc(nr_pages * sizeof(struct page *));
| ^~~~~~~
| kmalloc
>> drivers/misc/uacce/uacce.c:557:8: warning: assignment to 'struct page **' from 'int' makes pointer from integer without a cast [-Wint-conversion]
557 | pages = vmalloc(nr_pages * sizeof(struct page *));
| ^
drivers/misc/uacce/uacce.c: In function 'uacce_ctrl_unl_ioctl':
drivers/misc/uacce/uacce.c:626:6: warning: unused variable 'ret' [-Wunused-variable]
626 | int ret;
| ^~~
cc1: some warnings being treated as errors
vim +534 drivers/misc/uacce/uacce.c
524
525 int uacce_ctrl_release(struct inode *inode, struct file *file)
526 {
527 struct uacce_pin_container *priv = file->private_data;
528 struct pin_pages *p;
529 unsigned long idx;
530
531 xa_for_each(&priv->array, idx, p) {
532 unpin_user_pages(p->pages, p->nr_pages);
533 xa_erase(&priv->array, p->first);
> 534 vfree(p->pages);
535 kfree(p);
536 }
537
538 xa_destroy(&priv->array);
539 kfree(priv);
540
541 return 0;
542 }
543
544 static int uacce_pin_page(struct uacce_pin_container *priv,
545 struct uacce_pin_address *addr)
546 {
547 unsigned int flags = FOLL_FORCE | FOLL_WRITE;
548 unsigned long first, last, nr_pages;
549 struct page **pages;
550 struct pin_pages *p;
551 int ret;
552
553 first = (addr->addr & PAGE_MASK) >> PAGE_SHIFT;
554 last = ((addr->addr + addr->size - 1) & PAGE_MASK) >> PAGE_SHIFT;
555 nr_pages = last - first + 1;
556
> 557 pages = vmalloc(nr_pages * sizeof(struct page *));
558 if (!pages)
559 return -ENOMEM;
560
561 p = kzalloc(sizeof(*p), GFP_KERNEL);
562 if (!p) {
563 ret = -ENOMEM;
564 goto free;
565 }
566
567 ret = pin_user_pages_fast(addr->addr & PAGE_MASK, nr_pages,
568 flags | FOLL_LONGTERM, pages);
569 if (ret != nr_pages) {
570 pr_err("uacce: Failed to pin page\n");
571 goto free_p;
572 }
573 p->first = first;
574 p->nr_pages = nr_pages;
575 p->pages = pages;
576
577 ret = xa_err(xa_store(&priv->array, p->first, p, GFP_KERNEL));
578 if (ret)
579 goto unpin_pages;
580
581 return 0;
582
583 unpin_pages:
584 unpin_user_pages(pages, nr_pages);
585 free_p:
586 kfree(p);
587 free:
588 vfree(pages);
589 return ret;
590 }
591
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/[email protected]
On 2021/1/21 17:44, Greg Kroah-Hartman wrote:
> On Thu, Jan 21, 2021 at 05:09:14PM +0800, Zhou Wang wrote:
>> When IO page fault happens, DMA performance will be affected. Pin user page
>> can avoid IO page fault, this patch introduces a new char device named
>> /dev/uacce_ctrl to help to maintain pin/unpin pages. User space can do
>> pin/unpin pages by ioctls of an open file of /dev/uacce_ctrl, all pinned
>> pages under one file will be unpinned in file release process.
>>
>> Signed-off-by: Zhou Wang <[email protected]>
>> Signed-off-by: Sihang Chen <[email protected]>
>> Suggested-by: Barry Song <[email protected]>
>> ---
>> drivers/misc/uacce/uacce.c | 172 +++++++++++++++++++++++++++++++++++++++-
>> include/uapi/misc/uacce/uacce.h | 16 ++++
>> 2 files changed, 187 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
>> index d07af4e..b8ac408 100644
>> --- a/drivers/misc/uacce/uacce.c
>> +++ b/drivers/misc/uacce/uacce.c
>> @@ -2,6 +2,7 @@
>> #include <linux/compat.h>
>> #include <linux/dma-mapping.h>
>> #include <linux/iommu.h>
>> +#include <linux/miscdevice.h>
>> #include <linux/module.h>
>> #include <linux/poll.h>
>> #include <linux/slab.h>
>> @@ -12,6 +13,16 @@ static dev_t uacce_devt;
>> static DEFINE_MUTEX(uacce_mutex);
>> static DEFINE_XARRAY_ALLOC(uacce_xa);
>>
>> +struct uacce_pin_container {
>> + struct xarray array;
>> +};
>> +
>> +struct pin_pages {
>> + unsigned long first;
>> + unsigned long nr_pages;
>> + struct page **pages;
>> +};
>> +
>> static int uacce_start_queue(struct uacce_queue *q)
>> {
>> int ret = 0;
>> @@ -497,6 +508,152 @@ void uacce_remove(struct uacce_device *uacce)
>> }
>> EXPORT_SYMBOL_GPL(uacce_remove);
>>
>> +int uacce_ctrl_open(struct inode *inode, struct file *file)
>> +{
>> + struct uacce_pin_container *p;
>> +
>> + p = kzalloc(sizeof(*p), GFP_KERNEL);
>> + if (!p)
>> + return -ENOMEM;
>> + file->private_data = p;
>> +
>> + xa_init(&p->array);
>> +
>> + return 0;
>> +}
>> +
>> +int uacce_ctrl_release(struct inode *inode, struct file *file)
>> +{
>> + struct uacce_pin_container *priv = file->private_data;
>> + struct pin_pages *p;
>> + unsigned long idx;
>> +
>> + xa_for_each(&priv->array, idx, p) {
>> + unpin_user_pages(p->pages, p->nr_pages);
>> + xa_erase(&priv->array, p->first);
>> + vfree(p->pages);
>> + kfree(p);
>> + }
>> +
>> + xa_destroy(&priv->array);
>> + kfree(priv);
>> +
>> + return 0;
>> +}
>> +
>> +static int uacce_pin_page(struct uacce_pin_container *priv,
>> + struct uacce_pin_address *addr)
>> +{
>> + unsigned int flags = FOLL_FORCE | FOLL_WRITE;
>> + unsigned long first, last, nr_pages;
>> + struct page **pages;
>> + struct pin_pages *p;
>> + int ret;
>> +
>> + first = (addr->addr & PAGE_MASK) >> PAGE_SHIFT;
>> + last = ((addr->addr + addr->size - 1) & PAGE_MASK) >> PAGE_SHIFT;
>> + nr_pages = last - first + 1;
>> +
>> + pages = vmalloc(nr_pages * sizeof(struct page *));
>> + if (!pages)
>> + return -ENOMEM;
>> +
>> + p = kzalloc(sizeof(*p), GFP_KERNEL);
>> + if (!p) {
>> + ret = -ENOMEM;
>> + goto free;
>> + }
>> +
>> + ret = pin_user_pages_fast(addr->addr & PAGE_MASK, nr_pages,
>> + flags | FOLL_LONGTERM, pages);
>> + if (ret != nr_pages) {
>> + pr_err("uacce: Failed to pin page\n");
>> + goto free_p;
>> + }
>> + p->first = first;
>> + p->nr_pages = nr_pages;
>> + p->pages = pages;
>> +
>> + ret = xa_err(xa_store(&priv->array, p->first, p, GFP_KERNEL));
>> + if (ret)
>> + goto unpin_pages;
>> +
>> + return 0;
>> +
>> +unpin_pages:
>> + unpin_user_pages(pages, nr_pages);
>> +free_p:
>> + kfree(p);
>> +free:
>> + vfree(pages);
>> + return ret;
>> +}
>> +
>> +static int uacce_unpin_page(struct uacce_pin_container *priv,
>> + struct uacce_pin_address *addr)
>> +{
>> + unsigned long first, last, nr_pages;
>> + struct pin_pages *p;
>> +
>> + first = (addr->addr & PAGE_MASK) >> PAGE_SHIFT;
>> + last = ((addr->addr + addr->size - 1) & PAGE_MASK) >> PAGE_SHIFT;
>> + nr_pages = last - first + 1;
>> +
>> + /* find pin_pages */
>> + p = xa_load(&priv->array, first);
>> + if (!p)
>> + return -ENODEV;
>> +
>> + if (p->nr_pages != nr_pages)
>> + return -EINVAL;
>> +
>> + /* unpin */
>> + unpin_user_pages(p->pages, p->nr_pages);
>> +
>> + /* release resource */
>> + xa_erase(&priv->array, first);
>> + vfree(p->pages);
>> + kfree(p);
>> +
>> + return 0;
>> +}
>> +
>> +static long uacce_ctrl_unl_ioctl(struct file *filep, unsigned int cmd,
>> + unsigned long arg)
>> +{
>> + struct uacce_pin_container *p = filep->private_data;
>> + struct uacce_pin_address addr;
>> + int ret;
>> +
>> + if (copy_from_user(&addr, (void __user *)arg,
>> + sizeof(struct uacce_pin_address)))
>> + return -EFAULT;
>> +
>> + switch (cmd) {
>> + case UACCE_CMD_PIN:
>> + return uacce_pin_page(p, &addr);
>> +
>> + case UACCE_CMD_UNPIN:
>> + return uacce_unpin_page(p, &addr);
>> +
>> + default:
>> + return -EINVAL;
>> + }
>> +}
>> +
>> +static const struct file_operations uacce_ctrl_fops = {
>> + .owner = THIS_MODULE,
>> + .open = uacce_ctrl_open,
>> + .release = uacce_ctrl_release,
>> + .unlocked_ioctl = uacce_ctrl_unl_ioctl,
>> +};
>> +
>> +static struct miscdevice uacce_ctrl_miscdev = {
>> + .name = "uacce_ctrl",
>> + .minor = MISC_DYNAMIC_MINOR,
>> + .fops = &uacce_ctrl_fops,
>> +};
>> +
>> static int __init uacce_init(void)
>> {
>> int ret;
>> @@ -507,13 +664,26 @@ static int __init uacce_init(void)
>>
>> ret = alloc_chrdev_region(&uacce_devt, 0, MINORMASK, UACCE_NAME);
>> if (ret)
>> - class_destroy(uacce_class);
>> + goto destroy_class;
>> +
>> + ret = misc_register(&uacce_ctrl_miscdev);
>> + if (ret) {
>> + pr_err("uacce: ctrl dev registration failed\n");
>> + goto unregister_cdev;
>> + }
>>
>> + return 0;
>> +
>> +unregister_cdev:
>> + unregister_chrdev_region(uacce_devt, MINORMASK);
>> +destroy_class:
>> + class_destroy(uacce_class);
>> return ret;
>> }
>>
>> static __exit void uacce_exit(void)
>> {
>> + misc_deregister(&uacce_ctrl_miscdev);
>> unregister_chrdev_region(uacce_devt, MINORMASK);
>> class_destroy(uacce_class);
>> }
>> diff --git a/include/uapi/misc/uacce/uacce.h b/include/uapi/misc/uacce/uacce.h
>> index cc71856..f9e326f 100644
>> --- a/include/uapi/misc/uacce/uacce.h
>> +++ b/include/uapi/misc/uacce/uacce.h
>> @@ -35,4 +35,20 @@ enum uacce_qfrt {
>> UACCE_QFRT_DUS = 1,
>> };
>>
>> +/**
>> + * struct uacce_pin_address - Expected pin user space address and size
>> + * @addr: Address to pin
>> + * @size: Size of pin address
>> + */
>> +struct uacce_pin_address {
>> + unsigned long addr;
>> + unsigned long size;
>
> These are not valid ioctl structure members for crossing the user/kernel
> boundry. Please make them work properly (i.e. use __u64 and friends).
Got it, will modify this to __u64 together with other problems found by 0-day robot.
Thanks,
Zhou
>
> thanks,
>
> greg k-h
>
> .
>
On Fri, Jan 22, 2021 at 07:33:40PM +0800, Zhou Wang wrote:
> >> +struct uacce_pin_address {
> >> + unsigned long addr;
> >> + unsigned long size;
> >
> > These are not valid ioctl structure members for crossing the user/kernel
> > boundry. Please make them work properly (i.e. use __u64 and friends).
>
> Got it, will modify this to __u64 together with other problems found by 0-day robot.
Please also properly involve the mm and dma developers and, again,
consider just making this a syscall instead of an ioctl.
thanks,
greg k-h