2022-05-10 13:37:40

by Guixin Liu

[permalink] [raw]
Subject: [PATCH] uio: Replace mutex info_lock with percpu_ref

If the underlying driver works in parallel, the mutex info_lock in uio
will force driver to work sequentially, so that become performance
bottleneck. Lets replace it with percpu_ref for better performance.

Use tcm_loop and tcmu(backstore is file, and I did some work to make tcmu
work in parallel at uio_write() path) to evaluate performance,
fio job: fio -filename=/dev/sdb -direct=1 -size=2G -name=1 -thread
-runtime=60 -time_based -rw=randread -numjobs=16 -iodepth=16 -bs=128k

Without this patch:
READ: bw=2828MiB/s (2965MB/s), 176MiB/s-177MiB/s (185MB/s-186MB/s),
io=166GiB (178GB), run=60000-60001msec

With this patch:
READ: bw=3382MiB/s (3546MB/s), 211MiB/s-212MiB/s (221MB/s-222MB/s),
io=198GiB (213GB), run=60001-60001msec

Reviewed-by: Xiaoguang Wang <[email protected]>
Signed-off-by: Guixin Liu <[email protected]>
---
drivers/uio/uio.c | 95 ++++++++++++++++++++++++++++++++++------------
include/linux/uio_driver.h | 5 ++-
2 files changed, 75 insertions(+), 25 deletions(-)

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 43afbb7..72c16ba 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -24,6 +24,8 @@
#include <linux/kobject.h>
#include <linux/cdev.h>
#include <linux/uio_driver.h>
+#include <linux/completion.h>
+#include <linux/percpu-refcount.h>

#define UIO_MAX_DEVICES (1U << MINORBITS)

@@ -218,7 +220,9 @@ static ssize_t name_show(struct device *dev,
struct uio_device *idev = dev_get_drvdata(dev);
int ret;

- mutex_lock(&idev->info_lock);
+ if (!percpu_ref_tryget_live(&idev->info_ref))
+ return -EINVAL;
+
if (!idev->info) {
ret = -EINVAL;
dev_err(dev, "the device has been unregistered\n");
@@ -228,7 +232,7 @@ static ssize_t name_show(struct device *dev,
ret = sprintf(buf, "%s\n", idev->info->name);

out:
- mutex_unlock(&idev->info_lock);
+ percpu_ref_put(&idev->info_ref);
return ret;
}
static DEVICE_ATTR_RO(name);
@@ -239,7 +243,9 @@ static ssize_t version_show(struct device *dev,
struct uio_device *idev = dev_get_drvdata(dev);
int ret;

- mutex_lock(&idev->info_lock);
+ if (!percpu_ref_tryget_live(&idev->info_ref))
+ return -EINVAL;
+
if (!idev->info) {
ret = -EINVAL;
dev_err(dev, "the device has been unregistered\n");
@@ -249,7 +255,7 @@ static ssize_t version_show(struct device *dev,
ret = sprintf(buf, "%s\n", idev->info->version);

out:
- mutex_unlock(&idev->info_lock);
+ percpu_ref_put(&idev->info_ref);
return ret;
}
static DEVICE_ATTR_RO(version);
@@ -489,16 +495,20 @@ static int uio_open(struct inode *inode, struct file *filep)
listener->event_count = atomic_read(&idev->event);
filep->private_data = listener;

- mutex_lock(&idev->info_lock);
+ if (!percpu_ref_tryget_live(&idev->info_ref)) {
+ ret = -EINVAL;
+ goto err_alloc_listener;
+ }
+
if (!idev->info) {
- mutex_unlock(&idev->info_lock);
+ percpu_ref_put(&idev->info_ref);
ret = -EINVAL;
goto err_infoopen;
}

if (idev->info->open)
ret = idev->info->open(idev->info, inode);
- mutex_unlock(&idev->info_lock);
+ percpu_ref_put(&idev->info_ref);
if (ret)
goto err_infoopen;

@@ -531,10 +541,12 @@ static int uio_release(struct inode *inode, struct file *filep)
struct uio_listener *listener = filep->private_data;
struct uio_device *idev = listener->dev;

- mutex_lock(&idev->info_lock);
+ if (!percpu_ref_tryget_live(&idev->info_ref))
+ return -EINVAL;
+
if (idev->info && idev->info->release)
ret = idev->info->release(idev->info, inode);
- mutex_unlock(&idev->info_lock);
+ percpu_ref_put(&idev->info_ref);

module_put(idev->owner);
kfree(listener);
@@ -548,10 +560,12 @@ static __poll_t uio_poll(struct file *filep, poll_table *wait)
struct uio_device *idev = listener->dev;
__poll_t ret = 0;

- mutex_lock(&idev->info_lock);
+ if (!percpu_ref_tryget_live(&idev->info_ref))
+ return -EIO;
+
if (!idev->info || !idev->info->irq)
ret = -EIO;
- mutex_unlock(&idev->info_lock);
+ percpu_ref_put(&idev->info_ref);

if (ret)
return ret;
@@ -577,13 +591,17 @@ static ssize_t uio_read(struct file *filep, char __user *buf,
add_wait_queue(&idev->wait, &wait);

do {
- mutex_lock(&idev->info_lock);
+ if (!percpu_ref_tryget_live(&idev->info_ref)) {
+ retval = -EINVAL;
+ break;
+ }
+
if (!idev->info || !idev->info->irq) {
retval = -EIO;
- mutex_unlock(&idev->info_lock);
+ percpu_ref_put(&idev->info_ref);
break;
}
- mutex_unlock(&idev->info_lock);
+ percpu_ref_put(&idev->info_ref);

set_current_state(TASK_INTERRUPTIBLE);

@@ -631,7 +649,9 @@ static ssize_t uio_write(struct file *filep, const char __user *buf,
if (copy_from_user(&irq_on, buf, count))
return -EFAULT;

- mutex_lock(&idev->info_lock);
+ if (!percpu_ref_tryget_live(&idev->info_ref))
+ return -EINVAL;
+
if (!idev->info) {
retval = -EINVAL;
goto out;
@@ -650,7 +670,7 @@ static ssize_t uio_write(struct file *filep, const char __user *buf,
retval = idev->info->irqcontrol(idev->info, irq_on);

out:
- mutex_unlock(&idev->info_lock);
+ percpu_ref_put(&idev->info_ref);
return retval ? retval : sizeof(s32);
}

@@ -675,7 +695,9 @@ static vm_fault_t uio_vma_fault(struct vm_fault *vmf)
vm_fault_t ret = 0;
int mi;

- mutex_lock(&idev->info_lock);
+ if (!percpu_ref_tryget_live(&idev->info_ref))
+ return VM_FAULT_SIGBUS;
+
if (!idev->info) {
ret = VM_FAULT_SIGBUS;
goto out;
@@ -702,8 +724,7 @@ static vm_fault_t uio_vma_fault(struct vm_fault *vmf)
vmf->page = page;

out:
- mutex_unlock(&idev->info_lock);
-
+ percpu_ref_put(&idev->info_ref);
return ret;
}

@@ -772,7 +793,9 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma)

vma->vm_private_data = idev;

- mutex_lock(&idev->info_lock);
+ if (!percpu_ref_tryget_live(&idev->info_ref))
+ return -EINVAL;
+
if (!idev->info) {
ret = -EINVAL;
goto out;
@@ -811,7 +834,7 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
}

out:
- mutex_unlock(&idev->info_lock);
+ percpu_ref_put(&idev->info_ref);
return ret;
}

@@ -907,6 +930,13 @@ static void uio_device_release(struct device *dev)
kfree(idev);
}

+static void uio_info_free(struct percpu_ref *ref)
+{
+ struct uio_device *idev = container_of(ref, struct uio_device, info_ref);
+
+ complete(&idev->free_done);
+}
+
/**
* __uio_register_device - register a new userspace IO device
* @owner: module that creates the new device
@@ -937,10 +967,17 @@ int __uio_register_device(struct module *owner,

idev->owner = owner;
idev->info = info;
- mutex_init(&idev->info_lock);
init_waitqueue_head(&idev->wait);
atomic_set(&idev->event, 0);

+ ret = percpu_ref_init(&idev->info_ref, uio_info_free, 0, GFP_KERNEL);
+ if (ret) {
+ pr_err("percpu_ref init failed!\n");
+ return ret;
+ }
+ init_completion(&idev->confirm_done);
+ init_completion(&idev->free_done);
+
ret = uio_get_minor(idev);
if (ret) {
kfree(idev);
@@ -1036,6 +1073,13 @@ int __devm_uio_register_device(struct module *owner,
}
EXPORT_SYMBOL_GPL(__devm_uio_register_device);

+static void uio_confirm_info(struct percpu_ref *ref)
+{
+ struct uio_device *idev = container_of(ref, struct uio_device, info_ref);
+
+ complete(&idev->confirm_done);
+}
+
/**
* uio_unregister_device - unregister a industrial IO device
* @info: UIO device capabilities
@@ -1052,14 +1096,17 @@ void uio_unregister_device(struct uio_info *info)
idev = info->uio_dev;
minor = idev->minor;

- mutex_lock(&idev->info_lock);
+ percpu_ref_kill_and_confirm(&idev->info_ref, uio_confirm_info);
+ wait_for_completion(&idev->confirm_done);
+ wait_for_completion(&idev->free_done);
+
+ /* now, we can set info to NULL */
uio_dev_del_attributes(idev);

if (info->irq && info->irq != UIO_IRQ_CUSTOM)
free_irq(info->irq, idev);

idev->info = NULL;
- mutex_unlock(&idev->info_lock);

wake_up_interruptible(&idev->wait);
kill_fasync(&idev->async_queue, SIGIO, POLL_HUP);
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 47c5962..6d3d87f 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -16,6 +16,7 @@
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/interrupt.h>
+#include <linux/percpu-refcount.h>

struct module;
struct uio_map;
@@ -74,9 +75,11 @@ struct uio_device {
struct fasync_struct *async_queue;
wait_queue_head_t wait;
struct uio_info *info;
- struct mutex info_lock;
struct kobject *map_dir;
struct kobject *portio_dir;
+ struct percpu_ref info_ref;
+ struct completion confirm_done;
+ struct completion free_done;
};

/**
--
1.8.3.1



2022-05-10 13:40:17

by kernel test robot

[permalink] [raw]
Subject: Re: [PATCH] uio: Replace mutex info_lock with percpu_ref

Hi Guixin,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on char-misc/char-misc-testing]
[also build test WARNING on linux/master linus/master v5.18-rc6]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url: https://github.com/intel-lab-lkp/linux/commits/Guixin-Liu/uio-Replace-mutex-info_lock-with-percpu_ref/20220510-135202
base: https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git 33a1c6618677fe33f8e84cb7bedc45abbce89a50
config: s390-randconfig-s031-20220509 (https://download.01.org/0day-ci/archive/20220510/[email protected]/config)
compiler: s390-linux-gcc (GCC) 11.3.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.4-dirty
# https://github.com/intel-lab-lkp/linux/commit/4d3e70c70de7bcf57d176790db79b8cbee990171
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Guixin-Liu/uio-Replace-mutex-info_lock-with-percpu_ref/20220510-135202
git checkout 4d3e70c70de7bcf57d176790db79b8cbee990171
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=s390 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <[email protected]>


sparse warnings: (new ones prefixed by >>)
>> drivers/uio/uio.c:564:24: sparse: sparse: incorrect type in return expression (different base types) @@ expected restricted __poll_t @@ got int @@
drivers/uio/uio.c:564:24: sparse: expected restricted __poll_t
drivers/uio/uio.c:564:24: sparse: got int
drivers/uio/uio.c:567:21: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted __poll_t [usertype] ret @@ got int @@
drivers/uio/uio.c:567:21: sparse: expected restricted __poll_t [usertype] ret
drivers/uio/uio.c:567:21: sparse: got int

vim +564 drivers/uio/uio.c

556
557 static __poll_t uio_poll(struct file *filep, poll_table *wait)
558 {
559 struct uio_listener *listener = filep->private_data;
560 struct uio_device *idev = listener->dev;
561 __poll_t ret = 0;
562
563 if (!percpu_ref_tryget_live(&idev->info_ref))
> 564 return -EIO;
565
566 if (!idev->info || !idev->info->irq)
567 ret = -EIO;
568 percpu_ref_put(&idev->info_ref);
569
570 if (ret)
571 return ret;
572
573 poll_wait(filep, &idev->wait, wait);
574 if (listener->event_count != atomic_read(&idev->event))
575 return EPOLLIN | EPOLLRDNORM;
576 return 0;
577 }
578

--
0-DAY CI Kernel Test Service
https://01.org/lkp

2022-05-17 12:53:31

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH] uio: Replace mutex info_lock with percpu_ref

On Tue, May 10, 2022 at 01:50:31PM +0800, Guixin Liu wrote:
> If the underlying driver works in parallel, the mutex info_lock in uio
> will force driver to work sequentially, so that become performance
> bottleneck. Lets replace it with percpu_ref for better performance.
>
> Use tcm_loop and tcmu(backstore is file, and I did some work to make tcmu
> work in parallel at uio_write() path) to evaluate performance,
> fio job: fio -filename=/dev/sdb -direct=1 -size=2G -name=1 -thread
> -runtime=60 -time_based -rw=randread -numjobs=16 -iodepth=16 -bs=128k
>
> Without this patch:
> READ: bw=2828MiB/s (2965MB/s), 176MiB/s-177MiB/s (185MB/s-186MB/s),
> io=166GiB (178GB), run=60000-60001msec
>
> With this patch:
> READ: bw=3382MiB/s (3546MB/s), 211MiB/s-212MiB/s (221MB/s-222MB/s),
> io=198GiB (213GB), run=60001-60001msec
>
> Reviewed-by: Xiaoguang Wang <[email protected]>
> Signed-off-by: Guixin Liu <[email protected]>

Why is UIO being used for a block device? Why not use a real block
driver instead that can properly handle the locking issues involved
here?



> ---
> drivers/uio/uio.c | 95 ++++++++++++++++++++++++++++++++++------------
> include/linux/uio_driver.h | 5 ++-
> 2 files changed, 75 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
> index 43afbb7..72c16ba 100644
> --- a/drivers/uio/uio.c
> +++ b/drivers/uio/uio.c
> @@ -24,6 +24,8 @@
> #include <linux/kobject.h>
> #include <linux/cdev.h>
> #include <linux/uio_driver.h>
> +#include <linux/completion.h>
> +#include <linux/percpu-refcount.h>
>
> #define UIO_MAX_DEVICES (1U << MINORBITS)
>
> @@ -218,7 +220,9 @@ static ssize_t name_show(struct device *dev,
> struct uio_device *idev = dev_get_drvdata(dev);
> int ret;
>
> - mutex_lock(&idev->info_lock);
> + if (!percpu_ref_tryget_live(&idev->info_ref))
> + return -EINVAL;
> +

You are now just putting the contention to a per-cpu lock, so any
single-cpu load will have the same issue, right? And your example above
is a single-cpu load, so how is this any faster? Is the mutex going
across all cpus to sync such a load that moving this to a percpu thing
that much better?

And as you have now split this into one-lock-per-cpu instead of
one-lock-per-device, you just broke the situation where multiple threads
are accessing the same device at the same time, right?

You have also changed the functionality of the driver to force userspace
to handle when the lock can not be taken as previously it would always
work and just delay until it did happen. What workflow does that now
affect that always assumed that these code paths would succeed?

Also the kernel test bot found problems with the patch :(

thanks,

greg k-h

2022-05-18 04:14:46

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [PATCH] uio: Replace mutex info_lock with percpu_ref

On Tue, May 10, 2022 at 01:50:31PM +0800, Guixin Liu wrote:
> If the underlying driver works in parallel, the mutex info_lock in uio
> will force driver to work sequentially, so that become performance
> bottleneck. Lets replace it with percpu_ref for better performance.
>
> Use tcm_loop and tcmu(backstore is file, and I did some work to make tcmu
> work in parallel at uio_write() path) to evaluate performance,
> fio job: fio -filename=/dev/sdb -direct=1 -size=2G -name=1 -thread
> -runtime=60 -time_based -rw=randread -numjobs=16 -iodepth=16 -bs=128k
>
> Without this patch:
> READ: bw=2828MiB/s (2965MB/s), 176MiB/s-177MiB/s (185MB/s-186MB/s),
> io=166GiB (178GB), run=60000-60001msec
>
> With this patch:
> READ: bw=3382MiB/s (3546MB/s), 211MiB/s-212MiB/s (221MB/s-222MB/s),
> io=198GiB (213GB), run=60001-60001msec
>
> Reviewed-by: Xiaoguang Wang <[email protected]>

Any reason why you didn't cc: this person on the patch submission so
that they would be part of the conversation about it?

thanks,

greg k-h