If two userspace programs both open the PCI UIO fd and one of
them exits uncleanly, the other will hit an I/O hang because
bus-mastering has been disabled.
It is common for spdk/dpdk to use UIO this way. So, introduce a refcnt
so that bus-mastering is only cleared when the last opener releases the fd.
Fixes: 865a11f("uio/uio_pci_generic: Disable bus-mastering on release")
Reported-by: Xiu Yang <[email protected]>
Signed-off-by: Yao Hongbo <[email protected]>
---
drivers/uio/uio_pci_generic.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/drivers/uio/uio_pci_generic.c b/drivers/uio/uio_pci_generic.c
index e03f9b5..8add2cf 100644
--- a/drivers/uio/uio_pci_generic.c
+++ b/drivers/uio/uio_pci_generic.c
@@ -31,6 +31,7 @@
struct uio_pci_generic_dev {
struct uio_info info;
struct pci_dev *pdev;
+ atomic_t refcnt;
};
static inline struct uio_pci_generic_dev *
@@ -39,6 +40,14 @@ struct uio_pci_generic_dev {
return container_of(info, struct uio_pci_generic_dev, info);
}
+static int open(struct uio_info *info, struct inode *inode)
+{
+ struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
+
+ atomic_inc(&gdev->refcnt);
+ return 0;
+}
+
static int release(struct uio_info *info, struct inode *inode)
{
struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
@@ -51,7 +60,9 @@ static int release(struct uio_info *info, struct inode *inode)
* Note that there's a non-zero chance doing this will wedge the device
* at least until reset.
*/
- pci_clear_master(gdev->pdev);
+ if (atomic_dec_and_test(&gdev->refcnt))
+ pci_clear_master(gdev->pdev);
+
return 0;
}
@@ -92,8 +103,11 @@ static int probe(struct pci_dev *pdev,
gdev->info.name = "uio_pci_generic";
gdev->info.version = DRIVER_VERSION;
+ gdev->info.open = open;
gdev->info.release = release;
gdev->pdev = pdev;
+ atomic_set(&gdev->refcnt, 0);
+
if (pdev->irq && (pdev->irq != IRQ_NOTCONNECTED)) {
gdev->info.irq = pdev->irq;
gdev->info.irq_flags = IRQF_SHARED;
--
1.8.3.1
On 3/31/22 10:23 AM, Yao Hongbo wrote:
> If two userspace programs both open the PCI UIO fd, when one
> of the program exits uncleanly, the other will cause IO hang
> due to bus-mastering disabled.
>
> It's a common usage for spdk/dpdk to use UIO. So, introduce refcnt
> to avoid such problems.
>
> Fixes: 865a11f("uio/uio_pci_generic: Disable bus-mastering on release")
The commit ID in the Fixes tag should be abbreviated to 12 characters instead.
Thanks,
Joseph
> Reported-by: Xiu Yang <[email protected]>
> Signed-off-by: Yao Hongbo <[email protected]>
> ---
> drivers/uio/uio_pci_generic.c | 16 +++++++++++++++-
> 1 file changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/uio/uio_pci_generic.c b/drivers/uio/uio_pci_generic.c
> index e03f9b5..8add2cf 100644
> --- a/drivers/uio/uio_pci_generic.c
> +++ b/drivers/uio/uio_pci_generic.c
> @@ -31,6 +31,7 @@
> struct uio_pci_generic_dev {
> struct uio_info info;
> struct pci_dev *pdev;
> + atomic_t refcnt;
> };
>
> static inline struct uio_pci_generic_dev *
> @@ -39,6 +40,14 @@ struct uio_pci_generic_dev {
> return container_of(info, struct uio_pci_generic_dev, info);
> }
>
> +static int open(struct uio_info *info, struct inode *inode)
> +{
> + struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
> +
> + atomic_inc(&gdev->refcnt);
> + return 0;
> +}
> +
> static int release(struct uio_info *info, struct inode *inode)
> {
> struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
> @@ -51,7 +60,9 @@ static int release(struct uio_info *info, struct inode *inode)
> * Note that there's a non-zero chance doing this will wedge the device
> * at least until reset.
> */
> - pci_clear_master(gdev->pdev);
> + if (atomic_dec_and_test(&gdev->refcnt))
> + pci_clear_master(gdev->pdev);
> +
> return 0;
> }
>
> @@ -92,8 +103,11 @@ static int probe(struct pci_dev *pdev,
>
> gdev->info.name = "uio_pci_generic";
> gdev->info.version = DRIVER_VERSION;
> + gdev->info.open = open;
> gdev->info.release = release;
> gdev->pdev = pdev;
> + atomic_set(&gdev->refcnt, 0);
> +
> if (pdev->irq && (pdev->irq != IRQ_NOTCONNECTED)) {
> gdev->info.irq = pdev->irq;
> gdev->info.irq_flags = IRQF_SHARED;
On Thu, Mar 31, 2022 at 10:23:52AM +0800, Yao Hongbo wrote:
> If two userspace programs both open the PCI UIO fd, when one
> of the program exits uncleanly, the other will cause IO hang
> due to bus-mastering disabled.
With two programs poking at the same device, how is this ever
supposed to work even while both are alive?
> It's a common usage for spdk/dpdk to use UIO. So, introduce refcnt
> to avoid such problems.
>
> Fixes: 865a11f("uio/uio_pci_generic: Disable bus-mastering on release")
> Reported-by: Xiu Yang <[email protected]>
> Signed-off-by: Yao Hongbo <[email protected]>
> ---
> drivers/uio/uio_pci_generic.c | 16 +++++++++++++++-
> 1 file changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/uio/uio_pci_generic.c b/drivers/uio/uio_pci_generic.c
> index e03f9b5..8add2cf 100644
> --- a/drivers/uio/uio_pci_generic.c
> +++ b/drivers/uio/uio_pci_generic.c
> @@ -31,6 +31,7 @@
> struct uio_pci_generic_dev {
> struct uio_info info;
> struct pci_dev *pdev;
> + atomic_t refcnt;
> };
>
> static inline struct uio_pci_generic_dev *
> @@ -39,6 +40,14 @@ struct uio_pci_generic_dev {
> return container_of(info, struct uio_pci_generic_dev, info);
> }
>
> +static int open(struct uio_info *info, struct inode *inode)
> +{
> + struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
> +
> + atomic_inc(&gdev->refcnt);
> + return 0;
> +}
> +
> static int release(struct uio_info *info, struct inode *inode)
> {
> struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
> @@ -51,7 +60,9 @@ static int release(struct uio_info *info, struct inode *inode)
> * Note that there's a non-zero chance doing this will wedge the device
> * at least until reset.
> */
> - pci_clear_master(gdev->pdev);
> + if (atomic_dec_and_test(&gdev->refcnt))
> + pci_clear_master(gdev->pdev);
> +
> return 0;
> }
>
> @@ -92,8 +103,11 @@ static int probe(struct pci_dev *pdev,
>
> gdev->info.name = "uio_pci_generic";
> gdev->info.version = DRIVER_VERSION;
> + gdev->info.open = open;
> gdev->info.release = release;
> gdev->pdev = pdev;
> + atomic_set(&gdev->refcnt, 0);
> +
> if (pdev->irq && (pdev->irq != IRQ_NOTCONNECTED)) {
> gdev->info.irq = pdev->irq;
> gdev->info.irq_flags = IRQF_SHARED;
> --
> 1.8.3.1
On Fri, Apr 01, 2022 at 11:26:25AM +0800, guanghui.fgh wrote:
>
> 在 2022/4/1 4:30, Michael S. Tsirkin 写道:
> > On Thu, Mar 31, 2022 at 10:23:52AM +0800, Yao Hongbo wrote:
> > > If two userspace programs both open the PCI UIO fd, when one
> > > of the program exits uncleanly, the other will cause IO hang
> > > due to bus-mastering disabled.
> > With two programs poking at the same device, how is this ever
> > supposed to work even while both are alive?
>
> When using dpdk/spdk, there may be one primary process with multiple
> secondary processes, and they can all use the same virtual addresses to access
> the same hugepage memory and device BAR (the primary process mmaps the hugepages
> and device BAR and records the mapping between virtual and physical addresses;
> the secondary processes then do a MAP_FIXED mmap). With this
> method, we can solve many troublesome problems. So many processes
> may own the same device at the same time.
>
> The SPDK links:
> https://spdk.io/doc/app_overview.html
>
> "Multi process mode
> When --shm-id is specified, the application is started in multi-process
> mode.
>
> Applications using the same shm-id share their memory and NVMe devices.
>
> The first app to start with a given id becomes a primary process, with the
> rest,
>
> called secondary processes, only attaching to it. When the primary process
> exits,
>
> the secondary ones continue to operate, but no new processes can be attached
>
> at this point. All processes within the same shm-id group must use the same
> --single-file-segments setting."
>
> > > It's a common usage for spdk/dpdk to use UIO. So, introduce refcnt
> > > to avoid such problems.
> > >
> > > Fixes: 865a11f("uio/uio_pci_generic: Disable bus-mastering on release")
> > > Reported-by: Xiu Yang <[email protected]>
> > > Signed-off-by: Yao Hongbo <[email protected]>
> > > ---
> > > drivers/uio/uio_pci_generic.c | 16 +++++++++++++++-
> > > 1 file changed, 15 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/uio/uio_pci_generic.c b/drivers/uio/uio_pci_generic.c
> > > index e03f9b5..8add2cf 100644
> > > --- a/drivers/uio/uio_pci_generic.c
> > > +++ b/drivers/uio/uio_pci_generic.c
> > > @@ -31,6 +31,7 @@
> > > struct uio_pci_generic_dev {
> > > struct uio_info info;
> > > struct pci_dev *pdev;
> > > + atomic_t refcnt;
> > > };
> > > static inline struct uio_pci_generic_dev *
> > > @@ -39,6 +40,14 @@ struct uio_pci_generic_dev {
> > > return container_of(info, struct uio_pci_generic_dev, info);
> > > }
> > > +static int open(struct uio_info *info, struct inode *inode)
> > > +{
> > > + struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
> > > +
> > > + atomic_inc(&gdev->refcnt);
> > > + return 0;
> > > +}
> > > +
> > > static int release(struct uio_info *info, struct inode *inode)
> > > {
> > > struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
> > > @@ -51,7 +60,9 @@ static int release(struct uio_info *info, struct inode *inode)
> > > * Note that there's a non-zero chance doing this will wedge the device
> > > * at least until reset.
> > > */
> > > - pci_clear_master(gdev->pdev);
> > > + if (atomic_dec_and_test(&gdev->refcnt))
> > > + pci_clear_master(gdev->pdev);
> > > +
> > > return 0;
> > > }
> > > @@ -92,8 +103,11 @@ static int probe(struct pci_dev *pdev,
> > > gdev->info.name = "uio_pci_generic";
> > > gdev->info.version = DRIVER_VERSION;
> > > + gdev->info.open = open;
> > > gdev->info.release = release;
> > > gdev->pdev = pdev;
> > > + atomic_set(&gdev->refcnt, 0);
> > > +
> > > if (pdev->irq && (pdev->irq != IRQ_NOTCONNECTED)) {
> > > gdev->info.irq = pdev->irq;
> > > gdev->info.irq_flags = IRQF_SHARED;
I think we should use something like kref or refcount, and
think hard about handling the overflow here.
> > > --
> > > 1.8.3.1
在 2022/4/1 4:30, Michael S. Tsirkin 写道:
> On Thu, Mar 31, 2022 at 10:23:52AM +0800, Yao Hongbo wrote:
>> If two userspace programs both open the PCI UIO fd, when one
>> of the program exits uncleanly, the other will cause IO hang
>> due to bus-mastering disabled.
> With two programs poking at the same device, how is this ever
> supposed to work even while both are alive?
When using dpdk/spdk, there may be one primary process with multiple
secondary processes, and they can all use the same virtual addresses to access
the same hugepage memory and device BAR (the primary process mmaps the hugepages
and device BAR and records the mapping between virtual and physical addresses;
the secondary processes then do a MAP_FIXED mmap). With this
method, we can solve many troublesome problems. So many processes
may own the same device at the same time.
The SPDK links:
https://spdk.io/doc/app_overview.html
"Multi process mode
When --shm-id is specified, the application is started in multi-process
mode. Applications using the same shm-id share their memory and NVMe
devices. The first app to start with a given id becomes a primary process,
with the rest, called secondary processes, only attaching to it. When the
primary process exits, the secondary ones continue to operate, but no new
processes can be attached at this point. All processes within the same
shm-id group must use the same --single-file-segments setting."
>> It's a common usage for spdk/dpdk to use UIO. So, introduce refcnt
>> to avoid such problems.
>>
>> Fixes: 865a11f("uio/uio_pci_generic: Disable bus-mastering on release")
>> Reported-by: Xiu Yang <[email protected]>
>> Signed-off-by: Yao Hongbo <[email protected]>
>> ---
>> drivers/uio/uio_pci_generic.c | 16 +++++++++++++++-
>> 1 file changed, 15 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/uio/uio_pci_generic.c b/drivers/uio/uio_pci_generic.c
>> index e03f9b5..8add2cf 100644
>> --- a/drivers/uio/uio_pci_generic.c
>> +++ b/drivers/uio/uio_pci_generic.c
>> @@ -31,6 +31,7 @@
>> struct uio_pci_generic_dev {
>> struct uio_info info;
>> struct pci_dev *pdev;
>> + atomic_t refcnt;
>> };
>>
>> static inline struct uio_pci_generic_dev *
>> @@ -39,6 +40,14 @@ struct uio_pci_generic_dev {
>> return container_of(info, struct uio_pci_generic_dev, info);
>> }
>>
>> +static int open(struct uio_info *info, struct inode *inode)
>> +{
>> + struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
>> +
>> + atomic_inc(&gdev->refcnt);
>> + return 0;
>> +}
>> +
>> static int release(struct uio_info *info, struct inode *inode)
>> {
>> struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
>> @@ -51,7 +60,9 @@ static int release(struct uio_info *info, struct inode *inode)
>> * Note that there's a non-zero chance doing this will wedge the device
>> * at least until reset.
>> */
>> - pci_clear_master(gdev->pdev);
>> + if (atomic_dec_and_test(&gdev->refcnt))
>> + pci_clear_master(gdev->pdev);
>> +
>> return 0;
>> }
>>
>> @@ -92,8 +103,11 @@ static int probe(struct pci_dev *pdev,
>>
>> gdev->info.name = "uio_pci_generic";
>> gdev->info.version = DRIVER_VERSION;
>> + gdev->info.open = open;
>> gdev->info.release = release;
>> gdev->pdev = pdev;
>> + atomic_set(&gdev->refcnt, 0);
>> +
>> if (pdev->irq && (pdev->irq != IRQ_NOTCONNECTED)) {
>> gdev->info.irq = pdev->irq;
>> gdev->info.irq_flags = IRQF_SHARED;
>> --
>> 1.8.3.1
+ cc.
在 2022/4/1 上午4:30, Michael S. Tsirkin 写道:
> On Thu, Mar 31, 2022 at 10:23:52AM +0800, Yao Hongbo wrote:
>> If two userspace programs both open the PCI UIO fd, when one
>> of the program exits uncleanly, the other will cause IO hang
>> due to bus-mastering disabled.
>
> With two programs poking at the same device, how is this ever
> supposed to work even while both are alive?
>
>> It's a common usage for spdk/dpdk to use UIO. So, introduce refcnt
>> to avoid such problems.
>>
>> Fixes: 865a11f("uio/uio_pci_generic: Disable bus-mastering on release")
>> Reported-by: Xiu Yang <[email protected]>
>> Signed-off-by: Yao Hongbo <[email protected]>
>> ---
>> drivers/uio/uio_pci_generic.c | 16 +++++++++++++++-
>> 1 file changed, 15 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/uio/uio_pci_generic.c b/drivers/uio/uio_pci_generic.c
>> index e03f9b5..8add2cf 100644
>> --- a/drivers/uio/uio_pci_generic.c
>> +++ b/drivers/uio/uio_pci_generic.c
>> @@ -31,6 +31,7 @@
>> struct uio_pci_generic_dev {
>> struct uio_info info;
>> struct pci_dev *pdev;
>> + atomic_t refcnt;
>> };
>>
>> static inline struct uio_pci_generic_dev *
>> @@ -39,6 +40,14 @@ struct uio_pci_generic_dev {
>> return container_of(info, struct uio_pci_generic_dev, info);
>> }
>>
>> +static int open(struct uio_info *info, struct inode *inode)
>> +{
>> + struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
>> +
>> + atomic_inc(&gdev->refcnt);
>> + return 0;
>> +}
>> +
>> static int release(struct uio_info *info, struct inode *inode)
>> {
>> struct uio_pci_generic_dev *gdev = to_uio_pci_generic_dev(info);
>> @@ -51,7 +60,9 @@ static int release(struct uio_info *info, struct inode *inode)
>> * Note that there's a non-zero chance doing this will wedge the device
>> * at least until reset.
>> */
>> - pci_clear_master(gdev->pdev);
>> + if (atomic_dec_and_test(&gdev->refcnt))
>> + pci_clear_master(gdev->pdev);
>> +
>> return 0;
>> }
>>
>> @@ -92,8 +103,11 @@ static int probe(struct pci_dev *pdev,
>>
>> gdev->info.name = "uio_pci_generic";
>> gdev->info.version = DRIVER_VERSION;
>> + gdev->info.open = open;
>> gdev->info.release = release;
>> gdev->pdev = pdev;
>> + atomic_set(&gdev->refcnt, 0);
>> +
>> if (pdev->irq && (pdev->irq != IRQ_NOTCONNECTED)) {
>> gdev->info.irq = pdev->irq;
>> gdev->info.irq_flags = IRQF_SHARED;
>> --
>> 1.8.3.1