A race condition may occur if the user physically removes the ilo device
while ilo_open() is being called.
When the driver is removed, ilo_remove() is called to clean up, but nothing
guarantees that no ilo_open() call is still in progress.
The possible sequence is shown in the diagram below.
Fix it by adding a refcount check to the ilo_remove() function so that the
"ilo_hwinfo" structure is freed only after the last file is closed.
CPU0                    CPU1
                        |ilo_open
ilo_remove              |
kfree(ilo_hw)           |
//free                  |
                        |hw = container_of(ip->i_cdev,
                        |struct ilo_hwinfo, cdev);
                        |hw->ccb_alloc[slot]
                        |//use
Fixes: 89bcb05d9bbf ("HP iLO driver")
Signed-off-by: Zheng Wang <[email protected]>
---
drivers/misc/hpilo.c | 16 +++++++++++++++-
drivers/misc/hpilo.h | 1 +
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c
index 8d00df9243c4..2e9af39e91a4 100644
--- a/drivers/misc/hpilo.c
+++ b/drivers/misc/hpilo.c
@@ -37,6 +37,8 @@ static const struct pci_device_id ilo_blacklist[] = {
{}
};
+static void ilo_delete(struct kref *kref);
+
static inline int get_entry_id(int entry)
{
return (entry & ENTRY_MASK_DESCRIPTOR) >> ENTRY_BITPOS_DESCRIPTOR;
@@ -559,6 +561,7 @@ static int ilo_close(struct inode *ip, struct file *fp)
hw->ccb_alloc[slot]->ccb_cnt--;
spin_unlock(&hw->open_lock);
+ kref_put(&hw->refcnt, ilo_delete);
return 0;
}
@@ -578,6 +581,7 @@ static int ilo_open(struct inode *ip, struct file *fp)
if (!data)
return -ENOMEM;
+ kref_get(&hw->refcnt);
spin_lock(&hw->open_lock);
/* each fd private_data holds sw/hw view of ccb */
@@ -633,6 +637,8 @@ static int ilo_open(struct inode *ip, struct file *fp)
if (!error)
fp->private_data = hw->ccb_alloc[slot];
+ else
+ kref_put(&hw->refcnt, ilo_delete);
return error;
}
@@ -742,8 +748,15 @@ static int ilo_map_device(struct pci_dev *pdev, struct ilo_hwinfo *hw)
static void ilo_remove(struct pci_dev *pdev)
{
- int i, minor;
struct ilo_hwinfo *ilo_hw = pci_get_drvdata(pdev);
+ kref_put(&ilo_hw->refcnt, ilo_delete);
+}
+
+static void ilo_delete(struct kref *kref)
+{
+ int i, minor;
+ struct ilo_hwinfo *ilo_hw = container_of(kref, struct ilo_hwinfo, refcnt);
+ struct pci_dev *pdev = ilo_hw->ilo_dev;
if (!ilo_hw)
return;
@@ -807,6 +820,7 @@ static int ilo_probe(struct pci_dev *pdev,
goto out;
ilo_hw->ilo_dev = pdev;
+ kref_init(&ilo_hw->refcnt);
spin_lock_init(&ilo_hw->alloc_lock);
spin_lock_init(&ilo_hw->fifo_lock);
spin_lock_init(&ilo_hw->open_lock);
diff --git a/drivers/misc/hpilo.h b/drivers/misc/hpilo.h
index d57c34680b09..ebc677eb45ae 100644
--- a/drivers/misc/hpilo.h
+++ b/drivers/misc/hpilo.h
@@ -62,6 +62,7 @@ struct ilo_hwinfo {
spinlock_t fifo_lock;
struct cdev cdev;
+ struct kref refcnt;
};
/* offset from mmio_vaddr for enabling doorbell interrupts */
--
2.25.1
On Tue, Apr 18, 2023 at 12:52:46AM +0800, Zheng Wang wrote:
> --- a/drivers/misc/hpilo.h
> +++ b/drivers/misc/hpilo.h
> @@ -62,6 +62,7 @@ struct ilo_hwinfo {
> spinlock_t fifo_lock;
>
> struct cdev cdev;
> + struct kref refcnt;
You now have 2 different structures doing reference counting logic on
the same structure. That's just not ok at all, and is totally broken.
greg k-h
Greg KH <[email protected]> 于2023年4月18日周二 01:43写道:
>
> On Tue, Apr 18, 2023 at 12:52:46AM +0800, Zheng Wang wrote:
> > --- a/drivers/misc/hpilo.h
> > +++ b/drivers/misc/hpilo.h
> > @@ -62,6 +62,7 @@ struct ilo_hwinfo {
> > spinlock_t fifo_lock;
> >
> > struct cdev cdev;
> > + struct kref refcnt;
>
> You now have 2 different structures doing reference counting logic on
> the same structure. That's just not ok at all, and is totally broken.
>
Thanks for your reply. I'll look into it and figure out an effective fix.
Best regards,
Zheng
> greg k-h
Hi Zheng,
kernel test robot noticed the following build warnings:
[See https://git-scm.com/docs/git-format-patch#_base_tree_information for how to record base tree information.]
url: https://github.com/intel-lab-lkp/linux/commits/Zheng-Wang/misc-hpilo-Fix-use-after-free-bug-in-ilo_remove-due-to-race-condition-with-ilo_open/20230418-005502
base: char-misc/char-misc-testing
patch link: https://lore.kernel.org/r/20230417165246.467723-1-zyytlz.wz%40163.com
patch subject: [PATCH] misc: hpilo: Fix use after free bug in ilo_remove due to race condition with ilo_open
config: i386-randconfig-m021 (https://download.01.org/0day-ci/archive/20230430/[email protected]/config)
compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
If you fix the issue, kindly add the following tags where applicable
| Reported-by: kernel test robot <[email protected]>
| Reported-by: Dan Carpenter <[email protected]>
| Link: https://lore.kernel.org/r/[email protected]/
smatch warnings:
drivers/misc/hpilo.c:755 ilo_delete() warn: can 'ilo_hw' even be NULL?
vim +/ilo_hw +755 drivers/misc/hpilo.c
13a22f45e6cb1e Zheng Wang 2023-04-18 749 static void ilo_delete(struct kref *kref)
13a22f45e6cb1e Zheng Wang 2023-04-18 750 {
13a22f45e6cb1e Zheng Wang 2023-04-18 751 int i, minor;
13a22f45e6cb1e Zheng Wang 2023-04-18 752 struct ilo_hwinfo *ilo_hw = container_of(kref, struct ilo_hwinfo, refcnt);
13a22f45e6cb1e Zheng Wang 2023-04-18 753 struct pci_dev *pdev = ilo_hw->ilo_dev;
89bcb05d9bbf8b David Altobelli 2008-07-02 754
ebf1b764aa5cb3 Mark Rusk 2012-11-06 @755 if (!ilo_hw)
^^^^^^^
->refcnt member isn't the first struct member so this NULL check
doesn't make sense. Generally checking container_of() is ugly. Some
people do it and add BUILD_BUG_ON() to ensure that they're checking the
first struct member but really it's best to avoid that if possible.
ebf1b764aa5cb3 Mark Rusk 2012-11-06 756 return;
ebf1b764aa5cb3 Mark Rusk 2012-11-06 757
89bcb05d9bbf8b David Altobelli 2008-07-02 758 clear_device(ilo_hw);
89bcb05d9bbf8b David Altobelli 2008-07-02 759
89bcb05d9bbf8b David Altobelli 2008-07-02 760 minor = MINOR(ilo_hw->cdev.dev);
98dcd59dd063dd Camuso, Tony 2012-06-10 761 for (i = minor; i < minor + max_ccb; i++)
89bcb05d9bbf8b David Altobelli 2008-07-02 762 device_destroy(ilo_class, MKDEV(ilo_major, i));
89bcb05d9bbf8b David Altobelli 2008-07-02 763
89bcb05d9bbf8b David Altobelli 2008-07-02 764 cdev_del(&ilo_hw->cdev);
9f7048412163d8 David Altobelli 2009-08-17 765 ilo_disable_interrupts(ilo_hw);
9f7048412163d8 David Altobelli 2009-08-17 766 free_irq(pdev->irq, ilo_hw);
89bcb05d9bbf8b David Altobelli 2008-07-02 767 ilo_unmap_device(pdev, ilo_hw);
89bcb05d9bbf8b David Altobelli 2008-07-02 768 pci_release_regions(pdev);
bcdee04ea7ae04 Jiri Slaby 2012-09-13 769 /*
bcdee04ea7ae04 Jiri Slaby 2012-09-13 770 * pci_disable_device(pdev) used to be here. But this PCI device has
bcdee04ea7ae04 Jiri Slaby 2012-09-13 771 * two functions with interrupt lines connected to a single pin. The
bcdee04ea7ae04 Jiri Slaby 2012-09-13 772 * other one is a USB host controller. So when we disable the PIN here
bcdee04ea7ae04 Jiri Slaby 2012-09-13 773 * e.g. by rmmod hpilo, the controller stops working. It is because
bcdee04ea7ae04 Jiri Slaby 2012-09-13 774 * the interrupt link is disabled in ACPI since it is not refcounted
bcdee04ea7ae04 Jiri Slaby 2012-09-13 775 * yet. See acpi_pci_link_free_irq called from acpi_pci_irq_disable.
bcdee04ea7ae04 Jiri Slaby 2012-09-13 776 */
89bcb05d9bbf8b David Altobelli 2008-07-02 777 kfree(ilo_hw);
98dcd59dd063dd Camuso, Tony 2012-06-10 778 ilo_hwdev[(minor / max_ccb)] = 0;
89bcb05d9bbf8b David Altobelli 2008-07-02 779 }
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests
Dear Dan,
Thanks for your reply. I'm not very familiar with the code here. Does the
warning mean that we should check whether ilo_hw is NULL?
Best regards,
Zheng
Dan Carpenter <[email protected]> 于2023年5月2日周二 19:46写道:
>
> Hi Zheng,
>
> kernel test robot noticed the following build warnings:
>
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
>
> url: https://github.com/intel-lab-lkp/linux/commits/Zheng-Wang/misc-hpilo-Fix-use-after-free-bug-in-ilo_remove-due-to-race-condition-with-ilo_open/20230418-005502
> base: char-misc/char-misc-testing
> patch link: https://lore.kernel.org/r/20230417165246.467723-1-zyytlz.wz%40163.com
> patch subject: [PATCH] misc: hpilo: Fix use after free bug in ilo_remove due to race condition with ilo_open
> config: i386-randconfig-m021 (https://download.01.org/0day-ci/archive/20230430/[email protected]/config)
> compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
>
> If you fix the issue, kindly add following tag where applicable
> | Reported-by: kernel test robot <[email protected]>
> | Reported-by: Dan Carpenter <[email protected]>
> | Link: https://lore.kernel.org/r/[email protected]/
>
> smatch warnings:
> drivers/misc/hpilo.c:755 ilo_delete() warn: can 'ilo_hw' even be NULL?
>
> vim +/ilo_hw +755 drivers/misc/hpilo.c
>
> 13a22f45e6cb1e Zheng Wang 2023-04-18 749 static void ilo_delete(struct kref *kref)
> 13a22f45e6cb1e Zheng Wang 2023-04-18 750 {
> 13a22f45e6cb1e Zheng Wang 2023-04-18 751 int i, minor;
> 13a22f45e6cb1e Zheng Wang 2023-04-18 752 struct ilo_hwinfo *ilo_hw = container_of(kref, struct ilo_hwinfo, refcnt);
> 13a22f45e6cb1e Zheng Wang 2023-04-18 753 struct pci_dev *pdev = ilo_hw->ilo_dev;
> 89bcb05d9bbf8b David Altobelli 2008-07-02 754
> ebf1b764aa5cb3 Mark Rusk 2012-11-06 @755 if (!ilo_hw)
> ^^^^^^^
>
> ->refcnt member isn't the first struct member so this NULL check
> doesn't make sense. Generally checking container_of() is ugly. Some
> people do it and add BUILD_BUG_ON() to ensure that they're checking the
> first struct member but really it's best to avoid that if possible.
>
> ebf1b764aa5cb3 Mark Rusk 2012-11-06 756 return;
> ebf1b764aa5cb3 Mark Rusk 2012-11-06 757
> 89bcb05d9bbf8b David Altobelli 2008-07-02 758 clear_device(ilo_hw);
> 89bcb05d9bbf8b David Altobelli 2008-07-02 759
> 89bcb05d9bbf8b David Altobelli 2008-07-02 760 minor = MINOR(ilo_hw->cdev.dev);
> 98dcd59dd063dd Camuso, Tony 2012-06-10 761 for (i = minor; i < minor + max_ccb; i++)
> 89bcb05d9bbf8b David Altobelli 2008-07-02 762 device_destroy(ilo_class, MKDEV(ilo_major, i));
> 89bcb05d9bbf8b David Altobelli 2008-07-02 763
> 89bcb05d9bbf8b David Altobelli 2008-07-02 764 cdev_del(&ilo_hw->cdev);
> 9f7048412163d8 David Altobelli 2009-08-17 765 ilo_disable_interrupts(ilo_hw);
> 9f7048412163d8 David Altobelli 2009-08-17 766 free_irq(pdev->irq, ilo_hw);
> 89bcb05d9bbf8b David Altobelli 2008-07-02 767 ilo_unmap_device(pdev, ilo_hw);
> 89bcb05d9bbf8b David Altobelli 2008-07-02 768 pci_release_regions(pdev);
> bcdee04ea7ae04 Jiri Slaby 2012-09-13 769 /*
> bcdee04ea7ae04 Jiri Slaby 2012-09-13 770 * pci_disable_device(pdev) used to be here. But this PCI device has
> bcdee04ea7ae04 Jiri Slaby 2012-09-13 771 * two functions with interrupt lines connected to a single pin. The
> bcdee04ea7ae04 Jiri Slaby 2012-09-13 772 * other one is a USB host controller. So when we disable the PIN here
> bcdee04ea7ae04 Jiri Slaby 2012-09-13 773 * e.g. by rmmod hpilo, the controller stops working. It is because
> bcdee04ea7ae04 Jiri Slaby 2012-09-13 774 * the interrupt link is disabled in ACPI since it is not refcounted
> bcdee04ea7ae04 Jiri Slaby 2012-09-13 775 * yet. See acpi_pci_link_free_irq called from acpi_pci_irq_disable.
> bcdee04ea7ae04 Jiri Slaby 2012-09-13 776 */
> 89bcb05d9bbf8b David Altobelli 2008-07-02 777 kfree(ilo_hw);
> 98dcd59dd063dd Camuso, Tony 2012-06-10 778 ilo_hwdev[(minor / max_ccb)] = 0;
> 89bcb05d9bbf8b David Altobelli 2008-07-02 779 }
>
> --
> 0-DAY CI Kernel Test Service
> https://github.com/intel/lkp-tests
>
On Fri, May 05, 2023 at 12:01:17PM +0800, Zheng Hacker wrote:
> Dear Dan,
>
> Thanks for your reply. I'm not so familiar with the code here. Is the
> warning says we should check ilo_hw to see if it's not NULL?
>
Just delete the NULL check. It's impossible.
regards,
dan carpenter
> Best regards,
> Zheng
>
> Dan Carpenter <[email protected]> 于2023年5月2日周二 19:46写道:
> >
> > Hi Zheng,
> >
> > kernel test robot noticed the following build warnings:
> >
> > https://git-scm.com/docs/git-format-patch#_base_tree_information]
> >
> > url: https://github.com/intel-lab-lkp/linux/commits/Zheng-Wang/misc-hpilo-Fix-use-after-free-bug-in-ilo_remove-due-to-race-condition-with-ilo_open/20230418-005502
> > base: char-misc/char-misc-testing
> > patch link: https://lore.kernel.org/r/20230417165246.467723-1-zyytlz.wz%40163.com
> > patch subject: [PATCH] misc: hpilo: Fix use after free bug in ilo_remove due to race condition with ilo_open
> > config: i386-randconfig-m021 (https://download.01.org/0day-ci/archive/20230430/[email protected]/config)
> > compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
> >
> > If you fix the issue, kindly add following tag where applicable
> > | Reported-by: kernel test robot <[email protected]>
> > | Reported-by: Dan Carpenter <[email protected]>
> > | Link: https://lore.kernel.org/r/[email protected]/
> >
> > smatch warnings:
> > drivers/misc/hpilo.c:755 ilo_delete() warn: can 'ilo_hw' even be NULL?
> >
> > vim +/ilo_hw +755 drivers/misc/hpilo.c
> >
> > 13a22f45e6cb1e Zheng Wang 2023-04-18 749 static void ilo_delete(struct kref *kref)
> > 13a22f45e6cb1e Zheng Wang 2023-04-18 750 {
> > 13a22f45e6cb1e Zheng Wang 2023-04-18 751 int i, minor;
> > 13a22f45e6cb1e Zheng Wang 2023-04-18 752 struct ilo_hwinfo *ilo_hw = container_of(kref, struct ilo_hwinfo, refcnt);
> > 13a22f45e6cb1e Zheng Wang 2023-04-18 753 struct pci_dev *pdev = ilo_hw->ilo_dev;
> > 89bcb05d9bbf8b David Altobelli 2008-07-02 754
> > ebf1b764aa5cb3 Mark Rusk 2012-11-06 @755 if (!ilo_hw)
> > ^^^^^^^
> >
> > ->refcnt member isn't the first struct member so this NULL check
> > doesn't make sense. Generally checking container_of() is ugly. Some
> > people do it and add BUILD_BUG_ON() to ensure that they're checking the
> > first struct member but really it's best to avoid that if possible.
> >
> > ebf1b764aa5cb3 Mark Rusk 2012-11-06 756 return;
> > ebf1b764aa5cb3 Mark Rusk 2012-11-06 757
> > 89bcb05d9bbf8b David Altobelli 2008-07-02 758 clear_device(ilo_hw);
> > 89bcb05d9bbf8b David Altobelli 2008-07-02 759
> > 89bcb05d9bbf8b David Altobelli 2008-07-02 760 minor = MINOR(ilo_hw->cdev.dev);
> > 98dcd59dd063dd Camuso, Tony 2012-06-10 761 for (i = minor; i < minor + max_ccb; i++)
> > 89bcb05d9bbf8b David Altobelli 2008-07-02 762 device_destroy(ilo_class, MKDEV(ilo_major, i));
> > 89bcb05d9bbf8b David Altobelli 2008-07-02 763
> > 89bcb05d9bbf8b David Altobelli 2008-07-02 764 cdev_del(&ilo_hw->cdev);
> > 9f7048412163d8 David Altobelli 2009-08-17 765 ilo_disable_interrupts(ilo_hw);
> > 9f7048412163d8 David Altobelli 2009-08-17 766 free_irq(pdev->irq, ilo_hw);
> > 89bcb05d9bbf8b David Altobelli 2008-07-02 767 ilo_unmap_device(pdev, ilo_hw);
> > 89bcb05d9bbf8b David Altobelli 2008-07-02 768 pci_release_regions(pdev);
> > bcdee04ea7ae04 Jiri Slaby 2012-09-13 769 /*
> > bcdee04ea7ae04 Jiri Slaby 2012-09-13 770 * pci_disable_device(pdev) used to be here. But this PCI device has
> > bcdee04ea7ae04 Jiri Slaby 2012-09-13 771 * two functions with interrupt lines connected to a single pin. The
> > bcdee04ea7ae04 Jiri Slaby 2012-09-13 772 * other one is a USB host controller. So when we disable the PIN here
> > bcdee04ea7ae04 Jiri Slaby 2012-09-13 773 * e.g. by rmmod hpilo, the controller stops working. It is because
> > bcdee04ea7ae04 Jiri Slaby 2012-09-13 774 * the interrupt link is disabled in ACPI since it is not refcounted
> > bcdee04ea7ae04 Jiri Slaby 2012-09-13 775 * yet. See acpi_pci_link_free_irq called from acpi_pci_irq_disable.
> > bcdee04ea7ae04 Jiri Slaby 2012-09-13 776 */
> > 89bcb05d9bbf8b David Altobelli 2008-07-02 777 kfree(ilo_hw);
> > 98dcd59dd063dd Camuso, Tony 2012-06-10 778 ilo_hwdev[(minor / max_ccb)] = 0;
> > 89bcb05d9bbf8b David Altobelli 2008-07-02 779 }
> >
> > --
> > 0-DAY CI Kernel Test Service
> > https://github.com/intel/lkp-tests
> >