2021-12-10 22:19:43

by Thomas Gleixner

[permalink] [raw]
Subject: [patch V3 09/35] PCI/MSI: Allocate MSI device data on first use

From: Thomas Gleixner <[email protected]>

Allocate MSI device data on first use, i.e. when a PCI driver invokes one
of the PCI/MSI enablement functions.

Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Greg Kroah-Hartman <[email protected]>
Reviewed-by: Jason Gunthorpe <[email protected]>
Acked-by: Bjorn Helgaas <[email protected]>
---
drivers/pci/msi/msi.c | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)

--- a/drivers/pci/msi/msi.c
+++ b/drivers/pci/msi/msi.c
@@ -900,10 +900,12 @@ static int __pci_enable_msi_range(struct
/* deprecated, don't use */
int pci_enable_msi(struct pci_dev *dev)
{
- int rc = __pci_enable_msi_range(dev, 1, 1, NULL);
- if (rc < 0)
- return rc;
- return 0;
+ int rc = msi_setup_device_data(&dev->dev);
+
+ if (!rc)
+ rc = __pci_enable_msi_range(dev, 1, 1, NULL);
+
+ return rc < 0 ? rc : 0;
}
EXPORT_SYMBOL(pci_enable_msi);

@@ -958,7 +960,11 @@ static int __pci_enable_msix_range(struc
int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
int minvec, int maxvec)
{
- return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0);
+ int ret = msi_setup_device_data(&dev->dev);
+
+ if (!ret)
+ ret = __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0);
+ return ret;
}
EXPORT_SYMBOL(pci_enable_msix_range);

@@ -985,8 +991,12 @@ int pci_alloc_irq_vectors_affinity(struc
struct irq_affinity *affd)
{
struct irq_affinity msi_default_affd = {0};
+ int ret = msi_setup_device_data(&dev->dev);
int nvecs = -ENOSPC;

+ if (ret)
+ return ret;
+
if (flags & PCI_IRQ_AFFINITY) {
if (!affd)
affd = &msi_default_affd;



2021-12-15 17:16:49

by Thomas Gleixner

[permalink] [raw]
Subject: [patch V4 09-01/35] PCI/MSI: Decouple MSI[-X] disable from pcim_release()

The MSI core will introduce runtime allocation of MSI related data. This
data will be devres managed and has to be set up before enabling
PCI/MSI[-X]. This would introduce an ordering issue vs. pcim_release().

The setup order is:

pcim_enable_device()
devres_alloc(pcim_release...);
...
pci_irq_alloc()
msi_setup_device_data()
devres_alloc(msi_device_data_release, ...)

and once the device is released these release functions are invoked in the
opposite order:

msi_device_data_release()
...
pcim_release()
pci_disable_msi[x]()

which is obviously wrong, because pci_disable_msi[x]() requires the MSI
data to be available to tear down the MSI[-X] interrupts.

Remove the MSI[-X] teardown from pcim_release() and add an explicit action
to be installed on the attempt of enabling PCI/MSI[-X].

This allows the MSI core data allocation to be ordered correctly in a
subsequent step.

Reported-by: Nishanth Menon <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
---
V4: New patch
---
drivers/pci/msi/msi.c | 33 +++++++++++++++++++++++++++++++++
drivers/pci/pci.c | 5 -----
include/linux/pci.h | 3 ++-
3 files changed, 35 insertions(+), 6 deletions(-)

--- a/drivers/pci/msi/msi.c
+++ b/drivers/pci/msi/msi.c
@@ -341,6 +341,31 @@ void pci_restore_msi_state(struct pci_de
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);

+static void pcim_msi_release(void *pcidev)
+{
+ struct pci_dev *dev = pcidev;
+
+ dev->is_msi_managed = false;
+ pci_free_irq_vectors(dev);
+}
+
+/*
+ * Needs to be separate from pcim_release to prevent an ordering problem
+ * vs. msi_device_data_release() in the MSI core code.
+ */
+static int pcim_setup_msi_release(struct pci_dev *dev)
+{
+ int ret;
+
+ if (!pci_is_managed(dev) || dev->is_msi_managed)
+ return 0;
+
+ ret = devm_add_action(&dev->dev, pcim_msi_release, dev);
+ if (!ret)
+ dev->is_msi_managed = true;
+ return ret;
+}
+
static struct msi_desc *
msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
{
@@ -884,6 +909,10 @@ static int __pci_enable_msi_range(struct
if (nvec > maxvec)
nvec = maxvec;

+ rc = pcim_setup_msi_release(dev);
+ if (rc)
+ return rc;
+
for (;;) {
if (affd) {
nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
@@ -927,6 +956,10 @@ static int __pci_enable_msix_range(struc
if (WARN_ON_ONCE(dev->msix_enabled))
return -EINVAL;

+ rc = pcim_setup_msi_release(dev);
+ if (rc)
+ return rc;
+
for (;;) {
if (affd) {
nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2024,11 +2024,6 @@ static void pcim_release(struct device *
struct pci_devres *this = res;
int i;

- if (dev->msi_enabled)
- pci_disable_msi(dev);
- if (dev->msix_enabled)
- pci_disable_msix(dev);
-
for (i = 0; i < DEVICE_COUNT_RESOURCE; i++)
if (this->region_mask & (1 << i))
pci_release_region(dev, i);
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -425,7 +425,8 @@ struct pci_dev {
unsigned int ats_enabled:1; /* Address Translation Svc */
unsigned int pasid_enabled:1; /* Process Address Space ID */
unsigned int pri_enabled:1; /* Page Request Interface */
- unsigned int is_managed:1;
+ unsigned int is_managed:1; /* Managed via devres */
+ unsigned int is_msi_managed:1; /* MSI release via devres installed */
unsigned int needs_freset:1; /* Requires fundamental reset */
unsigned int state_saved:1;
unsigned int is_physfn:1;

2021-12-15 17:19:53

by Thomas Gleixner

[permalink] [raw]
Subject: [patch V4 09-02/35] PCI/MSI: Allocate MSI device data on first use

Allocate MSI device data on first use, i.e. when a PCI driver invokes one
of the PCI/MSI enablement functions.

Add a wrapper function to ensure that the ordering vs. pcim_msi_release()
is correct.

Signed-off-by: Thomas Gleixner <[email protected]>
---
V4: Adopted to ensure devres ordering
---
drivers/pci/msi/msi.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)

--- a/drivers/pci/msi/msi.c
+++ b/drivers/pci/msi/msi.c
@@ -366,6 +366,19 @@ static int pcim_setup_msi_release(struct
return ret;
}

+/*
+ * Ordering vs. devres: msi device data has to be installed first so that
+ * pcim_msi_release() is invoked before it on device release.
+ */
+static int pci_setup_msi_context(struct pci_dev *dev)
+{
+ int ret = msi_setup_device_data(&dev->dev);
+
+ if (!ret)
+ ret = pcim_setup_msi_release(dev);
+ return ret;
+}
+
static struct msi_desc *
msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
{
@@ -909,7 +922,7 @@ static int __pci_enable_msi_range(struct
if (nvec > maxvec)
nvec = maxvec;

- rc = pcim_setup_msi_release(dev);
+ rc = pci_setup_msi_context(dev);
if (rc)
return rc;

@@ -956,7 +969,7 @@ static int __pci_enable_msix_range(struc
if (WARN_ON_ONCE(dev->msix_enabled))
return -EINVAL;

- rc = pcim_setup_msi_release(dev);
+ rc = pci_setup_msi_context(dev);
if (rc)
return rc;


2021-12-15 17:46:39

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [patch V4 09-01/35] PCI/MSI: Decouple MSI[-X] disable from pcim_release()

On Wed, Dec 15, 2021 at 06:16:44PM +0100, Thomas Gleixner wrote:
> The MSI core will introduce runtime allocation of MSI related data. This
> data will be devres managed and has to be set up before enabling
> PCI/MSI[-X]. This would introduce an ordering issue vs. pcim_release().
>
> The setup order is:
>
> pcim_enable_device()
> devres_alloc(pcim_release...);
> ...
> pci_irq_alloc()
> msi_setup_device_data()
> devres_alloc(msi_device_data_release, ...)
>
> and once the device is released these release functions are invoked in the
> opposite order:
>
> msi_device_data_release()
> ...
> pcim_release()
> pci_disable_msi[x]()
>
> which is obviously wrong, because pci_disable_msi[x]() requires the MSI
> data to be available to tear down the MSI[-X] interrupts.
>
> Remove the MSI[-X] teardown from pcim_release() and add an explicit action
> to be installed on the attempt of enabling PCI/MSI[-X].
>
> This allows the MSI core data allocation to be ordered correctly in a
> subsequent step.
>
> Reported-by: Nishanth Menon <[email protected]>
> Signed-off-by: Thomas Gleixner <[email protected]>
> ---
> V4: New patch


Reviewed-by: Greg Kroah-Hartman <[email protected]>

2021-12-15 17:46:59

by Greg Kroah-Hartman

[permalink] [raw]
Subject: Re: [patch V4 09-02/35] PCI/MSI: Allocate MSI device data on first use

On Wed, Dec 15, 2021 at 06:19:49PM +0100, Thomas Gleixner wrote:
> Allocate MSI device data on first use, i.e. when a PCI driver invokes one
> of the PCI/MSI enablement functions.
>
> Add a wrapper function to ensure that the ordering vs. pcim_msi_release()
> is correct.
>
> Signed-off-by: Thomas Gleixner <[email protected]>

Reviewed-by: Greg Kroah-Hartman <[email protected]>

Subject: [tip: irq/msi] PCI/MSI: Decouple MSI[-X] disable from pcim_release()

The following commit has been merged into the irq/msi branch of tip:

Commit-ID: 3f35d2cf9fbc656db82579d849cc69c373b1ad0d
Gitweb: https://git.kernel.org/tip/3f35d2cf9fbc656db82579d849cc69c373b1ad0d
Author: Thomas Gleixner <[email protected]>
AuthorDate: Wed, 15 Dec 2021 18:16:44 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Thu, 16 Dec 2021 22:16:38 +01:00

PCI/MSI: Decouple MSI[-X] disable from pcim_release()

The MSI core will introduce runtime allocation of MSI related data. This
data will be devres managed and has to be set up before enabling
PCI/MSI[-X]. This would introduce an ordering issue vs. pcim_release().

The setup order is:

pcim_enable_device()
devres_alloc(pcim_release...);
...
pci_irq_alloc()
msi_setup_device_data()
devres_alloc(msi_device_data_release, ...)

and once the device is released these release functions are invoked in the
opposite order:

msi_device_data_release()
...
pcim_release()
pci_disable_msi[x]()

which is obviously wrong, because pci_disable_msi[x]() requires the MSI
data to be available to tear down the MSI[-X] interrupts.

Remove the MSI[-X] teardown from pcim_release() and add an explicit action
to be installed on the attempt of enabling PCI/MSI[-X].

This allows the MSI core data allocation to be ordered correctly in a
subsequent step.

Reported-by: Nishanth Menon <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Michael Kelley <[email protected]>
Tested-by: Nishanth Menon <[email protected]>
Reviewed-by: Greg Kroah-Hartman <[email protected]>
Link: https://lore.kernel.org/r/87tuf9rdoj.ffs@tglx

---
drivers/pci/msi/msi.c | 33 +++++++++++++++++++++++++++++++++
drivers/pci/pci.c | 5 -----
include/linux/pci.h | 3 ++-
3 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c
index 5af8d9b..358b63e 100644
--- a/drivers/pci/msi/msi.c
+++ b/drivers/pci/msi/msi.c
@@ -341,6 +341,31 @@ void pci_restore_msi_state(struct pci_dev *dev)
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);

+static void pcim_msi_release(void *pcidev)
+{
+ struct pci_dev *dev = pcidev;
+
+ dev->is_msi_managed = false;
+ pci_free_irq_vectors(dev);
+}
+
+/*
+ * Needs to be separate from pcim_release to prevent an ordering problem
+ * vs. msi_device_data_release() in the MSI core code.
+ */
+static int pcim_setup_msi_release(struct pci_dev *dev)
+{
+ int ret;
+
+ if (!pci_is_managed(dev) || dev->is_msi_managed)
+ return 0;
+
+ ret = devm_add_action(&dev->dev, pcim_msi_release, dev);
+ if (!ret)
+ dev->is_msi_managed = true;
+ return ret;
+}
+
static struct msi_desc *
msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
{
@@ -884,6 +909,10 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
if (nvec > maxvec)
nvec = maxvec;

+ rc = pcim_setup_msi_release(dev);
+ if (rc)
+ return rc;
+
for (;;) {
if (affd) {
nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
@@ -927,6 +956,10 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
if (WARN_ON_ONCE(dev->msix_enabled))
return -EINVAL;

+ rc = pcim_setup_msi_release(dev);
+ if (rc)
+ return rc;
+
for (;;) {
if (affd) {
nvec = irq_calc_affinity_vectors(minvec, nvec, affd);
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 3d2fb39..f3f606c 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2024,11 +2024,6 @@ static void pcim_release(struct device *gendev, void *res)
struct pci_devres *this = res;
int i;

- if (dev->msi_enabled)
- pci_disable_msi(dev);
- if (dev->msix_enabled)
- pci_disable_msix(dev);
-
for (i = 0; i < DEVICE_COUNT_RESOURCE; i++)
if (this->region_mask & (1 << i))
pci_release_region(dev, i);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 5cc46ba..a09736d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -425,7 +425,8 @@ struct pci_dev {
unsigned int ats_enabled:1; /* Address Translation Svc */
unsigned int pasid_enabled:1; /* Process Address Space ID */
unsigned int pri_enabled:1; /* Page Request Interface */
- unsigned int is_managed:1;
+ unsigned int is_managed:1; /* Managed via devres */
+ unsigned int is_msi_managed:1; /* MSI release via devres installed */
unsigned int needs_freset:1; /* Requires fundamental reset */
unsigned int state_saved:1;
unsigned int is_physfn:1;

Subject: [tip: irq/msi] PCI/MSI: Allocate MSI device data on first use

The following commit has been merged into the irq/msi branch of tip:

Commit-ID: 93296cd1325d1d9afede60202d8833011c9001f2
Gitweb: https://git.kernel.org/tip/93296cd1325d1d9afede60202d8833011c9001f2
Author: Thomas Gleixner <[email protected]>
AuthorDate: Wed, 15 Dec 2021 18:19:49 +01:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Thu, 16 Dec 2021 22:16:38 +01:00

PCI/MSI: Allocate MSI device data on first use

Allocate MSI device data on first use, i.e. when a PCI driver invokes one
of the PCI/MSI enablement functions.

Add a wrapper function to ensure that the ordering vs. pcim_msi_release()
is correct.

Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Michael Kelley <[email protected]>
Tested-by: Nishanth Menon <[email protected]>
Reviewed-by: Greg Kroah-Hartman <[email protected]>
Link: https://lore.kernel.org/r/87r1adrdje.ffs@tglx

---
drivers/pci/msi/msi.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c
index 358b63e..369e3c5 100644
--- a/drivers/pci/msi/msi.c
+++ b/drivers/pci/msi/msi.c
@@ -366,6 +366,19 @@ static int pcim_setup_msi_release(struct pci_dev *dev)
return ret;
}

+/*
+ * Ordering vs. devres: msi device data has to be installed first so that
+ * pcim_msi_release() is invoked before it on device release.
+ */
+static int pci_setup_msi_context(struct pci_dev *dev)
+{
+ int ret = msi_setup_device_data(&dev->dev);
+
+ if (!ret)
+ ret = pcim_setup_msi_release(dev);
+ return ret;
+}
+
static struct msi_desc *
msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
{
@@ -909,7 +922,7 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
if (nvec > maxvec)
nvec = maxvec;

- rc = pcim_setup_msi_release(dev);
+ rc = pci_setup_msi_context(dev);
if (rc)
return rc;

@@ -956,7 +969,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
if (WARN_ON_ONCE(dev->msix_enabled))
return -EINVAL;

- rc = pcim_setup_msi_release(dev);
+ rc = pci_setup_msi_context(dev);
if (rc)
return rc;