2024-01-12 01:56:14

by Ira Weiny

[permalink] [raw]
Subject: [PATCH] cxl/pci: Skip irq features if MSI/MSI-X are not supported

CXL 3.1 Section 3.1.1 states:

"A Function on a CXL device must not generate INTx messages if
that Function participates in CXL.cache protocol or CXL.mem
protocols."

The generic CXL memory driver only supports devices which use the
CXL.mem protocol. The current driver attempts to allocate MSI/MSI-X
vectors in anticipation of their need for mailbox interrupts or event
processing. However, the above requirement does not require a device to
support interrupts, only that they use MSI/MSI-X. For example, a device
may disable mailbox interrupts and be configured for firmware first
event processing and function well with the driver.

Rather than fail device probe if interrupts are not supported; flag that
irqs are not enabled and avoid features which require interrupts.
Emit messages appropriate for the situation to aid in debugging should
device behavior be unexpected due to a failure to allocate vectors.

Note that it is possible for a device to have host based event
processing through polling. However, the driver does not support
polling and it is not anticipated to be required. Leave that case to
the future if such a device comes along.

Signed-off-by: Ira Weiny <[email protected]>
---
Changes in v1:
- [djbw: remove persistent irq boolean]
- [djbw: Simplify error messages]
- [Alison: spell check]
- [iweiny: test]
- Link to RFC: https://lore.kernel.org/r/[email protected]
---
drivers/cxl/pci.c | 29 +++++++++++++++++++----------
1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 0155fb66b580..bd12b97bb38e 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -381,7 +381,7 @@ static int cxl_pci_mbox_send(struct cxl_memdev_state *mds,
return rc;
}

-static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds)
+static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail)
{
struct cxl_dev_state *cxlds = &mds->cxlds;
const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
@@ -443,6 +443,11 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds)
if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ))
return 0;

+ if (!irq_avail) {
+ dev_err(cxlds->dev, "Mailbox irq enabled but no interrupt vectors.\n");
+ return 0;
+ }
+
msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap);
irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum);
if (irq < 0)
@@ -587,7 +592,7 @@ static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds)
return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf);
}

-static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
+static bool cxl_alloc_irq_vectors(struct pci_dev *pdev)
{
int nvecs;

@@ -604,9 +609,9 @@ static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
PCI_IRQ_MSIX | PCI_IRQ_MSI);
if (nvecs < 1) {
dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs);
- return -ENXIO;
+ return false;
}
- return 0;
+ return true;
}

static irqreturn_t cxl_event_thread(int irq, void *id)
@@ -742,7 +747,7 @@ static bool cxl_event_int_is_fw(u8 setting)
}

static int cxl_event_config(struct pci_host_bridge *host_bridge,
- struct cxl_memdev_state *mds)
+ struct cxl_memdev_state *mds, bool irq_avail)
{
struct cxl_event_interrupt_policy policy;
int rc;
@@ -754,6 +759,11 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge,
if (!host_bridge->native_cxl_error)
return 0;

+ if (!irq_avail) {
+ dev_info(mds->cxlds.dev, "No interrupt vectors, no polling, skip event processing.\n");
+ return 0;
+ }
+
rc = cxl_mem_alloc_event_buf(mds);
if (rc)
return rc;
@@ -788,6 +798,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
struct cxl_register_map map;
struct cxl_memdev *cxlmd;
int i, rc, pmu_count;
+ bool irq_avail;

/*
* Double check the anonymous union trickery in struct cxl_regs
@@ -845,11 +856,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
else
dev_warn(&pdev->dev, "Media not active (%d)\n", rc);

- rc = cxl_alloc_irq_vectors(pdev);
- if (rc)
- return rc;
+ irq_avail = cxl_alloc_irq_vectors(pdev);

- rc = cxl_pci_setup_mailbox(mds);
+ rc = cxl_pci_setup_mailbox(mds, irq_avail);
if (rc)
return rc;

@@ -908,7 +917,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
}
}

- rc = cxl_event_config(host_bridge, mds);
+ rc = cxl_event_config(host_bridge, mds, irq_avail);
if (rc)
return rc;


---
base-commit: 0dd3ee31125508cd67f7e7172247f05b7fd1753a
change-id: 20240108-dont-fail-irq-a96310368f0f

Best regards,
--
Ira Weiny <[email protected]>



2024-01-12 16:06:12

by Dave Jiang

[permalink] [raw]
Subject: Re: [PATCH] cxl/pci: Skip irq features if MSI/MSI-X are not supported



On 1/11/24 18:54, Ira Weiny wrote:
> CXL 3.1 Section 3.1.1 states:
>
> "A Function on a CXL device must not generate INTx messages if
> that Function participates in CXL.cache protocol or CXL.mem
> protocols."
>
> The generic CXL memory driver only supports devices which use the
> CXL.mem protocol. The current driver attempts to allocate MSI/MSI-X
> vectors in anticipation of their need for mailbox interrupts or event
> processing. However, the above requirement does not require a device to
> support interrupts, only that they use MSI/MSI-X. For example, a device
> may disable mailbox interrupts and be configured for firmware first
> event processing and function well with the driver.
>
> Rather than fail device probe if interrupts are not supported; flag that
> irqs are not enabled and avoid features which require interrupts.
> Emit messages appropriate for the situation to aid in debugging should
> device behavior be unexpected due to a failure to allocate vectors.
>
> Note that it is possible for a device to have host based event
> processing through polling. However, the driver does not support
> polling and it is not anticipated to be required. Leave that case to
> the future if such a device comes along.
>
> Signed-off-by: Ira Weiny <[email protected]>

v2?

Reviewed-by: Dave Jiang <[email protected]>

> ---
> Changes in v1:
> - [djbw: remove persistent irq boolean]
> - [djbw: Simplify error messages]
> - [Alison: spell check]
> - [iweiny: test]
> - Link to RFC: https://lore.kernel.org/r/[email protected]
> ---
> drivers/cxl/pci.c | 29 +++++++++++++++++++----------
> 1 file changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 0155fb66b580..bd12b97bb38e 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -381,7 +381,7 @@ static int cxl_pci_mbox_send(struct cxl_memdev_state *mds,
> return rc;
> }
>
> -static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds)
> +static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail)
> {
> struct cxl_dev_state *cxlds = &mds->cxlds;
> const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
> @@ -443,6 +443,11 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds)
> if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ))
> return 0;
>
> + if (!irq_avail) {
> + dev_err(cxlds->dev, "Mailbox irq enabled but no interrupt vectors.\n");
> + return 0;
> + }
> +
> msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap);
> irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum);
> if (irq < 0)
> @@ -587,7 +592,7 @@ static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds)
> return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf);
> }
>
> -static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
> +static bool cxl_alloc_irq_vectors(struct pci_dev *pdev)
> {
> int nvecs;
>
> @@ -604,9 +609,9 @@ static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
> PCI_IRQ_MSIX | PCI_IRQ_MSI);
> if (nvecs < 1) {
> dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs);
> - return -ENXIO;
> + return false;
> }
> - return 0;
> + return true;
> }
>
> static irqreturn_t cxl_event_thread(int irq, void *id)
> @@ -742,7 +747,7 @@ static bool cxl_event_int_is_fw(u8 setting)
> }
>
> static int cxl_event_config(struct pci_host_bridge *host_bridge,
> - struct cxl_memdev_state *mds)
> + struct cxl_memdev_state *mds, bool irq_avail)
> {
> struct cxl_event_interrupt_policy policy;
> int rc;
> @@ -754,6 +759,11 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge,
> if (!host_bridge->native_cxl_error)
> return 0;
>
> + if (!irq_avail) {
> + dev_info(mds->cxlds.dev, "No interrupt vectors, no polling, skip event processing.\n");
> + return 0;
> + }
> +
> rc = cxl_mem_alloc_event_buf(mds);
> if (rc)
> return rc;
> @@ -788,6 +798,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> struct cxl_register_map map;
> struct cxl_memdev *cxlmd;
> int i, rc, pmu_count;
> + bool irq_avail;
>
> /*
> * Double check the anonymous union trickery in struct cxl_regs
> @@ -845,11 +856,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> else
> dev_warn(&pdev->dev, "Media not active (%d)\n", rc);
>
> - rc = cxl_alloc_irq_vectors(pdev);
> - if (rc)
> - return rc;
> + irq_avail = cxl_alloc_irq_vectors(pdev);
>
> - rc = cxl_pci_setup_mailbox(mds);
> + rc = cxl_pci_setup_mailbox(mds, irq_avail);
> if (rc)
> return rc;
>
> @@ -908,7 +917,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> }
> }
>
> - rc = cxl_event_config(host_bridge, mds);
> + rc = cxl_event_config(host_bridge, mds, irq_avail);
> if (rc)
> return rc;
>
>
> ---
> base-commit: 0dd3ee31125508cd67f7e7172247f05b7fd1753a
> change-id: 20240108-dont-fail-irq-a96310368f0f
>
> Best regards,

2024-01-12 17:43:13

by Ira Weiny

[permalink] [raw]
Subject: Re: [PATCH] cxl/pci: Skip irq features if MSI/MSI-X are not supported

Dave Jiang wrote:
>
> On 1/11/24 18:54, Ira Weiny wrote:

[snip]

> >
> > Note that it is possible for a device to have host based event
> > processing through polling. However, the driver does not support
> > polling and it is not anticipated to be required. Leave that case to
> > the future if such a device comes along.
> >
> > Signed-off-by: Ira Weiny <[email protected]>
>
> v2?

Previous was RFC and not tested. This one is tested and ready. Hence V1.

>
> Reviewed-by: Dave Jiang <[email protected]>

Thanks,
Ira

[snip]

2024-01-12 19:40:51

by fan

[permalink] [raw]
Subject: Re: [PATCH] cxl/pci: Skip irq features if MSI/MSI-X are not supported

On Thu, Jan 11, 2024 at 05:54:45PM -0800, Ira Weiny wrote:
> CXL 3.1 Section 3.1.1 states:
>
> "A Function on a CXL device must not generate INTx messages if
> that Function participates in CXL.cache protocol or CXL.mem
> protocols."
>
> The generic CXL memory driver only supports devices which use the
> CXL.mem protocol. The current driver attempts to allocate MSI/MSI-X
> vectors in anticipation of their need for mailbox interrupts or event
> processing. However, the above requirement does not require a device to
> support interrupts, only that they use MSI/MSI-X. For example, a device
> may disable mailbox interrupts and be configured for firmware first
> event processing and function well with the driver.
>
> Rather than fail device probe if interrupts are not supported; flag that
> irqs are not enabled and avoid features which require interrupts.
> Emit messages appropriate for the situation to aid in debugging should
> device behavior be unexpected due to a failure to allocate vectors.
>
> Note that it is possible for a device to have host based event
> processing through polling. However, the driver does not support
> polling and it is not anticipated to be required. Leave that case to
> the future if such a device comes along.
>
> Signed-off-by: Ira Weiny <[email protected]>
> ---

Reviewed-by: Fan Ni <[email protected]>

> Changes in v1:
> - [djbw: remove persistent irq boolean]
> - [djbw: Simplify error messages]
> - [Alison: spell check]
> - [iweiny: test]
> - Link to RFC: https://lore.kernel.org/r/[email protected]
> ---
> drivers/cxl/pci.c | 29 +++++++++++++++++++----------
> 1 file changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 0155fb66b580..bd12b97bb38e 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -381,7 +381,7 @@ static int cxl_pci_mbox_send(struct cxl_memdev_state *mds,
> return rc;
> }
>
> -static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds)
> +static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail)
> {
> struct cxl_dev_state *cxlds = &mds->cxlds;
> const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
> @@ -443,6 +443,11 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds)
> if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ))
> return 0;
>
> + if (!irq_avail) {
> + dev_err(cxlds->dev, "Mailbox irq enabled but no interrupt vectors.\n");
> + return 0;
> + }
> +
> msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap);
> irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum);
> if (irq < 0)
> @@ -587,7 +592,7 @@ static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds)
> return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf);
> }
>
> -static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
> +static bool cxl_alloc_irq_vectors(struct pci_dev *pdev)
> {
> int nvecs;
>
> @@ -604,9 +609,9 @@ static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
> PCI_IRQ_MSIX | PCI_IRQ_MSI);
> if (nvecs < 1) {
> dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs);
> - return -ENXIO;
> + return false;
> }
> - return 0;
> + return true;
> }
>
> static irqreturn_t cxl_event_thread(int irq, void *id)
> @@ -742,7 +747,7 @@ static bool cxl_event_int_is_fw(u8 setting)
> }
>
> static int cxl_event_config(struct pci_host_bridge *host_bridge,
> - struct cxl_memdev_state *mds)
> + struct cxl_memdev_state *mds, bool irq_avail)
> {
> struct cxl_event_interrupt_policy policy;
> int rc;
> @@ -754,6 +759,11 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge,
> if (!host_bridge->native_cxl_error)
> return 0;
>
> + if (!irq_avail) {
> + dev_info(mds->cxlds.dev, "No interrupt vectors, no polling, skip event processing.\n");
> + return 0;
> + }
> +
> rc = cxl_mem_alloc_event_buf(mds);
> if (rc)
> return rc;
> @@ -788,6 +798,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> struct cxl_register_map map;
> struct cxl_memdev *cxlmd;
> int i, rc, pmu_count;
> + bool irq_avail;
>
> /*
> * Double check the anonymous union trickery in struct cxl_regs
> @@ -845,11 +856,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> else
> dev_warn(&pdev->dev, "Media not active (%d)\n", rc);
>
> - rc = cxl_alloc_irq_vectors(pdev);
> - if (rc)
> - return rc;
> + irq_avail = cxl_alloc_irq_vectors(pdev);
>
> - rc = cxl_pci_setup_mailbox(mds);
> + rc = cxl_pci_setup_mailbox(mds, irq_avail);
> if (rc)
> return rc;
>
> @@ -908,7 +917,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> }
> }
>
> - rc = cxl_event_config(host_bridge, mds);
> + rc = cxl_event_config(host_bridge, mds, irq_avail);
> if (rc)
> return rc;
>
>
> ---
> base-commit: 0dd3ee31125508cd67f7e7172247f05b7fd1753a
> change-id: 20240108-dont-fail-irq-a96310368f0f
>
> Best regards,
> --
> Ira Weiny <[email protected]>
>

2024-01-12 23:17:13

by Dan Williams

[permalink] [raw]
Subject: RE: [PATCH] cxl/pci: Skip irq features if MSI/MSI-X are not supported

Ira Weiny wrote:
> CXL 3.1 Section 3.1.1 states:
>
> "A Function on a CXL device must not generate INTx messages if
> that Function participates in CXL.cache protocol or CXL.mem
> protocols."
>
> The generic CXL memory driver only supports devices which use the
> CXL.mem protocol. The current driver attempts to allocate MSI/MSI-X
> vectors in anticipation of their need for mailbox interrupts or event
> processing. However, the above requirement does not require a device to
> support interrupts, only that they use MSI/MSI-X. For example, a device
> may disable mailbox interrupts and be configured for firmware first
> event processing and function well with the driver.
>
> Rather than fail device probe if interrupts are not supported; flag that
> irqs are not enabled and avoid features which require interrupts.
> Emit messages appropriate for the situation to aid in debugging should
> device behavior be unexpected due to a failure to allocate vectors.
>
> Note that it is possible for a device to have host based event
> processing through polling. However, the driver does not support
> polling and it is not anticipated to be required. Leave that case to
> the future if such a device comes along.
>
> Signed-off-by: Ira Weiny <[email protected]>
> ---
> Changes in v1:
> - [djbw: remove persistent irq boolean]
> - [djbw: Simplify error messages]
> - [Alison: spell check]
> - [iweiny: test]
> - Link to RFC: https://lore.kernel.org/r/[email protected]
> ---
> drivers/cxl/pci.c | 29 +++++++++++++++++++----------
> 1 file changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 0155fb66b580..bd12b97bb38e 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -381,7 +381,7 @@ static int cxl_pci_mbox_send(struct cxl_memdev_state *mds,
> return rc;
> }
>
> -static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds)
> +static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail)
> {
> struct cxl_dev_state *cxlds = &mds->cxlds;
> const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
> @@ -443,6 +443,11 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds)
> if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ))
> return 0;
>
> + if (!irq_avail) {
> + dev_err(cxlds->dev, "Mailbox irq enabled but no interrupt vectors.\n");
> + return 0;
> + }

It is just odd for this to be dev_err() when the very next thing this
function does is:

irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum);
if (irq < 0)
return 0;

..which is silently return if using the interrupt does not work, or
there is a problem with the device where it returns an invalid @msgnum.
Why does one device "bug" deserve a dev_err() callout and the other
doesn't?

So at a minimum the dev_err() should be a dev_dbg(), but I would just
fold the @irq_avail check with the @cap check and be silent, because
nobody will care that this device is not sending mailbox interrupts
polling will just work.

> +
> msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap);
> irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum);
> if (irq < 0)
> @@ -587,7 +592,7 @@ static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds)
> return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf);
> }
>
> -static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
> +static bool cxl_alloc_irq_vectors(struct pci_dev *pdev)
> {
> int nvecs;
>
> @@ -604,9 +609,9 @@ static int cxl_alloc_irq_vectors(struct pci_dev *pdev)
> PCI_IRQ_MSIX | PCI_IRQ_MSI);
> if (nvecs < 1) {
> dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs);
> - return -ENXIO;
> + return false;
> }
> - return 0;
> + return true;
> }
>
> static irqreturn_t cxl_event_thread(int irq, void *id)
> @@ -742,7 +747,7 @@ static bool cxl_event_int_is_fw(u8 setting)
> }
>
> static int cxl_event_config(struct pci_host_bridge *host_bridge,
> - struct cxl_memdev_state *mds)
> + struct cxl_memdev_state *mds, bool irq_avail)
> {
> struct cxl_event_interrupt_policy policy;
> int rc;
> @@ -754,6 +759,11 @@ static int cxl_event_config(struct pci_host_bridge *host_bridge,
> if (!host_bridge->native_cxl_error)
> return 0;
>
> + if (!irq_avail) {
> + dev_info(mds->cxlds.dev, "No interrupt vectors, no polling, skip event processing.\n");

Not going to demand dev_dbg() here just because a device is mandated to
have event command support and it is driver policy to skip enabling it.
However, do drop the "no polling" that's implied. So I think it is:

"No interrupt support, disable event processing."