2023-06-13 03:12:53

by Sui Jingfeng

[permalink] [raw]
Subject: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices

From: Sui Jingfeng <[email protected]>

Deal only with the VGA devcie(pdev->class == 0x0300), so replace the
pci_get_subsys() function with pci_get_class(). Filter the non-PCI display
device(pdev->class != 0x0300) out. There no need to process the non-display
PCI device.

Cc: Bjorn Helgaas <[email protected]>
Signed-off-by: Sui Jingfeng <[email protected]>
---
drivers/pci/vgaarb.c | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
index c1bc6c983932..22a505e877dc 100644
--- a/drivers/pci/vgaarb.c
+++ b/drivers/pci/vgaarb.c
@@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev)
struct pci_dev *bridge;
u16 cmd;

- /* Only deal with VGA class devices */
- if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
- return false;
-
/* Allocate structure */
vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL);
if (vgadev == NULL) {
@@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
struct pci_dev *pdev = to_pci_dev(dev);
bool notify = false;

- vgaarb_dbg(dev, "%s\n", __func__);
+ /* Only deal with VGA class devices */
+ if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8)
+ return 0;

/* For now we're only intereted in devices added and removed. I didn't
* test this thing here, so someone needs to double check for the
@@ -1510,6 +1508,8 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
else if (action == BUS_NOTIFY_DEL_DEVICE)
notify = vga_arbiter_del_pci_device(pdev);

+ vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action);
+
if (notify)
vga_arbiter_notify_clients();
return 0;
@@ -1534,8 +1534,8 @@ static struct miscdevice vga_arb_device = {

static int __init vga_arb_device_init(void)
{
+ struct pci_dev *pdev = NULL;
int rc;
- struct pci_dev *pdev;

rc = misc_register(&vga_arb_device);
if (rc < 0)
@@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void)

/* We add all PCI devices satisfying VGA class in the arbiter by
* default */
- pdev = NULL;
- while ((pdev =
- pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
- PCI_ANY_ID, pdev)) != NULL)
+ while (1) {
+ pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev);
+ if (!pdev)
+ break;
+
vga_arbiter_add_pci_device(pdev);
+ }

pr_info("loaded\n");
return rc;
--
2.25.1



2023-06-14 11:11:18

by Sui Jingfeng

[permalink] [raw]
Subject: Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices

Hi,

On 2023/6/13 11:01, Sui Jingfeng wrote:
> From: Sui Jingfeng <[email protected]>
>
> Deal only with the VGA devcie(pdev->class == 0x0300), so replace the
> pci_get_subsys() function with pci_get_class(). Filter the non-PCI display
> device(pdev->class != 0x0300) out. There no need to process the non-display
> PCI device.
>
> Cc: Bjorn Helgaas <[email protected]>
> Signed-off-by: Sui Jingfeng <[email protected]>
> ---
> drivers/pci/vgaarb.c | 22 ++++++++++++----------
> 1 file changed, 12 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
> index c1bc6c983932..22a505e877dc 100644
> --- a/drivers/pci/vgaarb.c
> +++ b/drivers/pci/vgaarb.c
> @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev)
> struct pci_dev *bridge;
> u16 cmd;
>
> - /* Only deal with VGA class devices */
> - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
> - return false;
> -

Hi, here is probably a bug fixing.

For an example, nvidia render only GPU typically has 0x0380.

at its PCI class number, but  render only GPU should not participate in
the arbitration.

As it shouldn't snoop the legacy fixed VGA address.

It(render only GPU) can not display anything.


But 0x0380 >> 8 = 0x03, the filter  failed.


> /* Allocate structure */
> vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL);
> if (vgadev == NULL) {
> @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
> struct pci_dev *pdev = to_pci_dev(dev);
> bool notify = false;
>
> - vgaarb_dbg(dev, "%s\n", __func__);
> + /* Only deal with VGA class devices */
> + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8)
> + return 0;

So here we only care 0x0300, my initial intent is to make an optimization,

nowadays sane display graphic card should all has 0x0300 as its PCI
class number, is this complete right?

```

#define PCI_BASE_CLASS_DISPLAY        0x03
#define PCI_CLASS_DISPLAY_VGA        0x0300
#define PCI_CLASS_DISPLAY_XGA        0x0301
#define PCI_CLASS_DISPLAY_3D        0x0302
#define PCI_CLASS_DISPLAY_OTHER        0x0380

```

Any ideas ?

> /* For now we're only intereted in devices added and removed. I didn't
> * test this thing here, so someone needs to double check for the
> @@ -1510,6 +1508,8 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
> else if (action == BUS_NOTIFY_DEL_DEVICE)
> notify = vga_arbiter_del_pci_device(pdev);
>
> + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action);
> +
> if (notify)
> vga_arbiter_notify_clients();
> return 0;
> @@ -1534,8 +1534,8 @@ static struct miscdevice vga_arb_device = {
>
> static int __init vga_arb_device_init(void)
> {
> + struct pci_dev *pdev = NULL;
> int rc;
> - struct pci_dev *pdev;
>
> rc = misc_register(&vga_arb_device);
> if (rc < 0)
> @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void)
>
> /* We add all PCI devices satisfying VGA class in the arbiter by
> * default */
> - pdev = NULL;
> - while ((pdev =
> - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
> - PCI_ANY_ID, pdev)) != NULL)
> + while (1) {
> + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev);
> + if (!pdev)
> + break;
> +
> vga_arbiter_add_pci_device(pdev);
> + }
>
> pr_info("loaded\n");
> return rc;

--
Jingfeng


2023-06-15 21:23:27

by Alex Deucher

[permalink] [raw]
Subject: Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices

On Wed, Jun 14, 2023 at 6:50 AM Sui Jingfeng <[email protected]> wrote:
>
> Hi,
>
> On 2023/6/13 11:01, Sui Jingfeng wrote:
> > From: Sui Jingfeng <[email protected]>
> >
> > Deal only with the VGA devcie(pdev->class == 0x0300), so replace the
> > pci_get_subsys() function with pci_get_class(). Filter the non-PCI display
> > device(pdev->class != 0x0300) out. There no need to process the non-display
> > PCI device.
> >
> > Cc: Bjorn Helgaas <[email protected]>
> > Signed-off-by: Sui Jingfeng <[email protected]>
> > ---
> > drivers/pci/vgaarb.c | 22 ++++++++++++----------
> > 1 file changed, 12 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
> > index c1bc6c983932..22a505e877dc 100644
> > --- a/drivers/pci/vgaarb.c
> > +++ b/drivers/pci/vgaarb.c
> > @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev)
> > struct pci_dev *bridge;
> > u16 cmd;
> >
> > - /* Only deal with VGA class devices */
> > - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
> > - return false;
> > -
>
> Hi, here is probably a bug fixing.
>
> For an example, nvidia render only GPU typically has 0x0380.
>
> at its PCI class number, but render only GPU should not participate in
> the arbitration.
>
> As it shouldn't snoop the legacy fixed VGA address.
>
> It(render only GPU) can not display anything.
>
>
> But 0x0380 >> 8 = 0x03, the filter failed.
>
>
> > /* Allocate structure */
> > vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL);
> > if (vgadev == NULL) {
> > @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
> > struct pci_dev *pdev = to_pci_dev(dev);
> > bool notify = false;
> >
> > - vgaarb_dbg(dev, "%s\n", __func__);
> > + /* Only deal with VGA class devices */
> > + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8)
> > + return 0;
>
> So here we only care 0x0300, my initial intent is to make an optimization,
>
> nowadays sane display graphic card should all has 0x0300 as its PCI
> class number, is this complete right?
>
> ```
>
> #define PCI_BASE_CLASS_DISPLAY 0x03
> #define PCI_CLASS_DISPLAY_VGA 0x0300
> #define PCI_CLASS_DISPLAY_XGA 0x0301
> #define PCI_CLASS_DISPLAY_3D 0x0302
> #define PCI_CLASS_DISPLAY_OTHER 0x0380
>
> ```
>
> Any ideas ?

I'm not quite sure what you are asking about here. For vga_arb, we
only care about VGA class devices since those should be on the only
ones that might have VGA routed to them. However, as VGA gets
deprecated, you'll have more non VGA PCI classes for devices which
could be the pre-OS console device.

Alex

>
> > /* For now we're only intereted in devices added and removed. I didn't
> > * test this thing here, so someone needs to double check for the
> > @@ -1510,6 +1508,8 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
> > else if (action == BUS_NOTIFY_DEL_DEVICE)
> > notify = vga_arbiter_del_pci_device(pdev);
> >
> > + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action);
> > +
> > if (notify)
> > vga_arbiter_notify_clients();
> > return 0;
> > @@ -1534,8 +1534,8 @@ static struct miscdevice vga_arb_device = {
> >
> > static int __init vga_arb_device_init(void)
> > {
> > + struct pci_dev *pdev = NULL;
> > int rc;
> > - struct pci_dev *pdev;
> >
> > rc = misc_register(&vga_arb_device);
> > if (rc < 0)
> > @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void)
> >
> > /* We add all PCI devices satisfying VGA class in the arbiter by
> > * default */
> > - pdev = NULL;
> > - while ((pdev =
> > - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
> > - PCI_ANY_ID, pdev)) != NULL)
> > + while (1) {
> > + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev);
> > + if (!pdev)
> > + break;
> > +
> > vga_arbiter_add_pci_device(pdev);
> > + }
> >
> > pr_info("loaded\n");
> > return rc;
>
> --
> Jingfeng
>

2023-06-16 07:48:04

by Sui Jingfeng

[permalink] [raw]
Subject: Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices

Hi,

On 2023/6/16 05:11, Alex Deucher wrote:
> On Wed, Jun 14, 2023 at 6:50 AM Sui Jingfeng <[email protected]> wrote:
>> Hi,
>>
>> On 2023/6/13 11:01, Sui Jingfeng wrote:
>>> From: Sui Jingfeng <[email protected]>
>>>
>>> Deal only with the VGA devcie(pdev->class == 0x0300), so replace the
>>> pci_get_subsys() function with pci_get_class(). Filter the non-PCI display
>>> device(pdev->class != 0x0300) out. There no need to process the non-display
>>> PCI device.
>>>
>>> Cc: Bjorn Helgaas <[email protected]>
>>> Signed-off-by: Sui Jingfeng <[email protected]>
>>> ---
>>> drivers/pci/vgaarb.c | 22 ++++++++++++----------
>>> 1 file changed, 12 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
>>> index c1bc6c983932..22a505e877dc 100644
>>> --- a/drivers/pci/vgaarb.c
>>> +++ b/drivers/pci/vgaarb.c
>>> @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev)
>>> struct pci_dev *bridge;
>>> u16 cmd;
>>>
>>> - /* Only deal with VGA class devices */
>>> - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
>>> - return false;
>>> -
>> Hi, here is probably a bug fixing.
>>
>> For an example, nvidia render only GPU typically has 0x0380.
>>
>> as its PCI class number, but render only GPU should not participate in
>> the arbitration.
>>
>> As it shouldn't snoop the legacy fixed VGA address.
>>
>> It(render only GPU) can not display anything.
>>
>>
>> But 0x0380 >> 8 = 0x03, the filter failed.
>>
>>
>>> /* Allocate structure */
>>> vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL);
>>> if (vgadev == NULL) {
>>> @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
>>> struct pci_dev *pdev = to_pci_dev(dev);
>>> bool notify = false;
>>>
>>> - vgaarb_dbg(dev, "%s\n", __func__);
>>> + /* Only deal with VGA class devices */
>>> + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8)
>>> + return 0;
>> So here we only care 0x0300, my initial intent is to make an optimization,
>>
>> nowadays sane display graphic card should all has 0x0300 as its PCI
>> class number, is this complete right?
>>
>> ```
>>
>> #define PCI_BASE_CLASS_DISPLAY 0x03
>> #define PCI_CLASS_DISPLAY_VGA 0x0300
>> #define PCI_CLASS_DISPLAY_XGA 0x0301
>> #define PCI_CLASS_DISPLAY_3D 0x0302
>> #define PCI_CLASS_DISPLAY_OTHER 0x0380
>>
>> ```
>>
>> Any ideas ?
> I'm not quite sure what you are asking about here.

To be honest, I'm worried about the PCI devices which has a

PCI_CLASS_DISPLAY_XGA as its PCI class number.

As those devices are very uncommon in the real world.


$ find . -name "*.c" -type f | xargs grep "PCI_CLASS_DISPLAY_XGA"


Grep the "PCI_CLASS_DISPLAY_XGA" in the linux kernel tree got ZERO,

there no code reference this macro. So I think it seems safe to ignore
the XGA ?


PCI_CLASS_DISPLAY_3D and PCI_CLASS_DISPLAY_OTHER are used to annotate
the render-only GPU.

And render-only GPU can't decode the fixed VGA address space, it is safe
to ignore them.


> For vga_arb, we
> only care about VGA class devices since those should be on the only
> ones that might have VGA routed to them.

> However, as VGA gets deprecated,

We need the vgaarb for a system with multiple video card.

Not only because some Legacy VGA devices implemented

on PCI will typically have the same "hard-decoded" addresses;

But also these video card need to participate in the arbitration,

determine the default boot device.


Nowadays, the 'VGA devices' here is stand for the Graphics card

which is capable of display something on the screen.

We still need vgaarb to select the default boot device.


> you'll have more non VGA PCI classes for devices which
> could be the pre-OS console device.

Ah, we still want  do this(by applying this patch) first,

and then we will have the opportunity to see who will crying if
something is broken. Will know more then.

But drop this patch or revise it with more consideration is also
acceptable.


I asking about suggestion and/or review.

> Alex
>
>>> /* For now we're only intereted in devices added and removed. I didn't
>>> * test this thing here, so someone needs to double check for the
>>> @@ -1510,6 +1508,8 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
>>> else if (action == BUS_NOTIFY_DEL_DEVICE)
>>> notify = vga_arbiter_del_pci_device(pdev);
>>>
>>> + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action);
>>> +
>>> if (notify)
>>> vga_arbiter_notify_clients();
>>> return 0;
>>> @@ -1534,8 +1534,8 @@ static struct miscdevice vga_arb_device = {
>>>
>>> static int __init vga_arb_device_init(void)
>>> {
>>> + struct pci_dev *pdev = NULL;
>>> int rc;
>>> - struct pci_dev *pdev;
>>>
>>> rc = misc_register(&vga_arb_device);
>>> if (rc < 0)
>>> @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void)
>>>
>>> /* We add all PCI devices satisfying VGA class in the arbiter by
>>> * default */
>>> - pdev = NULL;
>>> - while ((pdev =
>>> - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
>>> - PCI_ANY_ID, pdev)) != NULL)
>>> + while (1) {
>>> + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev);
>>> + if (!pdev)
>>> + break;
>>> +
>>> vga_arbiter_add_pci_device(pdev);
>>> + }
>>>
>>> pr_info("loaded\n");
>>> return rc;
>> --
>> Jingfeng
>>
--
Jingfeng


2023-06-16 13:56:04

by Alex Deucher

[permalink] [raw]
Subject: Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices

On Fri, Jun 16, 2023 at 3:11 AM Sui Jingfeng <[email protected]> wrote:
>
> Hi,
>
> On 2023/6/16 05:11, Alex Deucher wrote:
> > On Wed, Jun 14, 2023 at 6:50 AM Sui Jingfeng <[email protected]> wrote:
> >> Hi,
> >>
> >> On 2023/6/13 11:01, Sui Jingfeng wrote:
> >>> From: Sui Jingfeng <[email protected]>
> >>>
> >>> Deal only with the VGA devcie(pdev->class == 0x0300), so replace the
> >>> pci_get_subsys() function with pci_get_class(). Filter the non-PCI display
> >>> device(pdev->class != 0x0300) out. There no need to process the non-display
> >>> PCI device.
> >>>
> >>> Cc: Bjorn Helgaas <[email protected]>
> >>> Signed-off-by: Sui Jingfeng <[email protected]>
> >>> ---
> >>> drivers/pci/vgaarb.c | 22 ++++++++++++----------
> >>> 1 file changed, 12 insertions(+), 10 deletions(-)
> >>>
> >>> diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
> >>> index c1bc6c983932..22a505e877dc 100644
> >>> --- a/drivers/pci/vgaarb.c
> >>> +++ b/drivers/pci/vgaarb.c
> >>> @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev)
> >>> struct pci_dev *bridge;
> >>> u16 cmd;
> >>>
> >>> - /* Only deal with VGA class devices */
> >>> - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
> >>> - return false;
> >>> -
> >> Hi, here is probably a bug fixing.
> >>
> >> For an example, nvidia render only GPU typically has 0x0380.
> >>
> >> as its PCI class number, but render only GPU should not participate in
> >> the arbitration.
> >>
> >> As it shouldn't snoop the legacy fixed VGA address.
> >>
> >> It(render only GPU) can not display anything.
> >>
> >>
> >> But 0x0380 >> 8 = 0x03, the filter failed.
> >>
> >>
> >>> /* Allocate structure */
> >>> vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL);
> >>> if (vgadev == NULL) {
> >>> @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
> >>> struct pci_dev *pdev = to_pci_dev(dev);
> >>> bool notify = false;
> >>>
> >>> - vgaarb_dbg(dev, "%s\n", __func__);
> >>> + /* Only deal with VGA class devices */
> >>> + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8)
> >>> + return 0;
> >> So here we only care 0x0300, my initial intent is to make an optimization,
> >>
> >> nowadays sane display graphic card should all has 0x0300 as its PCI
> >> class number, is this complete right?
> >>
> >> ```
> >>
> >> #define PCI_BASE_CLASS_DISPLAY 0x03
> >> #define PCI_CLASS_DISPLAY_VGA 0x0300
> >> #define PCI_CLASS_DISPLAY_XGA 0x0301
> >> #define PCI_CLASS_DISPLAY_3D 0x0302
> >> #define PCI_CLASS_DISPLAY_OTHER 0x0380
> >>
> >> ```
> >>
> >> Any ideas ?
> > I'm not quite sure what you are asking about here.
>
> To be honest, I'm worried about the PCI devices which has a
>
> PCI_CLASS_DISPLAY_XGA as its PCI class number.
>
> As those devices are very uncommon in the real world.
>
>
> $ find . -name "*.c" -type f | xargs grep "PCI_CLASS_DISPLAY_XGA"
>
>
> Grep the "PCI_CLASS_DISPLAY_XGA" in the linux kernel tree got ZERO,
>
> there no code reference this macro. So I think it seems safe to ignore
> the XGA ?
>
>
> PCI_CLASS_DISPLAY_3D and PCI_CLASS_DISPLAY_OTHER are used to annotate
> the render-only GPU.
>
> And render-only GPU can't decode the fixed VGA address space, it is safe
> to ignore them.
>
>
> > For vga_arb, we
> > only care about VGA class devices since those should be on the only
> > ones that might have VGA routed to them.
>
> > However, as VGA gets deprecated,
>
> We need the vgaarb for a system with multiple video card.
>
> Not only because some Legacy VGA devices implemented
>
> on PCI will typically have the same "hard-decoded" addresses;
>
> But also these video card need to participate in the arbitration,
>
> determine the default boot device.

But couldn't the boot device be determined via what whatever resources
were used by the pre-OS console? I feel like that should be separate
from vgaarb. vgaarb should handle PCI VGA routing and some other
mechanism should be used to determine what device provided the pre-OS
console.

Alex


>
>
> Nowadays, the 'VGA devices' here is stand for the Graphics card
>
> which is capable of display something on the screen.
>
> We still need vgaarb to select the default boot device.
>
>
> > you'll have more non VGA PCI classes for devices which
> > could be the pre-OS console device.
>
> Ah, we still want do this(by applying this patch) first,
>
> and then we will have the opportunity to see who will crying if
> something is broken. Will know more then.
>
> But drop this patch or revise it with more consideration is also
> acceptable.
>
>
> I asking about suggestion and/or review.
>
> > Alex
> >
> >>> /* For now we're only intereted in devices added and removed. I didn't
> >>> * test this thing here, so someone needs to double check for the
> >>> @@ -1510,6 +1508,8 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
> >>> else if (action == BUS_NOTIFY_DEL_DEVICE)
> >>> notify = vga_arbiter_del_pci_device(pdev);
> >>>
> >>> + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action);
> >>> +
> >>> if (notify)
> >>> vga_arbiter_notify_clients();
> >>> return 0;
> >>> @@ -1534,8 +1534,8 @@ static struct miscdevice vga_arb_device = {
> >>>
> >>> static int __init vga_arb_device_init(void)
> >>> {
> >>> + struct pci_dev *pdev = NULL;
> >>> int rc;
> >>> - struct pci_dev *pdev;
> >>>
> >>> rc = misc_register(&vga_arb_device);
> >>> if (rc < 0)
> >>> @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void)
> >>>
> >>> /* We add all PCI devices satisfying VGA class in the arbiter by
> >>> * default */
> >>> - pdev = NULL;
> >>> - while ((pdev =
> >>> - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
> >>> - PCI_ANY_ID, pdev)) != NULL)
> >>> + while (1) {
> >>> + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev);
> >>> + if (!pdev)
> >>> + break;
> >>> +
> >>> vga_arbiter_add_pci_device(pdev);
> >>> + }
> >>>
> >>> pr_info("loaded\n");
> >>> return rc;
> >> --
> >> Jingfeng
> >>
> --
> Jingfeng
>

2023-06-16 14:35:02

by Sui Jingfeng

[permalink] [raw]
Subject: Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices


On 2023/6/16 21:41, Alex Deucher wrote:
> On Fri, Jun 16, 2023 at 3:11 AM Sui Jingfeng <[email protected]> wrote:
>> Hi,
>>
>> On 2023/6/16 05:11, Alex Deucher wrote:
>>> On Wed, Jun 14, 2023 at 6:50 AM Sui Jingfeng <[email protected]> wrote:
>>>> Hi,
>>>>
>>>> On 2023/6/13 11:01, Sui Jingfeng wrote:
>>>>> From: Sui Jingfeng <[email protected]>
>>>>>
>>>>> Deal only with the VGA devcie(pdev->class == 0x0300), so replace the
>>>>> pci_get_subsys() function with pci_get_class(). Filter the non-PCI display
>>>>> device(pdev->class != 0x0300) out. There no need to process the non-display
>>>>> PCI device.
>>>>>
>>>>> Cc: Bjorn Helgaas <[email protected]>
>>>>> Signed-off-by: Sui Jingfeng <[email protected]>
>>>>> ---
>>>>> drivers/pci/vgaarb.c | 22 ++++++++++++----------
>>>>> 1 file changed, 12 insertions(+), 10 deletions(-)
>>>>>
>>>>> diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
>>>>> index c1bc6c983932..22a505e877dc 100644
>>>>> --- a/drivers/pci/vgaarb.c
>>>>> +++ b/drivers/pci/vgaarb.c
>>>>> @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev)
>>>>> struct pci_dev *bridge;
>>>>> u16 cmd;
>>>>>
>>>>> - /* Only deal with VGA class devices */
>>>>> - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
>>>>> - return false;
>>>>> -
>>>> Hi, here is probably a bug fixing.
>>>>
>>>> For an example, nvidia render only GPU typically has 0x0380.
>>>>
>>>> as its PCI class number, but render only GPU should not participate in
>>>> the arbitration.
>>>>
>>>> As it shouldn't snoop the legacy fixed VGA address.
>>>>
>>>> It(render only GPU) can not display anything.
>>>>
>>>>
>>>> But 0x0380 >> 8 = 0x03, the filter failed.
>>>>
>>>>
>>>>> /* Allocate structure */
>>>>> vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL);
>>>>> if (vgadev == NULL) {
>>>>> @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
>>>>> struct pci_dev *pdev = to_pci_dev(dev);
>>>>> bool notify = false;
>>>>>
>>>>> - vgaarb_dbg(dev, "%s\n", __func__);
>>>>> + /* Only deal with VGA class devices */
>>>>> + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8)
>>>>> + return 0;
>>>> So here we only care 0x0300, my initial intent is to make an optimization,
>>>>
>>>> nowadays sane display graphic card should all has 0x0300 as its PCI
>>>> class number, is this complete right?
>>>>
>>>> ```
>>>>
>>>> #define PCI_BASE_CLASS_DISPLAY 0x03
>>>> #define PCI_CLASS_DISPLAY_VGA 0x0300
>>>> #define PCI_CLASS_DISPLAY_XGA 0x0301
>>>> #define PCI_CLASS_DISPLAY_3D 0x0302
>>>> #define PCI_CLASS_DISPLAY_OTHER 0x0380
>>>>
>>>> ```
>>>>
>>>> Any ideas ?
>>> I'm not quite sure what you are asking about here.
>> To be honest, I'm worried about the PCI devices which has a
>>
>> PCI_CLASS_DISPLAY_XGA as its PCI class number.
>>
>> As those devices are very uncommon in the real world.
>>
>>
>> $ find . -name "*.c" -type f | xargs grep "PCI_CLASS_DISPLAY_XGA"
>>
>>
>> Grep the "PCI_CLASS_DISPLAY_XGA" in the linux kernel tree got ZERO,
>>
>> there no code reference this macro. So I think it seems safe to ignore
>> the XGA ?
>>
>>
>> PCI_CLASS_DISPLAY_3D and PCI_CLASS_DISPLAY_OTHER are used to annotate
>> the render-only GPU.
>>
>> And render-only GPU can't decode the fixed VGA address space, it is safe
>> to ignore them.
>>
>>
>>> For vga_arb, we
>>> only care about VGA class devices since those should be on the only
>>> ones that might have VGA routed to them.
>>> However, as VGA gets deprecated,
>> We need the vgaarb for a system with multiple video card.
>>
>> Not only because some Legacy VGA devices implemented
>>
>> on PCI will typically have the same "hard-decoded" addresses;
>>
>> But also these video card need to participate in the arbitration,
>>
>> determine the default boot device.
> But couldn't the boot device be determined via what whatever resources
> were used by the pre-OS console?

I don't know what you are refer to by saying  pre-OS console, UEFI
SHELL,  UEFI GOP  or something like that.

If you are referring to the framebuffer driver which light up the screen
before the Linux kernel is loaded .


Then, what you have said is true,  the boot device is determined by the
pre-OS console.

But the problem is how does the Linux kernel(vgaarb) could know which
one is the default boot device

on a multiple GPU machine.  Relaying on the firmware fb's address and
size is what the mechanism

we already in using.


> I feel like that should be separate from vgaarb.

Emm, this really deserved another patch, please ?

> vgaarb should handle PCI VGA routing and some other
> mechanism should be used to determine what device provided the pre-OS
> console.

If the new mechanism need the firmware changed, then this probably break
the old machine.

Also, this probably will get all arch involved. to get the new mechanism
supported.

The testing pressure and review power needed is quite large.

drm/amdgpu and drm/radeon already being used on X86, ARM64,  Mips and
more arch...

The reviewing process will became quite difficult then.

vgaarb is really what we already in use, and being used more than ten
years ...


> Alex
>

>>
>> Nowadays, the 'VGA devices' here is stand for the Graphics card
>>
>> which is capable of display something on the screen.
>>
>> We still need vgaarb to select the default boot device.
>>
>>
>>> you'll have more non VGA PCI classes for devices which
>>> could be the pre-OS console device.
>> Ah, we still want do this(by applying this patch) first,
>>
>> and then we will have the opportunity to see who will crying if
>> something is broken. Will know more then.
>>
>> But drop this patch or revise it with more consideration is also
>> acceptable.
>>
>>
>> I asking about suggestion and/or review.
>>
>>> Alex
>>>
>>>>> /* For now we're only intereted in devices added and removed. I didn't
>>>>> * test this thing here, so someone needs to double check for the
>>>>> @@ -1510,6 +1508,8 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
>>>>> else if (action == BUS_NOTIFY_DEL_DEVICE)
>>>>> notify = vga_arbiter_del_pci_device(pdev);
>>>>>
>>>>> + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action);
>>>>> +
>>>>> if (notify)
>>>>> vga_arbiter_notify_clients();
>>>>> return 0;
>>>>> @@ -1534,8 +1534,8 @@ static struct miscdevice vga_arb_device = {
>>>>>
>>>>> static int __init vga_arb_device_init(void)
>>>>> {
>>>>> + struct pci_dev *pdev = NULL;
>>>>> int rc;
>>>>> - struct pci_dev *pdev;
>>>>>
>>>>> rc = misc_register(&vga_arb_device);
>>>>> if (rc < 0)
>>>>> @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void)
>>>>>
>>>>> /* We add all PCI devices satisfying VGA class in the arbiter by
>>>>> * default */
>>>>> - pdev = NULL;
>>>>> - while ((pdev =
>>>>> - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
>>>>> - PCI_ANY_ID, pdev)) != NULL)
>>>>> + while (1) {
>>>>> + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev);
>>>>> + if (!pdev)
>>>>> + break;
>>>>> +
>>>>> vga_arbiter_add_pci_device(pdev);
>>>>> + }
>>>>>
>>>>> pr_info("loaded\n");
>>>>> return rc;
>>>> --
>>>> Jingfeng
>>>>
>> --
>> Jingfeng
>>
--
Jingfeng


2023-06-16 15:03:02

by Alex Deucher

[permalink] [raw]
Subject: Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices

On Fri, Jun 16, 2023 at 10:22 AM Sui Jingfeng <[email protected]> wrote:
>
>
> On 2023/6/16 21:41, Alex Deucher wrote:
> > On Fri, Jun 16, 2023 at 3:11 AM Sui Jingfeng <[email protected]> wrote:
> >> Hi,
> >>
> >> On 2023/6/16 05:11, Alex Deucher wrote:
> >>> On Wed, Jun 14, 2023 at 6:50 AM Sui Jingfeng <[email protected]> wrote:
> >>>> Hi,
> >>>>
> >>>> On 2023/6/13 11:01, Sui Jingfeng wrote:
> >>>>> From: Sui Jingfeng <[email protected]>
> >>>>>
> >>>>> Deal only with the VGA devcie(pdev->class == 0x0300), so replace the
> >>>>> pci_get_subsys() function with pci_get_class(). Filter the non-PCI display
> >>>>> device(pdev->class != 0x0300) out. There no need to process the non-display
> >>>>> PCI device.
> >>>>>
> >>>>> Cc: Bjorn Helgaas <[email protected]>
> >>>>> Signed-off-by: Sui Jingfeng <[email protected]>
> >>>>> ---
> >>>>> drivers/pci/vgaarb.c | 22 ++++++++++++----------
> >>>>> 1 file changed, 12 insertions(+), 10 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
> >>>>> index c1bc6c983932..22a505e877dc 100644
> >>>>> --- a/drivers/pci/vgaarb.c
> >>>>> +++ b/drivers/pci/vgaarb.c
> >>>>> @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev)
> >>>>> struct pci_dev *bridge;
> >>>>> u16 cmd;
> >>>>>
> >>>>> - /* Only deal with VGA class devices */
> >>>>> - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
> >>>>> - return false;
> >>>>> -
> >>>> Hi, here is probably a bug fixing.
> >>>>
> >>>> For an example, nvidia render only GPU typically has 0x0380.
> >>>>
> >>>> as its PCI class number, but render only GPU should not participate in
> >>>> the arbitration.
> >>>>
> >>>> As it shouldn't snoop the legacy fixed VGA address.
> >>>>
> >>>> It(render only GPU) can not display anything.
> >>>>
> >>>>
> >>>> But 0x0380 >> 8 = 0x03, the filter failed.
> >>>>
> >>>>
> >>>>> /* Allocate structure */
> >>>>> vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL);
> >>>>> if (vgadev == NULL) {
> >>>>> @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
> >>>>> struct pci_dev *pdev = to_pci_dev(dev);
> >>>>> bool notify = false;
> >>>>>
> >>>>> - vgaarb_dbg(dev, "%s\n", __func__);
> >>>>> + /* Only deal with VGA class devices */
> >>>>> + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8)
> >>>>> + return 0;
> >>>> So here we only care 0x0300, my initial intent is to make an optimization,
> >>>>
> >>>> nowadays sane display graphic card should all has 0x0300 as its PCI
> >>>> class number, is this complete right?
> >>>>
> >>>> ```
> >>>>
> >>>> #define PCI_BASE_CLASS_DISPLAY 0x03
> >>>> #define PCI_CLASS_DISPLAY_VGA 0x0300
> >>>> #define PCI_CLASS_DISPLAY_XGA 0x0301
> >>>> #define PCI_CLASS_DISPLAY_3D 0x0302
> >>>> #define PCI_CLASS_DISPLAY_OTHER 0x0380
> >>>>
> >>>> ```
> >>>>
> >>>> Any ideas ?
> >>> I'm not quite sure what you are asking about here.
> >> To be honest, I'm worried about the PCI devices which has a
> >>
> >> PCI_CLASS_DISPLAY_XGA as its PCI class number.
> >>
> >> As those devices are very uncommon in the real world.
> >>
> >>
> >> $ find . -name "*.c" -type f | xargs grep "PCI_CLASS_DISPLAY_XGA"
> >>
> >>
> >> Grep the "PCI_CLASS_DISPLAY_XGA" in the linux kernel tree got ZERO,
> >>
> >> there no code reference this macro. So I think it seems safe to ignore
> >> the XGA ?
> >>
> >>
> >> PCI_CLASS_DISPLAY_3D and PCI_CLASS_DISPLAY_OTHER are used to annotate
> >> the render-only GPU.
> >>
> >> And render-only GPU can't decode the fixed VGA address space, it is safe
> >> to ignore them.
> >>
> >>
> >>> For vga_arb, we
> >>> only care about VGA class devices since those should be on the only
> >>> ones that might have VGA routed to them.
> >>> However, as VGA gets deprecated,
> >> We need the vgaarb for a system with multiple video card.
> >>
> >> Not only because some Legacy VGA devices implemented
> >>
> >> on PCI will typically have the same "hard-decoded" addresses;
> >>
> >> But also these video card need to participate in the arbitration,
> >>
> >> determine the default boot device.
> > But couldn't the boot device be determined via what whatever resources
> > were used by the pre-OS console?
>
> I don't know what you are refer to by saying pre-OS console, UEFI
> SHELL, UEFI GOP or something like that.
>

Right. Before the OS loads the platform firmware generally sets up
something for display. That could be GOP or vesa or some other
platform specific protocol.

> If you are referring to the framebuffer driver which light up the screen
> before the Linux kernel is loaded .
>
>
> Then, what you have said is true, the boot device is determined by the
> pre-OS console.
>
> But the problem is how does the Linux kernel(vgaarb) could know which
> one is the default boot device
>
> on a multiple GPU machine. Relaying on the firmware fb's address and
> size is what the mechanism
>
> we already in using.

Right. It shouldn't need to depend on vgaarb.

>
>
> > I feel like that should be separate from vgaarb.
>
> Emm, this really deserved another patch, please ?
>
> > vgaarb should handle PCI VGA routing and some other
> > mechanism should be used to determine what device provided the pre-OS
> > console.
>
> If the new mechanism need the firmware changed, then this probably break
> the old machine.
>
> Also, this probably will get all arch involved. to get the new mechanism
> supported.
>
> The testing pressure and review power needed is quite large.
>
> drm/amdgpu and drm/radeon already being used on X86, ARM64, Mips and
> more arch...
>
> The reviewing process will became quite difficult then.
>
> vgaarb is really what we already in use, and being used more than ten
> years ...

Yes, it works for x86 (and a few other platforms) today because of the
VGA legacy, so we can look at VGA routing to determine this. But even
today, we don't need VGA routing to determine what was the primary
display before starting the OS. We could probably have a platform
independent way to handle this by looking at the bread crumbs leftover
from the pre-OS environment. E.g., for pre-UEFI platforms, we can
look at VGA routing. For UEFI platforms we can look at what GOP left
us. For various non-UEFI ARM/PPC/MIPS/etc. platforms we can look at
whatever breadcrumbs those pre-OS environments left. That way when
VGA goes away, we can have a clean break and you won't need vgaarb if
the platform has no VGA devices.

Alex

>
>
> > Alex
> >
>
> >>
> >> Nowadays, the 'VGA devices' here is stand for the Graphics card
> >>
> >> which is capable of display something on the screen.
> >>
> >> We still need vgaarb to select the default boot device.
> >>
> >>
> >>> you'll have more non VGA PCI classes for devices which
> >>> could be the pre-OS console device.
> >> Ah, we still want do this(by applying this patch) first,
> >>
> >> and then we will have the opportunity to see who will crying if
> >> something is broken. Will know more then.
> >>
> >> But drop this patch or revise it with more consideration is also
> >> acceptable.
> >>
> >>
> >> I asking about suggestion and/or review.
> >>
> >>> Alex
> >>>
> >>>>> /* For now we're only intereted in devices added and removed. I didn't
> >>>>> * test this thing here, so someone needs to double check for the
> >>>>> @@ -1510,6 +1508,8 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
> >>>>> else if (action == BUS_NOTIFY_DEL_DEVICE)
> >>>>> notify = vga_arbiter_del_pci_device(pdev);
> >>>>>
> >>>>> + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action);
> >>>>> +
> >>>>> if (notify)
> >>>>> vga_arbiter_notify_clients();
> >>>>> return 0;
> >>>>> @@ -1534,8 +1534,8 @@ static struct miscdevice vga_arb_device = {
> >>>>>
> >>>>> static int __init vga_arb_device_init(void)
> >>>>> {
> >>>>> + struct pci_dev *pdev = NULL;
> >>>>> int rc;
> >>>>> - struct pci_dev *pdev;
> >>>>>
> >>>>> rc = misc_register(&vga_arb_device);
> >>>>> if (rc < 0)
> >>>>> @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void)
> >>>>>
> >>>>> /* We add all PCI devices satisfying VGA class in the arbiter by
> >>>>> * default */
> >>>>> - pdev = NULL;
> >>>>> - while ((pdev =
> >>>>> - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
> >>>>> - PCI_ANY_ID, pdev)) != NULL)
> >>>>> + while (1) {
> >>>>> + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev);
> >>>>> + if (!pdev)
> >>>>> + break;
> >>>>> +
> >>>>> vga_arbiter_add_pci_device(pdev);
> >>>>> + }
> >>>>>
> >>>>> pr_info("loaded\n");
> >>>>> return rc;
> >>>> --
> >>>> Jingfeng
> >>>>
> >> --
> >> Jingfeng
> >>
> --
> Jingfeng
>

2023-06-16 16:00:49

by Sui Jingfeng

[permalink] [raw]
Subject: Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices

Hi,

On 2023/6/16 22:34, Alex Deucher wrote:
> On Fri, Jun 16, 2023 at 10:22 AM Sui Jingfeng <[email protected]> wrote:
>>
>> On 2023/6/16 21:41, Alex Deucher wrote:
>>> On Fri, Jun 16, 2023 at 3:11 AM Sui Jingfeng <[email protected]> wrote:
>>>> Hi,
>>>>
>>>> On 2023/6/16 05:11, Alex Deucher wrote:
>>>>> On Wed, Jun 14, 2023 at 6:50 AM Sui Jingfeng <[email protected]> wrote:
>>>>>> Hi,
>>>>>>
>>>>>> On 2023/6/13 11:01, Sui Jingfeng wrote:
>>>>>>> From: Sui Jingfeng <[email protected]>
>>>>>>>
>>>>>>> Deal only with the VGA devcie(pdev->class == 0x0300), so replace the
>>>>>>> pci_get_subsys() function with pci_get_class(). Filter the non-PCI display
>>>>>>> device(pdev->class != 0x0300) out. There no need to process the non-display
>>>>>>> PCI device.
>>>>>>>
>>>>>>> Cc: Bjorn Helgaas <[email protected]>
>>>>>>> Signed-off-by: Sui Jingfeng <[email protected]>
>>>>>>> ---
>>>>>>> drivers/pci/vgaarb.c | 22 ++++++++++++----------
>>>>>>> 1 file changed, 12 insertions(+), 10 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
>>>>>>> index c1bc6c983932..22a505e877dc 100644
>>>>>>> --- a/drivers/pci/vgaarb.c
>>>>>>> +++ b/drivers/pci/vgaarb.c
>>>>>>> @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev)
>>>>>>> struct pci_dev *bridge;
>>>>>>> u16 cmd;
>>>>>>>
>>>>>>> - /* Only deal with VGA class devices */
>>>>>>> - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
>>>>>>> - return false;
>>>>>>> -
>>>>>> Hi, here is probably a bug fixing.
>>>>>>
>>>>>> For an example, nvidia render only GPU typically has 0x0380.
>>>>>>
>>>>>> as its PCI class number, but render only GPU should not participate in
>>>>>> the arbitration.
>>>>>>
>>>>>> As it shouldn't snoop the legacy fixed VGA address.
>>>>>>
>>>>>> It(render only GPU) can not display anything.
>>>>>>
>>>>>>
>>>>>> But 0x0380 >> 8 = 0x03, the filter failed.
>>>>>>
>>>>>>
>>>>>>> /* Allocate structure */
>>>>>>> vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL);
>>>>>>> if (vgadev == NULL) {
>>>>>>> @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
>>>>>>> struct pci_dev *pdev = to_pci_dev(dev);
>>>>>>> bool notify = false;
>>>>>>>
>>>>>>> - vgaarb_dbg(dev, "%s\n", __func__);
>>>>>>> + /* Only deal with VGA class devices */
>>>>>>> + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8)
>>>>>>> + return 0;
>>>>>> So here we only care 0x0300, my initial intent is to make an optimization,
>>>>>>
>>>>>> nowadays sane display graphic card should all has 0x0300 as its PCI
>>>>>> class number, is this complete right?
>>>>>>
>>>>>> ```
>>>>>>
>>>>>> #define PCI_BASE_CLASS_DISPLAY 0x03
>>>>>> #define PCI_CLASS_DISPLAY_VGA 0x0300
>>>>>> #define PCI_CLASS_DISPLAY_XGA 0x0301
>>>>>> #define PCI_CLASS_DISPLAY_3D 0x0302
>>>>>> #define PCI_CLASS_DISPLAY_OTHER 0x0380
>>>>>>
>>>>>> ```
>>>>>>
>>>>>> Any ideas ?
>>>>> I'm not quite sure what you are asking about here.
>>>> To be honest, I'm worried about the PCI devices which has a
>>>>
>>>> PCI_CLASS_DISPLAY_XGA as its PCI class number.
>>>>
>>>> As those devices are very uncommon in the real world.
>>>>
>>>>
>>>> $ find . -name "*.c" -type f | xargs grep "PCI_CLASS_DISPLAY_XGA"
>>>>
>>>>
>>>> Grep the "PCI_CLASS_DISPLAY_XGA" in the linux kernel tree got ZERO,
>>>>
>>>> there no code reference this macro. So I think it seems safe to ignore
>>>> the XGA ?
>>>>
>>>>
>>>> PCI_CLASS_DISPLAY_3D and PCI_CLASS_DISPLAY_OTHER are used to annotate
>>>> the render-only GPU.
>>>>
>>>> And render-only GPU can't decode the fixed VGA address space, it is safe
>>>> to ignore them.
>>>>
>>>>
>>>>> For vga_arb, we
>>>>> only care about VGA class devices since those should be on the only
>>>>> ones that might have VGA routed to them.
>>>>> However, as VGA gets deprecated,
>>>> We need the vgaarb for a system with multiple video card.
>>>>
>>>> Not only because some Legacy VGA devices implemented
>>>>
>>>> on PCI will typically have the same "hard-decoded" addresses;
>>>>
>>>> But also these video card need to participate in the arbitration,
>>>>
>>>> determine the default boot device.
>>> But couldn't the boot device be determined via what whatever resources
>>> were used by the pre-OS console?
>> I don't know what you are refer to by saying pre-OS console, UEFI
>> SHELL, UEFI GOP or something like that.
>>
> Right. Before the OS loads the platform firmware generally sets up
> something for display. That could be GOP or vesa or some other
> platform specific protocol.
>
>> If you are referring to the framebuffer driver which light up the screen
>> before the Linux kernel is loaded .
>>
>>
>> Then, what you have said is true, the boot device is determined by the
>> pre-OS console.
>>
>> But the problem is how does the Linux kernel(vgaarb) could know which
>> one is the default boot device
>>
>> on a multiple GPU machine. Relaying on the firmware fb's address and
>> size is what the mechanism
>>
>> we already in using.
> Right. It shouldn't need to depend on vgaarb.
>
>>
>>> I feel like that should be separate from vgaarb.
>> Emm, this really deserved another patch, please ?
>>
>>> vgaarb should handle PCI VGA routing and some other
>>> mechanism should be used to determine what device provided the pre-OS
>>> console.
>> If the new mechanism need the firmware changed, then this probably break
>> the old machine.
>>
>> Also, this probably will get all arch involved. to get the new mechanism
>> supported.
>>
>> The testing pressure and review power needed is quite large.
>>
>> drm/amdgpu and drm/radeon already being used on X86, ARM64, Mips and
>> more arch...
>>
>> The reviewing process will became quite difficult then.
>>
>> vgaarb is really what we already in use, and being used more than ten
>> years ...
> Yes, it works for x86 (and a few other platforms) today because of the
> VGA legacy, so we can look at VGA routing to determine this. But even
> today, we don't need VGA routing to determine what was the primary
> display before starting the OS. We could probably have a platform
> independent way to handle this by looking at the bread crumbs leftover
> from the pre-OS environment. E.g., for pre-UEFI platforms, we can
> look at VGA routing. For UEFI platforms we can look at what GOP left
> us. For various non-UEFI ARM/PPC/MIPS/etc. platforms we can look at
> whatever breadcrumbs those pre-OS environments left. That way when
> VGA goes away, we can have a clean break and you won't need vgaarb if
> the platform has no VGA devices.

Yes, I'm complete agree the spirit you are convey here.

Patch 5[1] of this series is actually does what you have told me just now.

I put this function in video/aperture intended.


Its does not rely on "VGA routing".

Its also does not required the display device is PCI vga device.

Platform display controller drivers(drm/ingenic, drm/imx, drm/rockchip etc)

can also call this function(aperture_contain_firmware_fb).

It only require the platform has a firmware framebuffer driver
registered successfully.

Both EFIFB are SIMPLEFB will be OK.


Beside  this, on systems with a platform display controller device and a
PCI GPU device,

the current implement may always set the PCI GPU device as the default
boot device.

This is probably true for the arm64 platform.


On the past, loongson LS2K1000 SoC  has a platform display controller,

the SoC also has PCIE controller, So drm/radeon driver can be used on it.

When discrete card is mounted on the system, the discrete gpu is always been

selected as the default boot device in the past.

Because radeon gpu is more faster, this behavior meet the user's
expectation by accident.

Its funny and lucky.


But the problem is sometime you want to use the integrated one,

for example in the process of developing driver, or discrete GPU driver

has a bug panic X server,  the user(or the device driver) don't has the
right to override.


Also with the current implement, even you disable the device driver.

vgaarb still make the decision for you,  which made the X server
confused and panic.


Say on machine with loongson integrated display controller and a rx5700 
machine.

By passing the cmdline "modprobe.blacklist=amdgpu", then vgaarb will not
agree.

He still mark the rx5700 as the default boot device. Then X server crash.

Because xf86-video-amdgpu(still loaded) has no root in the kernel do the
service for it.


I'm doing such a thing because I need developing the drivers for this

integrated display controller, but I don't want unmounted the discrete
GPU every time.

Because I also need to do the PRIME buffer sharing developing and test.


For AMD APU, this is also the case.

R5 200GE and 3000G processor has the integrated GPU,

On a machine with R5 5600G + nvidia gtx1060, a user may want to compare

which GPU is more powerful. But with the current implement, both the device

driver and the end user don't have a choice.


This is the reason why patch 6, 7 and 8[2] of this series is introduced.

Device driver could do the force override if it isn't the default device.

(by introducing another kernel cmd line)

Because of device driver(.ko) get loaded latter than vgaarb.


So feel free send a reviewed by ?


[1] https://patchwork.freedesktop.org/patch/542253/?series=119250&rev=1

[2] https://patchwork.freedesktop.org/patch/542255/?series=119250&rev=1

> Alex
>
>>
>>> Alex
>>>
>>>> Nowadays, the 'VGA devices' here is stand for the Graphics card
>>>>
>>>> which is capable of display something on the screen.
>>>>
>>>> We still need vgaarb to select the default boot device.
>>>>
>>>>
>>>>> you'll have more non VGA PCI classes for devices which
>>>>> could be the pre-OS console device.
>>>> Ah, we still want do this(by applying this patch) first,
>>>>
>>>> and then we will have the opportunity to see who will crying if
>>>> something is broken. Will know more then.
>>>>
>>>> But drop this patch or revise it with more consideration is also
>>>> acceptable.
>>>>
>>>>
>>>> I asking about suggestion and/or review.
>>>>
>>>>> Alex
>>>>>
>>>>>>> /* For now we're only intereted in devices added and removed. I didn't
>>>>>>> * test this thing here, so someone needs to double check for the
>>>>>>> @@ -1510,6 +1508,8 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
>>>>>>> else if (action == BUS_NOTIFY_DEL_DEVICE)
>>>>>>> notify = vga_arbiter_del_pci_device(pdev);
>>>>>>>
>>>>>>> + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action);
>>>>>>> +
>>>>>>> if (notify)
>>>>>>> vga_arbiter_notify_clients();
>>>>>>> return 0;
>>>>>>> @@ -1534,8 +1534,8 @@ static struct miscdevice vga_arb_device = {
>>>>>>>
>>>>>>> static int __init vga_arb_device_init(void)
>>>>>>> {
>>>>>>> + struct pci_dev *pdev = NULL;
>>>>>>> int rc;
>>>>>>> - struct pci_dev *pdev;
>>>>>>>
>>>>>>> rc = misc_register(&vga_arb_device);
>>>>>>> if (rc < 0)
>>>>>>> @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void)
>>>>>>>
>>>>>>> /* We add all PCI devices satisfying VGA class in the arbiter by
>>>>>>> * default */
>>>>>>> - pdev = NULL;
>>>>>>> - while ((pdev =
>>>>>>> - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
>>>>>>> - PCI_ANY_ID, pdev)) != NULL)
>>>>>>> + while (1) {
>>>>>>> + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev);
>>>>>>> + if (!pdev)
>>>>>>> + break;
>>>>>>> +
>>>>>>> vga_arbiter_add_pci_device(pdev);
>>>>>>> + }
>>>>>>>
>>>>>>> pr_info("loaded\n");
>>>>>>> return rc;
>>>>>> --
>>>>>> Jingfeng
>>>>>>
>>>> --
>>>> Jingfeng
>>>>
>> --
>> Jingfeng
>>
--
Jingfeng


2023-06-18 12:43:15

by Sui Jingfeng

[permalink] [raw]
Subject: Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices

Hi,

On 2023/6/16 22:34, Alex Deucher wrote:
> On Fri, Jun 16, 2023 at 10:22 AM Sui Jingfeng <[email protected]> wrote:
>>
>> On 2023/6/16 21:41, Alex Deucher wrote:
>>> On Fri, Jun 16, 2023 at 3:11 AM Sui Jingfeng <[email protected]> wrote:
>>>> Hi,
>>>>
>>>> On 2023/6/16 05:11, Alex Deucher wrote:
>>>>> On Wed, Jun 14, 2023 at 6:50 AM Sui Jingfeng <[email protected]> wrote:
>>>>>> Hi,
>>>>>>
>>>>>> On 2023/6/13 11:01, Sui Jingfeng wrote:
>>>>>>> From: Sui Jingfeng <[email protected]>
>>>>>>>
>>>>>>> Deal only with the VGA devcie(pdev->class == 0x0300), so replace the
>>>>>>> pci_get_subsys() function with pci_get_class(). Filter the non-PCI display
>>>>>>> device(pdev->class != 0x0300) out. There no need to process the non-display
>>>>>>> PCI device.
>>>>>>>
>>>>>>> Cc: Bjorn Helgaas <[email protected]>
>>>>>>> Signed-off-by: Sui Jingfeng <[email protected]>
>>>>>>> ---
>>>>>>> drivers/pci/vgaarb.c | 22 ++++++++++++----------
>>>>>>> 1 file changed, 12 insertions(+), 10 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
>>>>>>> index c1bc6c983932..22a505e877dc 100644
>>>>>>> --- a/drivers/pci/vgaarb.c
>>>>>>> +++ b/drivers/pci/vgaarb.c
>>>>>>> @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev)
>>>>>>> struct pci_dev *bridge;
>>>>>>> u16 cmd;
>>>>>>>
>>>>>>> - /* Only deal with VGA class devices */
>>>>>>> - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
>>>>>>> - return false;
>>>>>>> -
>>>>>> Hi, here is probably a bug fixing.
>>>>>>
>>>>>> For an example, nvidia render only GPU typically has 0x0380.
>>>>>>
>>>>>> as its PCI class number, but render only GPU should not participate in
>>>>>> the arbitration.
>>>>>>
>>>>>> As it shouldn't snoop the legacy fixed VGA address.
>>>>>>
>>>>>> It(render only GPU) can not display anything.
>>>>>>
>>>>>>
>>>>>> But 0x0380 >> 8 = 0x03, the filter failed.
>>>>>>
>>>>>>
>>>>>>> /* Allocate structure */
>>>>>>> vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL);
>>>>>>> if (vgadev == NULL) {
>>>>>>> @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
>>>>>>> struct pci_dev *pdev = to_pci_dev(dev);
>>>>>>> bool notify = false;
>>>>>>>
>>>>>>> - vgaarb_dbg(dev, "%s\n", __func__);
>>>>>>> + /* Only deal with VGA class devices */
>>>>>>> + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8)
>>>>>>> + return 0;
>>>>>> So here we only care 0x0300, my initial intent is to make an optimization,
>>>>>>
>>>>>> nowadays sane display graphic card should all has 0x0300 as its PCI
>>>>>> class number, is this complete right?
>>>>>>
>>>>>> ```
>>>>>>
>>>>>> #define PCI_BASE_CLASS_DISPLAY 0x03
>>>>>> #define PCI_CLASS_DISPLAY_VGA 0x0300
>>>>>> #define PCI_CLASS_DISPLAY_XGA 0x0301
>>>>>> #define PCI_CLASS_DISPLAY_3D 0x0302
>>>>>> #define PCI_CLASS_DISPLAY_OTHER 0x0380
>>>>>>
>>>>>> ```
>>>>>>
>>>>>> Any ideas ?
>>>>> I'm not quite sure what you are asking about here.
>>>> To be honest, I'm worried about the PCI devices which has a
>>>>
>>>> PCI_CLASS_DISPLAY_XGA as its PCI class number.
>>>>
>>>> As those devices are very uncommon in the real world.
>>>>
>>>>
>>>> $ find . -name "*.c" -type f | xargs grep "PCI_CLASS_DISPLAY_XGA"
>>>>
>>>>
>>>> Grep the "PCI_CLASS_DISPLAY_XGA" in the linux kernel tree got ZERO,
>>>>
>>>> there no code reference this macro. So I think it seems safe to ignore
>>>> the XGA ?
>>>>
>>>>
>>>> PCI_CLASS_DISPLAY_3D and PCI_CLASS_DISPLAY_OTHER are used to annotate
>>>> the render-only GPU.
>>>>
>>>> And render-only GPU can't decode the fixed VGA address space, it is safe
>>>> to ignore them.
>>>>
>>>>
>>>>> For vga_arb, we
>>>>> only care about VGA class devices since those should be on the only
>>>>> ones that might have VGA routed to them.
>>>>> However, as VGA gets deprecated,
>>>> We need the vgaarb for a system with multiple video card.
>>>>
>>>> Not only because some Legacy VGA devices implemented
>>>>
>>>> on PCI will typically have the same "hard-decoded" addresses;
>>>>
>>>> But also these video card need to participate in the arbitration,
>>>>
>>>> determine the default boot device.
>>> But couldn't the boot device be determined via what whatever resources
>>> were used by the pre-OS console?
>> I don't know what you are refer to by saying pre-OS console, UEFI
>> SHELL, UEFI GOP or something like that.
>>
> Right. Before the OS loads the platform firmware generally sets up
> something for display. That could be GOP or vesa or some other
> platform specific protocol.
>
>> If you are referring to the framebuffer driver which light up the screen
>> before the Linux kernel is loaded .
>>
>>
>> Then, what you have said is true, the boot device is determined by the
>> pre-OS console.
>>
>> But the problem is how does the Linux kernel(vgaarb) could know which
>> one is the default boot device
>>
>> on a multiple GPU machine. Relaying on the firmware fb's address and
>> size is what the mechanism
>>
>> we already in using.
> Right. It shouldn't need to depend on vgaarb.
>
>>
>>> I feel like that should be separate from vgaarb.
>> Emm, this really deserved another patch, please ?
>>
>>> vgaarb should handle PCI VGA routing and some other
>>> mechanism should be used to determine what device provided the pre-OS
>>> console.
>> If the new mechanism need the firmware changed, then this probably break
>> the old machine.
>>
>> Also, this probably will get all arch involved. to get the new mechanism
>> supported.
>>
>> The testing pressure and review power needed is quite large.
>>
>> drm/amdgpu and drm/radeon already being used on X86, ARM64, Mips and
>> more arch...
>>
>> The reviewing process will became quite difficult then.
>>
>> vgaarb is really what we already in use, and being used more than ten
>> years ...

This base class is defined for all types of display controllers.

For VGA devices (Sub-Class 00h), the Programming Interface byte is
divided into a bit field that identifies additional video controller
compatibilities.

A device can support multiple interfaces by using the bit map to
indicate which interfaces are supported.

For the XGA devices (Sub-Class 01h), only the standard XGA interface is
defined.

Sub-Class 02h is for controllers that have hardware support for 3D
operations and are not VGA compatible.


> Yes, it works for x86 (and a few other platforms) today because of the
> VGA legacy, so we can look at VGA routing to determine this. But even
> today, we don't need VGA routing to determine what was the primary
> display before starting the OS. We could probably have a platform
> independent way to handle this by looking at the bread crumbs leftover
> from the pre-OS environment.

Yes, I agree with you.

> E.g., for pre-UEFI platforms, we can
> look at VGA routing. For UEFI platforms we can look at what GOP left
> us. For various non-UEFI ARM/PPC/MIPS/etc. platforms we can look at
> whatever breadcrumbs those pre-OS environments left. That way when
> VGA goes away, we can have a clean break and you won't need vgaarb if
> the platform has no VGA devices.

Yes, I agree with you.


Then, move vga_is_firmware_default() function to video/aperture.c also ?

Because this function shouldn't be platform dependent,

can be usable as long as the PCI resource relocation don't happen  (The
vram bar don't move),

And screen_info is more about video specifci thing.


Yes your are right, it seems that the selection of the default boot
device shouldn't depend on vgaarb.

As vgaarb is only for PCI vga compatible display controller.

It seems that platform display controller device should also
participated in the arbitration.

Emm, but that may a bit difficult. Because we rely on the PCI notifier
to snoop the bound between the drm driver and device,

call back to use if successful. So, what should I do next?  as a first step?

> Alex
>
>>
>>> Alex
>>>
>>>> Nowadays, the 'VGA devices' here is stand for the Graphics card
>>>>
>>>> which is capable of display something on the screen.
>>>>
>>>> We still need vgaarb to select the default boot device.
>>>>
>>>>
>>>>> you'll have more non VGA PCI classes for devices which
>>>>> could be the pre-OS console device.
>>>> Ah, we still want do this(by applying this patch) first,
>>>>
>>>> and then we will have the opportunity to see who will crying if
>>>> something is broken. Will know more then.
>>>>
>>>> But drop this patch or revise it with more consideration is also
>>>> acceptable.
>>>>
>>>>
>>>> I asking about suggestion and/or review.
>>>>
>>>>> Alex
>>>>>
>>>>>>> /* For now we're only intereted in devices added and removed. I didn't
>>>>>>> * test this thing here, so someone needs to double check for the
>>>>>>> @@ -1510,6 +1508,8 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
>>>>>>> else if (action == BUS_NOTIFY_DEL_DEVICE)
>>>>>>> notify = vga_arbiter_del_pci_device(pdev);
>>>>>>>
>>>>>>> + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action);
>>>>>>> +
>>>>>>> if (notify)
>>>>>>> vga_arbiter_notify_clients();
>>>>>>> return 0;
>>>>>>> @@ -1534,8 +1534,8 @@ static struct miscdevice vga_arb_device = {
>>>>>>>
>>>>>>> static int __init vga_arb_device_init(void)
>>>>>>> {
>>>>>>> + struct pci_dev *pdev = NULL;
>>>>>>> int rc;
>>>>>>> - struct pci_dev *pdev;
>>>>>>>
>>>>>>> rc = misc_register(&vga_arb_device);
>>>>>>> if (rc < 0)
>>>>>>> @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void)
>>>>>>>
>>>>>>> /* We add all PCI devices satisfying VGA class in the arbiter by
>>>>>>> * default */
>>>>>>> - pdev = NULL;
>>>>>>> - while ((pdev =
>>>>>>> - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
>>>>>>> - PCI_ANY_ID, pdev)) != NULL)
>>>>>>> + while (1) {
>>>>>>> + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev);
>>>>>>> + if (!pdev)
>>>>>>> + break;
>>>>>>> +
>>>>>>> vga_arbiter_add_pci_device(pdev);
>>>>>>> + }
>>>>>>>
>>>>>>> pr_info("loaded\n");
>>>>>>> return rc;
>>>>>> --
>>>>>> Jingfeng
>>>>>>
>>>> --
>>>> Jingfeng
>>>>
>> --
>> Jingfeng
>>
--
Jingfeng


2023-06-19 02:29:41

by Sui Jingfeng

[permalink] [raw]
Subject: Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices

Hi,

On 2023/6/18 20:11, Sui Jingfeng wrote:
> call back to use if successful


Call back to us if the drm device driver bound to the PCI GPU successfully


2023-06-19 02:56:14

by Sui Jingfeng

[permalink] [raw]
Subject: Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices


On 2023/6/18 20:11, Sui Jingfeng wrote:
> And screen_info is more about video specifci thing.


screen_info is something more about video specific thing.


2023-06-19 03:12:38

by Maciej W. Rozycki

[permalink] [raw]
Subject: Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices

On Tue, 13 Jun 2023, Sui Jingfeng wrote:

> Deal only with the VGA devcie(pdev->class == 0x0300), so replace the

Typo here: s/devcie/device/.

> pci_get_subsys() function with pci_get_class(). Filter the non-PCI display
> device(pdev->class != 0x0300) out. There no need to process the non-display
> PCI device.

I've only come across this patch series now. Without diving into what
this code actually does I have just one question as a matter of interest.

> diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
> index c1bc6c983932..22a505e877dc 100644
> --- a/drivers/pci/vgaarb.c
> +++ b/drivers/pci/vgaarb.c
> @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
> struct pci_dev *pdev = to_pci_dev(dev);
> bool notify = false;
>
> - vgaarb_dbg(dev, "%s\n", __func__);
> + /* Only deal with VGA class devices */
> + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8)
> + return 0;

Hmm, shouldn't this also handle PCI_CLASS_NOT_DEFINED_VGA? As far as I
know it is the equivalent of PCI_CLASS_DISPLAY_VGA for PCI <= 2.0 devices
that were implemented before the idea of PCI device classes has developed
into its current form. I may have such a VGA device somewhere.

Maciej

2023-06-19 04:03:32

by Sui Jingfeng

[permalink] [raw]
Subject: Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices

Hi,

On 2023/6/19 11:02, Maciej W. Rozycki wrote:
> On Tue, 13 Jun 2023, Sui Jingfeng wrote:
>
>> Deal only with the VGA devcie(pdev->class == 0x0300), so replace the
> Typo here: s/devcie/device/.
Thanks a lot,
>> pci_get_subsys() function with pci_get_class(). Filter the non-PCI display
>> device(pdev->class != 0x0300) out. There no need to process the non-display
>> PCI device.
> I've only come across this patch series now. Without diving into what
> this code actually does I have just one question as a matter of interest.
>
>> diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
>> index c1bc6c983932..22a505e877dc 100644
>> --- a/drivers/pci/vgaarb.c
>> +++ b/drivers/pci/vgaarb.c
>> @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
>> struct pci_dev *pdev = to_pci_dev(dev);
>> bool notify = false;
>>
>> - vgaarb_dbg(dev, "%s\n", __func__);
>> + /* Only deal with VGA class devices */
>> + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8)
>> + return 0;
> Hmm, shouldn't this also handle PCI_CLASS_NOT_DEFINED_VGA?

If your machine have only one such a VGA card, it probably don't hurt.

But, such a card will also get ignored originally (before applying this
patch).

> As far as I
> know it is the equivalent of PCI_CLASS_DISPLAY_VGA for PCI <= 2.0 devices
> that were implemented before the idea of PCI device classes has developed
> into its current form.

If multiple video card problems on your machine is matter,

then I think it do deserve another patch to clarify this issue and to
explain the rationale.

> I may have such a VGA device somewhere.
>
> Maciej

2023-06-21 08:09:17

by Sui Jingfeng

[permalink] [raw]
Subject: Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices

Hi,

On 2023/6/16 22:34, Alex Deucher wrote:
> On Fri, Jun 16, 2023 at 10:22 AM Sui Jingfeng <[email protected]> wrote:
>>
>> On 2023/6/16 21:41, Alex Deucher wrote:
>>> On Fri, Jun 16, 2023 at 3:11 AM Sui Jingfeng <[email protected]> wrote:
>>>> Hi,
>>>>
>>>> On 2023/6/16 05:11, Alex Deucher wrote:
>>>>> On Wed, Jun 14, 2023 at 6:50 AM Sui Jingfeng <[email protected]> wrote:
>>>>>> Hi,
>>>>>>
>>>>>> On 2023/6/13 11:01, Sui Jingfeng wrote:
>>>>>>> From: Sui Jingfeng <[email protected]>
>>>>>>>
>>>>>>> Deal only with the VGA devcie(pdev->class == 0x0300), so replace the
>>>>>>> pci_get_subsys() function with pci_get_class(). Filter the non-PCI display
>>>>>>> device(pdev->class != 0x0300) out. There no need to process the non-display
>>>>>>> PCI device.
>>>>>>>
>>>>>>> Cc: Bjorn Helgaas <[email protected]>
>>>>>>> Signed-off-by: Sui Jingfeng <[email protected]>
>>>>>>> ---
>>>>>>> drivers/pci/vgaarb.c | 22 ++++++++++++----------
>>>>>>> 1 file changed, 12 insertions(+), 10 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
>>>>>>> index c1bc6c983932..22a505e877dc 100644
>>>>>>> --- a/drivers/pci/vgaarb.c
>>>>>>> +++ b/drivers/pci/vgaarb.c
>>>>>>> @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev)
>>>>>>> struct pci_dev *bridge;
>>>>>>> u16 cmd;
>>>>>>>
>>>>>>> - /* Only deal with VGA class devices */
>>>>>>> - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
>>>>>>> - return false;
>>>>>>> -
>>>>>> Hi, here is probably a bug fixing.
>>>>>>
>>>>>> For an example, nvidia render only GPU typically has 0x0380.
>>>>>>
>>>>>> as its PCI class number, but render only GPU should not participate in
>>>>>> the arbitration.
>>>>>>
>>>>>> As it shouldn't snoop the legacy fixed VGA address.
>>>>>>
>>>>>> It(render only GPU) can not display anything.
>>>>>>
>>>>>>
>>>>>> But 0x0380 >> 8 = 0x03, the filter failed.
>>>>>>
>>>>>>
>>>>>>> /* Allocate structure */
>>>>>>> vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL);
>>>>>>> if (vgadev == NULL) {
>>>>>>> @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
>>>>>>> struct pci_dev *pdev = to_pci_dev(dev);
>>>>>>> bool notify = false;
>>>>>>>
>>>>>>> - vgaarb_dbg(dev, "%s\n", __func__);
>>>>>>> + /* Only deal with VGA class devices */
>>>>>>> + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8)
>>>>>>> + return 0;
>>>>>> So here we only care 0x0300, my initial intent is to make an optimization,
>>>>>>
>>>>>> nowadays sane display graphic card should all has 0x0300 as its PCI
>>>>>> class number, is this complete right?
>>>>>>
>>>>>> ```
>>>>>>
>>>>>> #define PCI_BASE_CLASS_DISPLAY 0x03
>>>>>> #define PCI_CLASS_DISPLAY_VGA 0x0300
>>>>>> #define PCI_CLASS_DISPLAY_XGA 0x0301
>>>>>> #define PCI_CLASS_DISPLAY_3D 0x0302
>>>>>> #define PCI_CLASS_DISPLAY_OTHER 0x0380
>>>>>>
>>>>>> ```
>>>>>>
>>>>>> Any ideas ?
>>>>> I'm not quite sure what you are asking about here.
>>>> To be honest, I'm worried about the PCI devices which has a
>>>>
>>>> PCI_CLASS_DISPLAY_XGA as its PCI class number.
>>>>
>>>> As those devices are very uncommon in the real world.
>>>>
>>>>
>>>> $ find . -name "*.c" -type f | xargs grep "PCI_CLASS_DISPLAY_XGA"
>>>>
>>>>
>>>> Grep the "PCI_CLASS_DISPLAY_XGA" in the linux kernel tree got ZERO,
>>>>
>>>> there no code reference this macro. So I think it seems safe to ignore
>>>> the XGA ?
>>>>
>>>>
>>>> PCI_CLASS_DISPLAY_3D and PCI_CLASS_DISPLAY_OTHER are used to annotate
>>>> the render-only GPU.
>>>>
>>>> And render-only GPU can't decode the fixed VGA address space, it is safe
>>>> to ignore them.
>>>>
>>>>
>>>>> For vga_arb, we
>>>>> only care about VGA class devices since those should be on the only
>>>>> ones that might have VGA routed to them.
>>>>> However, as VGA gets deprecated,
>>>> We need the vgaarb for a system with multiple video card.
>>>>
>>>> Not only because some Legacy VGA devices implemented
>>>>
>>>> on PCI will typically have the same "hard-decoded" addresses;
>>>>
>>>> But also these video card need to participate in the arbitration,
>>>>
>>>> determine the default boot device.
>>> But couldn't the boot device be determined via what whatever resources
>>> were used by the pre-OS console?
>> I don't know what you are refer to by saying pre-OS console, UEFI
>> SHELL, UEFI GOP or something like that.
>>
> Right. Before the OS loads the platform firmware generally sets up
> something for display. That could be GOP or vesa or some other
> platform specific protocol.
>
>> If you are referring to the framebuffer driver which light up the screen
>> before the Linux kernel is loaded .
>>
>>
>> Then, what you have said is true, the boot device is determined by the
>> pre-OS console.
>>
>> But the problem is how does the Linux kernel(vgaarb) could know which
>> one is the default boot device
>>
>> on a multiple GPU machine. Relaying on the firmware fb's address and
>> size is what the mechanism
>>
>> we already in using.
> Right. It shouldn't need to depend on vgaarb.
>
>>
>>> I feel like that should be separate from vgaarb.
>> Emm, this really deserved another patch, please ?
>>
>>> vgaarb should handle PCI VGA routing and some other
>>> mechanism should be used to determine what device provided the pre-OS
>>> console.
>> If the new mechanism need the firmware changed, then this probably break
>> the old machine.
>>
>> Also, this probably will get all arch involved. to get the new mechanism
>> supported.
>>
>> The testing pressure and review power needed is quite large.
>>
>> drm/amdgpu and drm/radeon already being used on X86, ARM64, Mips and
>> more arch...
>>
>> The reviewing process will became quite difficult then.
>>
>> vgaarb is really what we already in use, and being used more than ten
>> years ...
> Yes, it works for x86 (and a few other platforms) today because of the
> VGA legacy, so we can look at VGA routing to determine this. But even
> today, we don't need VGA routing to determine what was the primary
> display before starting the OS. We could probably have a platform
> independent way to handle this by looking at the bread crumbs leftover
> from the pre-OS environment. E.g., for pre-UEFI platforms, we can
> look at VGA routing. For UEFI platforms we can look at what GOP left
> us. For various non-UEFI ARM/PPC/MIPS/etc. platforms we can look at
> whatever breadcrumbs those pre-OS environments left. That way when
> VGA goes away, we can have a clean break and you won't need vgaarb if
> the platform has no VGA devices.


Thanks for the dear developers from AMDGPU,


Mario already give me a R-B to V6 of this series[1],  I link it to
here,//for fear of lost it.

He may be busy,  not seeing the latest.


For this patch series, V6 is same with the V7.


[1]
https://lore.kernel.org/all/[email protected]/

> Alex
>
>>
>>> Alex
>>>
>>>> Nowadays, the 'VGA devices' here is stand for the Graphics card
>>>>
>>>> which is capable of display something on the screen.
>>>>
>>>> We still need vgaarb to select the default boot device.
>>>>
>>>>
>>>>> you'll have more non VGA PCI classes for devices which
>>>>> could be the pre-OS console device.
>>>> Ah, we still want do this(by applying this patch) first,
>>>>
>>>> and then we will have the opportunity to see who will crying if
>>>> something is broken. Will know more then.
>>>>
>>>> But drop this patch or revise it with more consideration is also
>>>> acceptable.
>>>>
>>>>
>>>> I asking about suggestion and/or review.
>>>>
>>>>> Alex
>>>>>
>>>>>>> /* For now we're only intereted in devices added and removed. I didn't
>>>>>>> * test this thing here, so someone needs to double check for the
>>>>>>> @@ -1510,6 +1508,8 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
>>>>>>> else if (action == BUS_NOTIFY_DEL_DEVICE)
>>>>>>> notify = vga_arbiter_del_pci_device(pdev);
>>>>>>>
>>>>>>> + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action);
>>>>>>> +
>>>>>>> if (notify)
>>>>>>> vga_arbiter_notify_clients();
>>>>>>> return 0;
>>>>>>> @@ -1534,8 +1534,8 @@ static struct miscdevice vga_arb_device = {
>>>>>>>
>>>>>>> static int __init vga_arb_device_init(void)
>>>>>>> {
>>>>>>> + struct pci_dev *pdev = NULL;
>>>>>>> int rc;
>>>>>>> - struct pci_dev *pdev;
>>>>>>>
>>>>>>> rc = misc_register(&vga_arb_device);
>>>>>>> if (rc < 0)
>>>>>>> @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void)
>>>>>>>
>>>>>>> /* We add all PCI devices satisfying VGA class in the arbiter by
>>>>>>> * default */
>>>>>>> - pdev = NULL;
>>>>>>> - while ((pdev =
>>>>>>> - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
>>>>>>> - PCI_ANY_ID, pdev)) != NULL)
>>>>>>> + while (1) {
>>>>>>> + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev);
>>>>>>> + if (!pdev)
>>>>>>> + break;
>>>>>>> +
>>>>>>> vga_arbiter_add_pci_device(pdev);
>>>>>>> + }
>>>>>>>
>>>>>>> pr_info("loaded\n");
>>>>>>> return rc;
>>>>>> --
>>>>>> Jingfeng
>>>>>>
>>>> --
>>>> Jingfeng
>>>>
>> --
>> Jingfeng
>>
--
Jingfeng


2023-07-03 17:38:34

by Sui Jingfeng

[permalink] [raw]
Subject: Re: [PATCH v7 2/8] PCI/VGA: Deal only with VGA class devices

Hi,

On 2023/6/16 22:34, Alex Deucher wrote:
> On Fri, Jun 16, 2023 at 10:22 AM Sui Jingfeng <[email protected]> wrote:
>>
>> On 2023/6/16 21:41, Alex Deucher wrote:
>>> On Fri, Jun 16, 2023 at 3:11 AM Sui Jingfeng <[email protected]> wrote:
>>>> Hi,
>>>>
>>>> On 2023/6/16 05:11, Alex Deucher wrote:
>>>>> On Wed, Jun 14, 2023 at 6:50 AM Sui Jingfeng <[email protected]> wrote:
>>>>>> Hi,
>>>>>>
>>>>>> On 2023/6/13 11:01, Sui Jingfeng wrote:
>>>>>>> From: Sui Jingfeng <[email protected]>
>>>>>>>
>>>>>>> Deal only with the VGA devcie(pdev->class == 0x0300), so replace the
>>>>>>> pci_get_subsys() function with pci_get_class(). Filter the non-PCI display
>>>>>>> device(pdev->class != 0x0300) out. There no need to process the non-display
>>>>>>> PCI device.
>>>>>>>
>>>>>>> Cc: Bjorn Helgaas <[email protected]>
>>>>>>> Signed-off-by: Sui Jingfeng <[email protected]>
>>>>>>> ---
>>>>>>> drivers/pci/vgaarb.c | 22 ++++++++++++----------
>>>>>>> 1 file changed, 12 insertions(+), 10 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c
>>>>>>> index c1bc6c983932..22a505e877dc 100644
>>>>>>> --- a/drivers/pci/vgaarb.c
>>>>>>> +++ b/drivers/pci/vgaarb.c
>>>>>>> @@ -754,10 +754,6 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev)
>>>>>>> struct pci_dev *bridge;
>>>>>>> u16 cmd;
>>>>>>>
>>>>>>> - /* Only deal with VGA class devices */
>>>>>>> - if ((pdev->class >> 8) != PCI_CLASS_DISPLAY_VGA)
>>>>>>> - return false;
>>>>>>> -
>>>>>> Hi, here is probably a bug fixing.
>>>>>>
>>>>>> For an example, nvidia render only GPU typically has 0x0380.
>>>>>>
>>>>>> as its PCI class number, but render only GPU should not participate in
>>>>>> the arbitration.
>>>>>>
>>>>>> As it shouldn't snoop the legacy fixed VGA address.
>>>>>>
>>>>>> It(render only GPU) can not display anything.
>>>>>>
>>>>>>
>>>>>> But 0x0380 >> 8 = 0x03, the filter failed.
>>>>>>
>>>>>>
>>>>>>> /* Allocate structure */
>>>>>>> vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL);
>>>>>>> if (vgadev == NULL) {
>>>>>>> @@ -1500,7 +1496,9 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
>>>>>>> struct pci_dev *pdev = to_pci_dev(dev);
>>>>>>> bool notify = false;
>>>>>>>
>>>>>>> - vgaarb_dbg(dev, "%s\n", __func__);
>>>>>>> + /* Only deal with VGA class devices */
>>>>>>> + if (pdev->class != PCI_CLASS_DISPLAY_VGA << 8)
>>>>>>> + return 0;
>>>>>> So here we only care 0x0300, my initial intent is to make an optimization,
>>>>>>
>>>>>> nowadays sane display graphic card should all has 0x0300 as its PCI
>>>>>> class number, is this complete right?
>>>>>>
>>>>>> ```
>>>>>>
>>>>>> #define PCI_BASE_CLASS_DISPLAY 0x03
>>>>>> #define PCI_CLASS_DISPLAY_VGA 0x0300
>>>>>> #define PCI_CLASS_DISPLAY_XGA 0x0301
>>>>>> #define PCI_CLASS_DISPLAY_3D 0x0302
>>>>>> #define PCI_CLASS_DISPLAY_OTHER 0x0380
>>>>>>
>>>>>> ```
>>>>>>
>>>>>> Any ideas ?
>>>>> I'm not quite sure what you are asking about here.
>>>> To be honest, I'm worried about the PCI devices which has a
>>>>
>>>> PCI_CLASS_DISPLAY_XGA as its PCI class number.
>>>>
>>>> As those devices are very uncommon in the real world.
>>>>
>>>>
>>>> $ find . -name "*.c" -type f | xargs grep "PCI_CLASS_DISPLAY_XGA"
>>>>
>>>>
>>>> Grep the "PCI_CLASS_DISPLAY_XGA" in the linux kernel tree got ZERO,
>>>>
>>>> there no code reference this macro. So I think it seems safe to ignore
>>>> the XGA ?
>>>>
>>>>
>>>> PCI_CLASS_DISPLAY_3D and PCI_CLASS_DISPLAY_OTHER are used to annotate
>>>> the render-only GPU.
>>>>
>>>> And render-only GPU can't decode the fixed VGA address space, it is safe
>>>> to ignore them.
>>>>
>>>>
>>>>> For vga_arb, we
>>>>> only care about VGA class devices since those should be on the only
>>>>> ones that might have VGA routed to them.
>>>>> However, as VGA gets deprecated,
>>>> We need the vgaarb for a system with multiple video card.
>>>>
>>>> Not only because some Legacy VGA devices implemented
>>>>
>>>> on PCI will typically have the same "hard-decoded" addresses;
>>>>
>>>> But also these video card need to participate in the arbitration,
>>>>
>>>> determine the default boot device.
>>> But couldn't the boot device be determined via what whatever resources
>>> were used by the pre-OS console?
>> I don't know what you are refer to by saying pre-OS console, UEFI
>> SHELL, UEFI GOP or something like that.
>>
> Right. Before the OS loads the platform firmware generally sets up
> something for display. That could be GOP or vesa or some other
> platform specific protocol.
>
>> If you are referring to the framebuffer driver which light up the screen
>> before the Linux kernel is loaded .
>>
>>
>> Then, what you have said is true, the boot device is determined by the
>> pre-OS console.
>>
>> But the problem is how does the Linux kernel(vgaarb) could know which
>> one is the default boot device
>>
>> on a multiple GPU machine. Relaying on the firmware fb's address and
>> size is what the mechanism
>>
>> we already in using.
> Right. It shouldn't need to depend on vgaarb.
>
>>
>>> I feel like that should be separate from vgaarb.
>> Emm, this really deserved another patch, please ?
>>
>>> vgaarb should handle PCI VGA routing and some other
>>> mechanism should be used to determine what device provided the pre-OS
>>> console.
>> If the new mechanism need the firmware changed, then this probably break
>> the old machine.
>>
>> Also, this probably will get all arch involved. to get the new mechanism
>> supported.
>>
>> The testing pressure and review power needed is quite large.
>>
>> drm/amdgpu and drm/radeon already being used on X86, ARM64, Mips and
>> more arch...
>>
>> The reviewing process will became quite difficult then.
>>
>> vgaarb is really what we already in use, and being used more than ten
>> years ...
> Yes, it works for x86 (and a few other platforms) today because of the
> VGA legacy, so we can look at VGA routing to determine this. But even
> today, we don't need VGA routing to determine what was the primary
> display before starting the OS. We could probably have a platform
> independent way to handle this by looking at the bread crumbs leftover
> from the pre-OS environment. E.g., for pre-UEFI platforms, we can
> look at VGA routing. For UEFI platforms we can look at what GOP left
> us. For various non-UEFI ARM/PPC/MIPS/etc. platforms we can look at
> whatever breadcrumbs those pre-OS environments left.

Yes, it's good idea. I know what you means.

I'm trying to craft platform-independent way to handle this, and I have
something in my mind.

I already write a few scratch code to verify the idea.

But when I go deeper, I ask myself frequently, what's the problem you
are going to solve ?

A platform-independent implement itself is not a good sake (not enough).

To allow the platform device P.K. with the PCI device is a good rationale.

But now,  I don't have such a hardware platform anymore.

Such a hardware platform should have PCIe controller integrated to
support dedicated GPU.

It should also have integrated GPU(display controller). I should also be
famous platform (ARM64 for example)

I means that we at least need a bug to push us to do so :-)


Strip it away from vgaarb sound like a challenge to Bjorn, I'm not going
to do so.

Currently, we are trying to push it to be arch-independent,

which is to make vgaarb more functional(and more useful) on non-x86 arch,

which also help to amdgpu/radeon cooperate with the hardware vendor
native integrated one harmoniously.

while there are even no enough supporter.

> VGA goes away, we can have a clean break and you won't need vgaarb if
> the platform has no VGA devices.
>
> Alex
>
>>
>>> Alex
>>>
>>>> Nowadays, the 'VGA devices' here is stand for the Graphics card
>>>>
>>>> which is capable of display something on the screen.
>>>>
>>>> We still need vgaarb to select the default boot device.
>>>>
>>>>
>>>>> you'll have more non VGA PCI classes for devices which
>>>>> could be the pre-OS console device.
>>>> Ah, we still want do this(by applying this patch) first,
>>>>
>>>> and then we will have the opportunity to see who will crying if
>>>> something is broken. Will know more then.
>>>>
>>>> But drop this patch or revise it with more consideration is also
>>>> acceptable.
>>>>
>>>>
>>>> I asking about suggestion and/or review.
>>>>
>>>>> Alex
>>>>>
>>>>>>> /* For now we're only intereted in devices added and removed. I didn't
>>>>>>> * test this thing here, so someone needs to double check for the
>>>>>>> @@ -1510,6 +1508,8 @@ static int pci_notify(struct notifier_block *nb, unsigned long action,
>>>>>>> else if (action == BUS_NOTIFY_DEL_DEVICE)
>>>>>>> notify = vga_arbiter_del_pci_device(pdev);
>>>>>>>
>>>>>>> + vgaarb_dbg(dev, "%s: action = %lu\n", __func__, action);
>>>>>>> +
>>>>>>> if (notify)
>>>>>>> vga_arbiter_notify_clients();
>>>>>>> return 0;
>>>>>>> @@ -1534,8 +1534,8 @@ static struct miscdevice vga_arb_device = {
>>>>>>>
>>>>>>> static int __init vga_arb_device_init(void)
>>>>>>> {
>>>>>>> + struct pci_dev *pdev = NULL;
>>>>>>> int rc;
>>>>>>> - struct pci_dev *pdev;
>>>>>>>
>>>>>>> rc = misc_register(&vga_arb_device);
>>>>>>> if (rc < 0)
>>>>>>> @@ -1545,11 +1545,13 @@ static int __init vga_arb_device_init(void)
>>>>>>>
>>>>>>> /* We add all PCI devices satisfying VGA class in the arbiter by
>>>>>>> * default */
>>>>>>> - pdev = NULL;
>>>>>>> - while ((pdev =
>>>>>>> - pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
>>>>>>> - PCI_ANY_ID, pdev)) != NULL)
>>>>>>> + while (1) {
>>>>>>> + pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev);
>>>>>>> + if (!pdev)
>>>>>>> + break;
>>>>>>> +
>>>>>>> vga_arbiter_add_pci_device(pdev);
>>>>>>> + }
>>>>>>>
>>>>>>> pr_info("loaded\n");
>>>>>>> return rc;
>>>>>> --
>>>>>> Jingfeng
>>>>>>
>>>> --
>>>> Jingfeng
>>>>
>> --
>> Jingfeng
>>