2023-08-08 20:43:43

by Bjorn Helgaas

[permalink] [raw]
Subject: [PATCH] PCI: acpiphp: Log more slot and notification details

From: Bjorn Helgaas <[email protected]>

When registering an acpiphp slot, log the slot name in the same style as
pciehp and include the PCI bus/device and whether a device is present or
the slot is empty.

When handling an ACPI notification, log the PCI bus/device and notification
type.

Sample dmesg log diff:

ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
- acpiphp: Slot [3] registered
- acpiphp: Slot [4] registered
PCI host bridge to bus 0000:00
pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
<ACPI Device Check notification>
pci 0000:00:04.0: [8086:100e] type 00 class 0x020000

ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
+ acpiphp: pci 0000:00:03 Slot(3) registered (enabled)
+ acpiphp: pci 0000:00:04 Slot(4) registered (empty)
PCI host bridge to bus 0000:00
pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
<ACPI Device Check notification>
+ acpiphp: pci 0000:00:04 Slot(4) Device Check
pci 0000:00:04.0: [8086:100e] type 00 class 0x020000

Signed-off-by: Bjorn Helgaas <[email protected]>
---
drivers/pci/hotplug/acpiphp_core.c | 4 ----
drivers/pci/hotplug/acpiphp_glue.c | 23 +++++++++++++++++++++--
2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
index c02257f4b61c..19d47607d009 100644
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -282,8 +282,6 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot,
goto error_slot;
}

- pr_info("Slot [%s] registered\n", slot_name(slot));
-
return 0;
error_slot:
kfree(slot);
@@ -296,8 +294,6 @@ void acpiphp_unregister_hotplug_slot(struct acpiphp_slot *acpiphp_slot)
{
struct slot *slot = acpiphp_slot->slot;

- pr_info("Slot [%s] unregistered\n", slot_name(slot));
-
pci_hp_deregister(&slot->hotplug_slot);
kfree(slot);
}
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 328d1e416014..eeca2753a5c7 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -25,7 +25,7 @@
* bus. It loses the refcount when the driver unloads.
*/

-#define pr_fmt(fmt) "acpiphp_glue: " fmt
+#define pr_fmt(fmt) "acpiphp: " fmt

#include <linux/module.h>

@@ -333,6 +333,12 @@ static acpi_status acpiphp_add_context(acpi_handle handle, u32 lvl, void *data,
&val, 60*1000))
slot->flags |= SLOT_ENABLED;

+ if (slot->slot)
+ pr_info("pci %04x:%02x:%02x Slot(%s) registered (%s)\n",
+ pci_domain_nr(slot->bus), slot->bus->number,
+ slot->device, slot_name(slot->slot),
+ slot->flags & SLOT_ENABLED ? "enabled" : "empty");
+
return AE_OK;
}

@@ -351,8 +357,13 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge)
acpi_unlock_hp_context();
}
slot->flags |= SLOT_IS_GOING_AWAY;
- if (slot->slot)
+ if (slot->slot) {
+ pr_info("pci %04x:%02x:%02x Slot(%s) unregistered\n",
+ pci_domain_nr(slot->bus), slot->bus->number,
+ slot->device, slot_name(slot->slot));
+
acpiphp_unregister_hotplug_slot(slot);
+ }
}

mutex_lock(&bridge_mutex);
@@ -793,6 +804,14 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)

pci_lock_rescan_remove();

+ pr_info("pci %04x:%02x:%02x Slot(%s) %s\n",
+ pci_domain_nr(slot->bus), slot->bus->number,
+ slot->device, slot_name(slot->slot),
+ type == ACPI_NOTIFY_BUS_CHECK ? "Bus Check" :
+ type == ACPI_NOTIFY_DEVICE_CHECK ? "Device Check" :
+ type == ACPI_NOTIFY_EJECT_REQUEST ? "Eject Request" :
+ "Notification");
+
switch (type) {
case ACPI_NOTIFY_BUS_CHECK:
/* bus re-enumerate */
--
2.34.1



2023-08-08 20:43:55

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH] PCI: acpiphp: Log more slot and notification details

On Tue, Aug 8, 2023 at 9:27 PM Bjorn Helgaas <[email protected]> wrote:
>
> From: Bjorn Helgaas <[email protected]>
>
> When registering an acpiphp slot, log the slot name in the same style as
> pciehp and include the PCI bus/device and whether a device is present or
> the slot is empty.
>
> When handling an ACPI notification, log the PCI bus/device and notification
> type.
>
> Sample dmesg log diff:
>
> ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
> - acpiphp: Slot [3] registered
> - acpiphp: Slot [4] registered
> PCI host bridge to bus 0000:00
> pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
> <ACPI Device Check notification>
> pci 0000:00:04.0: [8086:100e] type 00 class 0x020000
>
> ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
> + acpiphp: pci 0000:00:03 Slot(3) registered (enabled)
> + acpiphp: pci 0000:00:04 Slot(4) registered (empty)
> PCI host bridge to bus 0000:00
> pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
> <ACPI Device Check notification>
> + acpiphp: pci 0000:00:04 Slot(4) Device Check
> pci 0000:00:04.0: [8086:100e] type 00 class 0x020000
>
> Signed-off-by: Bjorn Helgaas <[email protected]>
> ---
> drivers/pci/hotplug/acpiphp_core.c | 4 ----
> drivers/pci/hotplug/acpiphp_glue.c | 23 +++++++++++++++++++++--
> 2 files changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
> index c02257f4b61c..19d47607d009 100644
> --- a/drivers/pci/hotplug/acpiphp_core.c
> +++ b/drivers/pci/hotplug/acpiphp_core.c
> @@ -282,8 +282,6 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot,
> goto error_slot;
> }
>
> - pr_info("Slot [%s] registered\n", slot_name(slot));
> -
> return 0;
> error_slot:
> kfree(slot);
> @@ -296,8 +294,6 @@ void acpiphp_unregister_hotplug_slot(struct acpiphp_slot *acpiphp_slot)
> {
> struct slot *slot = acpiphp_slot->slot;
>
> - pr_info("Slot [%s] unregistered\n", slot_name(slot));
> -
> pci_hp_deregister(&slot->hotplug_slot);
> kfree(slot);
> }
> diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
> index 328d1e416014..eeca2753a5c7 100644
> --- a/drivers/pci/hotplug/acpiphp_glue.c
> +++ b/drivers/pci/hotplug/acpiphp_glue.c
> @@ -25,7 +25,7 @@
> * bus. It loses the refcount when the driver unloads.
> */
>
> -#define pr_fmt(fmt) "acpiphp_glue: " fmt
> +#define pr_fmt(fmt) "acpiphp: " fmt
>
> #include <linux/module.h>
>
> @@ -333,6 +333,12 @@ static acpi_status acpiphp_add_context(acpi_handle handle, u32 lvl, void *data,
> &val, 60*1000))
> slot->flags |= SLOT_ENABLED;
>
> + if (slot->slot)
> + pr_info("pci %04x:%02x:%02x Slot(%s) registered (%s)\n",
> + pci_domain_nr(slot->bus), slot->bus->number,
> + slot->device, slot_name(slot->slot),
> + slot->flags & SLOT_ENABLED ? "enabled" : "empty");
> +
> return AE_OK;
> }
>
> @@ -351,8 +357,13 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge)
> acpi_unlock_hp_context();
> }
> slot->flags |= SLOT_IS_GOING_AWAY;
> - if (slot->slot)
> + if (slot->slot) {
> + pr_info("pci %04x:%02x:%02x Slot(%s) unregistered\n",
> + pci_domain_nr(slot->bus), slot->bus->number,
> + slot->device, slot_name(slot->slot));
> +
> acpiphp_unregister_hotplug_slot(slot);
> + }
> }
>
> mutex_lock(&bridge_mutex);
> @@ -793,6 +804,14 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
>
> pci_lock_rescan_remove();
>
> + pr_info("pci %04x:%02x:%02x Slot(%s) %s\n",
> + pci_domain_nr(slot->bus), slot->bus->number,
> + slot->device, slot_name(slot->slot),
> + type == ACPI_NOTIFY_BUS_CHECK ? "Bus Check" :
> + type == ACPI_NOTIFY_DEVICE_CHECK ? "Device Check" :
> + type == ACPI_NOTIFY_EJECT_REQUEST ? "Eject Request" :
> + "Notification");
> +

pr_debug() perhaps?

On systems that don't have any hotplug problems these messages will
just be filling the kernel log unnecessarily.

> switch (type) {
> case ACPI_NOTIFY_BUS_CHECK:
> /* bus re-enumerate */
> --

2023-08-08 21:04:22

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [PATCH] PCI: acpiphp: Log more slot and notification details

On Tue, Aug 08, 2023 at 09:39:22PM +0200, Rafael J. Wysocki wrote:
> On Tue, Aug 8, 2023 at 9:27 PM Bjorn Helgaas <[email protected]> wrote:
> > From: Bjorn Helgaas <[email protected]>
> >
> > When registering an acpiphp slot, log the slot name in the same style as
> > pciehp and include the PCI bus/device and whether a device is present or
> > the slot is empty.
> >
> > When handling an ACPI notification, log the PCI bus/device and notification
> > type.
> >
> > Sample dmesg log diff:
> >
> > ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
> > - acpiphp: Slot [3] registered
> > - acpiphp: Slot [4] registered
> > PCI host bridge to bus 0000:00
> > pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
> > <ACPI Device Check notification>
> > pci 0000:00:04.0: [8086:100e] type 00 class 0x020000
> >
> > ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
> > + acpiphp: pci 0000:00:03 Slot(3) registered (enabled)
> > + acpiphp: pci 0000:00:04 Slot(4) registered (empty)
> > PCI host bridge to bus 0000:00
> > pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
> > <ACPI Device Check notification>
> > + acpiphp: pci 0000:00:04 Slot(4) Device Check
> > pci 0000:00:04.0: [8086:100e] type 00 class 0x020000
> > ...

> > @@ -793,6 +804,14 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
> >
> > pci_lock_rescan_remove();
> >
> > + pr_info("pci %04x:%02x:%02x Slot(%s) %s\n",
> > + pci_domain_nr(slot->bus), slot->bus->number,
> > + slot->device, slot_name(slot->slot),
> > + type == ACPI_NOTIFY_BUS_CHECK ? "Bus Check" :
> > + type == ACPI_NOTIFY_DEVICE_CHECK ? "Device Check" :
> > + type == ACPI_NOTIFY_EJECT_REQUEST ? "Eject Request" :
> > + "Notification");
>
> pr_debug() perhaps?
>
> On systems that don't have any hotplug problems these messages will
> just be filling the kernel log unnecessarily.

If these notifications are really common, pr_debug() sounds like the
right thing. I assumed that they would not be common, e.g., they
would happen for user-time things like dock/undock, plug/unplug,
suspend/resume, etc.

In pciehp, we use _info for attention button presses, presence detect
changes, link up/down, and I assumed the ACPI notify events would
roughly correspond to those. No?

Bjorn

2023-08-08 23:14:07

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH] PCI: acpiphp: Log more slot and notification details

On Tue, Aug 8, 2023 at 10:58 PM Bjorn Helgaas <[email protected]> wrote:
>
> On Tue, Aug 08, 2023 at 09:39:22PM +0200, Rafael J. Wysocki wrote:
> > On Tue, Aug 8, 2023 at 9:27 PM Bjorn Helgaas <[email protected]> wrote:
> > > From: Bjorn Helgaas <[email protected]>
> > >
> > > When registering an acpiphp slot, log the slot name in the same style as
> > > pciehp and include the PCI bus/device and whether a device is present or
> > > the slot is empty.
> > >
> > > When handling an ACPI notification, log the PCI bus/device and notification
> > > type.
> > >
> > > Sample dmesg log diff:
> > >
> > > ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
> > > - acpiphp: Slot [3] registered
> > > - acpiphp: Slot [4] registered
> > > PCI host bridge to bus 0000:00
> > > pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
> > > <ACPI Device Check notification>
> > > pci 0000:00:04.0: [8086:100e] type 00 class 0x020000
> > >
> > > ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
> > > + acpiphp: pci 0000:00:03 Slot(3) registered (enabled)
> > > + acpiphp: pci 0000:00:04 Slot(4) registered (empty)
> > > PCI host bridge to bus 0000:00
> > > pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
> > > <ACPI Device Check notification>
> > > + acpiphp: pci 0000:00:04 Slot(4) Device Check
> > > pci 0000:00:04.0: [8086:100e] type 00 class 0x020000
> > > ...
>
> > > @@ -793,6 +804,14 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
> > >
> > > pci_lock_rescan_remove();
> > >
> > > + pr_info("pci %04x:%02x:%02x Slot(%s) %s\n",
> > > + pci_domain_nr(slot->bus), slot->bus->number,
> > > + slot->device, slot_name(slot->slot),
> > > + type == ACPI_NOTIFY_BUS_CHECK ? "Bus Check" :
> > > + type == ACPI_NOTIFY_DEVICE_CHECK ? "Device Check" :
> > > + type == ACPI_NOTIFY_EJECT_REQUEST ? "Eject Request" :
> > > + "Notification");
> >
> > pr_debug() perhaps?
> >
> > On systems that don't have any hotplug problems these messages will
> > just be filling the kernel log unnecessarily.
>
> If these notifications are really common, pr_debug() sounds like the
> right thing. I assumed that they would not be common, e.g., they
> would happen for user-time things like dock/undock, plug/unplug,
> suspend/resume, etc.
>
> In pciehp, we use _info for attention button presses, presence detect
> changes, link up/down, and I assumed the ACPI notify events would
> roughly correspond to those. No?

Depending on how often the system gets suspended and resumed, they may
end up in the log quite often and if there are no problems related to
them, they are just noise.

IMHO in that case the users are taught to ignore stuff that lands in
the log which is not fantastic.

2023-08-09 11:59:52

by Igor Mammedov

[permalink] [raw]
Subject: Re: [PATCH] PCI: acpiphp: Log more slot and notification details

On Tue, 8 Aug 2023 14:27:13 -0500
Bjorn Helgaas <[email protected]> wrote:

> From: Bjorn Helgaas <[email protected]>
>
> When registering an acpiphp slot, log the slot name in the same style as
> pciehp and include the PCI bus/device and whether a device is present or
> the slot is empty.
>
> When handling an ACPI notification, log the PCI bus/device and notification
> type.
>
> Sample dmesg log diff:
>
> ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
> - acpiphp: Slot [3] registered
> - acpiphp: Slot [4] registered
> PCI host bridge to bus 0000:00
> pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
> <ACPI Device Check notification>

Having ACPI node name/path here that received notification would be helpfull

> pci 0000:00:04.0: [8086:100e] type 00 class 0x020000
>
> ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
> + acpiphp: pci 0000:00:03 Slot(3) registered (enabled)
> + acpiphp: pci 0000:00:04 Slot(4) registered (empty)
> PCI host bridge to bus 0000:00
> pci 0000:00:03.0: [8086:100e] type 00 class 0x020000
> <ACPI Device Check notification>
> + acpiphp: pci 0000:00:04 Slot(4) Device Check
> pci 0000:00:04.0: [8086:100e] type 00 class 0x020000
>
> Signed-off-by: Bjorn Helgaas <[email protected]>
> ---
> drivers/pci/hotplug/acpiphp_core.c | 4 ----
> drivers/pci/hotplug/acpiphp_glue.c | 23 +++++++++++++++++++++--
> 2 files changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
> index c02257f4b61c..19d47607d009 100644
> --- a/drivers/pci/hotplug/acpiphp_core.c
> +++ b/drivers/pci/hotplug/acpiphp_core.c
> @@ -282,8 +282,6 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot,
> goto error_slot;
> }
>
> - pr_info("Slot [%s] registered\n", slot_name(slot));
> -
> return 0;
> error_slot:
> kfree(slot);
> @@ -296,8 +294,6 @@ void acpiphp_unregister_hotplug_slot(struct acpiphp_slot *acpiphp_slot)
> {
> struct slot *slot = acpiphp_slot->slot;
>
> - pr_info("Slot [%s] unregistered\n", slot_name(slot));
> -
> pci_hp_deregister(&slot->hotplug_slot);
> kfree(slot);
> }
> diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
> index 328d1e416014..eeca2753a5c7 100644
> --- a/drivers/pci/hotplug/acpiphp_glue.c
> +++ b/drivers/pci/hotplug/acpiphp_glue.c
> @@ -25,7 +25,7 @@
> * bus. It loses the refcount when the driver unloads.
> */
>
> -#define pr_fmt(fmt) "acpiphp_glue: " fmt
> +#define pr_fmt(fmt) "acpiphp: " fmt
>
> #include <linux/module.h>
>
> @@ -333,6 +333,12 @@ static acpi_status acpiphp_add_context(acpi_handle handle, u32 lvl, void *data,
> &val, 60*1000))
> slot->flags |= SLOT_ENABLED;
>
> + if (slot->slot)
> + pr_info("pci %04x:%02x:%02x Slot(%s) registered (%s)\n",
> + pci_domain_nr(slot->bus), slot->bus->number,
> + slot->device, slot_name(slot->slot),
> + slot->flags & SLOT_ENABLED ? "enabled" : "empty");
> +
> return AE_OK;
> }
>
> @@ -351,8 +357,13 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge)
> acpi_unlock_hp_context();
> }
> slot->flags |= SLOT_IS_GOING_AWAY;
> - if (slot->slot)
> + if (slot->slot) {
> + pr_info("pci %04x:%02x:%02x Slot(%s) unregistered\n",
> + pci_domain_nr(slot->bus), slot->bus->number,
> + slot->device, slot_name(slot->slot));
> +
> acpiphp_unregister_hotplug_slot(slot);
> + }
> }
>
> mutex_lock(&bridge_mutex);
> @@ -793,6 +804,14 @@ static void hotplug_event(u32 type, struct acpiphp_context *context)
>
> pci_lock_rescan_remove();
>
> + pr_info("pci %04x:%02x:%02x Slot(%s) %s\n",
> + pci_domain_nr(slot->bus), slot->bus->number,
> + slot->device, slot_name(slot->slot),
I had similar issue with logging patches that I've asked Woody to run.

it crashes here with buscheck on non existing device

Call Trace:
<TASK>
? __die_body+0x19/0x60
? page_fault_oops+0x158/0x430
? fixup_exception+0x21/0x330
? exc_page_fault+0x6b/0x150
? asm_exc_page_fault+0x26/0x30
? acpiphp_hotplug_notify+0x13d/0x2f0
? __pfx_acpiphp_hotplug_notify+0x10/0x10
acpi_device_hotplug+0xc2/0x4f0
acpi_hotplug_work_fn+0x19/0x30
process_one_work+0x1f8/0x3e0
worker_thread+0x29/0x3b0
? __pfx_worker_thread+0x10/0x10
kthread+0xf2/0x120
? __pfx_kthread+0x10/0x10
ret_from_fork+0x2f/0x40
? __pfx_kthread+0x10/0x10
ret_from_fork_asm+0

(gdb) p *slot
$2 = {node = {next = 0xffff8881003287c0, prev = 0xffff888100887730}, bus = 0xffff8881003de800,
funcs = {next = 0xffff8881008877b0, prev = 0xffff888100887a50}, slot = 0x0 <fixed_percpu_data>,
^^^ likely culprit
device = 0x0, flags = 0x1}


reproducer hack: https://gitlab.com/imammedo/qemu/-/tree/acpiphp_buscheck_on_missing_device?ref_type=heads

./qemu-system-x86_64 -M q35 -cpu host -smp 4 -enable-kvm -m 6G -kernel ~/builds/linux-2.6/arch/x86/boot/bzImage -append 'nokaslr root=/dev/sda1 console=ttyS0' -snapshot rhel9.img -device pcie-root-port,id=rp1,bus=pcie.0,chassis=0,addr=8 -monitor stdio -serial file:/tmp/s -s

then once booted to trigger buscheck issue at monitor prompt:
(qemu) device_add e1000e,bus=rp1

> + type == ACPI_NOTIFY_BUS_CHECK ? "Bus Check" :
> + type == ACPI_NOTIFY_DEVICE_CHECK ? "Device Check" :
> + type == ACPI_NOTIFY_EJECT_REQUEST ? "Eject Request" :
> + "Notification");
> +
> switch (type) {
> case ACPI_NOTIFY_BUS_CHECK:
> /* bus re-enumerate */