The HP EliteBook 855 G8 Notebook PC is using ALC285 codec which needs
ALC285_FIXUP_HP_MUTE_LED fixup to make it works. After applying the
fixup, the mute/micmute LEDs work good.
Signed-off-by: Jeremy Szu <[email protected]>
---
sound/pci/hda/patch_realtek.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 552e2cb73291..9d68f591c6bf 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8291,6 +8291,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP),
SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
--
2.31.1
The HP ZBook Fury 15.6 Inch G8 is using ALC285 codec which is
using 0x04 to control mute LED and 0x01 to control micmute LED.
In the other hand, there is no output from right channel of speaker.
Therefore, add a quirk to make it works.
Signed-off-by: Jeremy Szu <[email protected]>
---
sound/pci/hda/patch_realtek.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 8d1c18843758..58686d7d5576 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8291,6 +8291,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP),
SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
--
2.31.1
The HP ZBook Studio 15.6 Inch G8 is using ALC285 codec which is
using 0x04 to control mute LED and 0x01 to control micmute LED.
In the other hand, there is no output from right channel of speaker.
Therefore, add a quirk to make it works.
Signed-off-by: Jeremy Szu <[email protected]>
---
sound/pci/hda/patch_realtek.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 9d68f591c6bf..8d1c18843758 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8291,6 +8291,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP),
SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
--
2.31.1
The HP ZBook Studio 17.3 Inch G8 is using ALC285 codec which is
using 0x04 to control mute LED and 0x01 to control micmute LED.
In the other hand, there is no output from right channel of speaker.
Therefore, add a quirk to make it works.
Signed-off-by: Jeremy Szu <[email protected]>
---
sound/pci/hda/patch_realtek.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 58686d7d5576..4011ce090355 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8291,6 +8291,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP),
SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x886d, "HP ZBook Fury 17.3 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
--
2.31.1
Hi Takashi,
Would you please help to review these quirks? Many thanks.
On Thu, May 20, 2021 at 1:04 AM Jeremy Szu <[email protected]> wrote:
>
> The HP EliteBook 855 G8 Notebook PC is using ALC285 codec which needs
> ALC285_FIXUP_HP_MUTE_LED fixup to make it works. After applying the
> fixup, the mute/micmute LEDs work good.
>
> Signed-off-by: Jeremy Szu <[email protected]>
> ---
> sound/pci/hda/patch_realtek.c | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
> index 552e2cb73291..9d68f591c6bf 100644
> --- a/sound/pci/hda/patch_realtek.c
> +++ b/sound/pci/hda/patch_realtek.c
> @@ -8291,6 +8291,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
> SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP),
> SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
> SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
> + SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
> SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
> SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
> SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
> --
> 2.31.1
>
--
Sincerely,
Jeremy Su
On Thu, 27 May 2021 04:00:34 +0200,
Jeremy Szu wrote:
>
> Hi Takashi,
>
> Would you please help to review these quirks? Many thanks.
Sorry, it was overlooked.
Now applied all four patches. Thanks.
Takashi
>
>
> On Thu, May 20, 2021 at 1:04 AM Jeremy Szu <[email protected]> wrote:
> >
> > The HP EliteBook 855 G8 Notebook PC is using ALC285 codec which needs
> > ALC285_FIXUP_HP_MUTE_LED fixup to make it works. After applying the
> > fixup, the mute/micmute LEDs work good.
> >
> > Signed-off-by: Jeremy Szu <[email protected]>
> > ---
> > sound/pci/hda/patch_realtek.c | 1 +
> > 1 file changed, 1 insertion(+)
> >
> > diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
> > index 552e2cb73291..9d68f591c6bf 100644
> > --- a/sound/pci/hda/patch_realtek.c
> > +++ b/sound/pci/hda/patch_realtek.c
> > @@ -8291,6 +8291,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
> > SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP),
> > SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
> > SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
> > + SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
> > SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
> > SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
> > SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
> > --
> > 2.31.1
> >
>
>
> --
> Sincerely,
> Jeremy Su
>
Hello,
On Thu, May 20, 2021 at 01:03:53AM +0800, Jeremy Szu wrote:
>The HP EliteBook 855 G8 Notebook PC is using ALC285 codec which needs
>ALC285_FIXUP_HP_MUTE_LED fixup to make it works. After applying the
>fixup, the mute/micmute LEDs work good.
I've recently got HP EliteBook 855 G8 and it happens that neither micmute LED
nor speakers work (except rare cases, more on that later) in 5.16.0. The
corresponding ALC285_FIXUP_HP_MUTE_LED fixup is definitely applied (verified by
adding a printk into alc285_fixup_hp_mute_led).
What is the most interesting, both micmute LED and speakers do work on rare
boots. I've written some scripts to pick up sound from speakers using a
known-good USB microphone. Out of 709 boots today only 16 ended up with working
micmute LED and speakers.
Is there anything I can do to help with debugging of this problem?
Initially reported at https://bugzilla.kernel.org/show_bug.cgi?id=215466
On Tue, 11 Jan 2022 20:52:29 +0100,
Alexander Sergeyev wrote:
>
> Hello,
>
> On Thu, May 20, 2021 at 01:03:53AM +0800, Jeremy Szu wrote:
> >The HP EliteBook 855 G8 Notebook PC is using ALC285 codec which needs
> >ALC285_FIXUP_HP_MUTE_LED fixup to make it works. After applying the
> >fixup, the mute/micmute LEDs work good.
>
> I've recently got HP EliteBook 855 G8 and it happens that neither
> micmute LED nor speakers work (except rare cases, more on that later)
> in 5.16.0. The corresponding ALC285_FIXUP_HP_MUTE_LED fixup is
> definitely applied (verified by adding a printk into
> alc285_fixup_hp_mute_led).
>
> What is the most interesting, both micmute LED and speakers do work on
> rare boots. I've written some scripts to pick up sound from speakers
> using a known-good USB microphone. Out of 709 boots today only 16
> ended up with working micmute LED and speakers.
>
> Is there anything I can do to help with debugging of this problem?
>
> Initially reported at https://bugzilla.kernel.org/show_bug.cgi?id=215466
The problem is about the built-in drivers, or do you see the very same
problem even with modules? AFAIK, quite a few AMD platforms tend to
have some issues with various devices showing initialization problems
at the early boot. Just reloading / rebinding the device later often
helps.
Takashi
On Wed, Jan 12, 2022 at 10:45:46AM +0100, Takashi Iwai wrote:
>The problem is about the built-in drivers, or do you see the very same
>problem even with modules?
The problem is definitely there for the built-in drivers which I've tested
quite a lot. It's the primary usecase for me, as I tend to build minimal
device-specific and self-contained kernels in Gentoo.
For builds with modules things are not very consistent. Live Ubuntu with an
older (and probably vendor-patched) kernel works just fine, but when I pull
Ubuntu kernel sources and build it with the mostly same config (including
modules) it boots with no sound in Gentoo. Mostly same -- because I need nvme
drivers to be built-in as I don't use initrd.
>AFAIK, quite a few AMD platforms tend to have some issues with various devices
>showing initialization problems at the early boot. Just reloading / rebinding
>the device later often helps.
Is it possible to do with the built-in drivers?
On Wed, 12 Jan 2022 11:12:49 +0100,
Alexander Sergeyev wrote:
>
> On Wed, Jan 12, 2022 at 10:45:46AM +0100, Takashi Iwai wrote:
> > The problem is about the built-in drivers, or do you see the very
> > same problem even with modules?
>
> The problem is definitely there for the built-in drivers which I've
> tested quite a lot. It's the primary usecase for me, as I tend to
> build minimal device-specific and self-contained kernels in Gentoo.
>
> For builds with modules things are not very consistent. Live Ubuntu
> with an older (and probably vendor-patched) kernel works just fine,
> but when I pull Ubuntu kernel sources and build it with the mostly
> same config (including modules) it boots with no sound in
> Gentoo. Mostly same -- because I need nvme drivers to be built-in as I
> don't use initrd.
Sounds like some timing issue, then. It's pretty hard to debug,
unfortunately.
You may try to get the codec proc dump with COEF by passing
snd_hda_codec.dump_coef=1 module option for both working and
non-working cases. Check the difference of the COEF and apply the
difference with hda-verb manually.
> > AFAIK, quite a few AMD platforms tend to have some issues with
> > various devices showing initialization problems at the early
> > boot. Just reloading / rebinding the device later often helps.
>
> Is it possible to do with the built-in drivers?
You can unbind and re-bind the PCI (HD-audio controller) device via
sysfs.
Takashi
On Wed, Jan 12, 2022 at 11:13:44AM +0100, Takashi Iwai wrote:
>Sounds like some timing issue, then. It's pretty hard to debug, unfortunately.
I can imagine. Is it possible that initcall_debug logs could help? Or is it
timing issues within the same module?
>You may try to get the codec proc dump with COEF by passing
>snd_hda_codec.dump_coef=1 module option for both working and non-working
>cases.
>You can unbind and re-bind the PCI (HD-audio controller) device via sysfs.
I'll try both options later today when able, thank you!
On Wed, Jan 12, 2022 at 01:48:28PM +0300, Alexander Sergeyev wrote:
>On Wed, Jan 12, 2022 at 11:13:44AM +0100, Takashi Iwai wrote:
>>You may try to get the codec proc dump with COEF by passing
>>snd_hda_codec.dump_coef=1 module option for both working and non-working
>>cases.
>>You can unbind and re-bind the PCI (HD-audio controller) device via sysfs.
>
>I'll try both options later today when able, thank you!
First, about unbind and bind via sysfs -- attempts to unbind the HD-audio
controller immediately trigger BUGs:
05:00.6 Audio device [0403]: Advanced Micro Devices, Inc. [AMD] Family 17h
(Models 10h-1fh) HD Audio Controller [1022:15e3]
echo -n '0000:05:00.6' > /sys/bus/pci/drivers/snd_hda_intel/unbind
BUG: unable to handle page fault for address 000000000000173c
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: 0000 [#1] SMP NOPTI
CPU: 2 PID: 167 Comm: kworker/2:3 Tainted: G T 5.16.0-dirty #3
Workqueue: events set_brightness_delayed
RIP: 0010:coef_micmute_led_set+0xf/0x60
...
Call Trace:
<TASK>
set_brightness_delayed+0x6f/0xb0
process_one_work+0x1e1/0x380
worker_thread+0x4b/0x3b0
? rescuer_thread+0x370/0x370
kthread+0x142/0x160
? set_kthread_struct+0x50/0x50
ret_from_work+0x22/0x30
</TASK>
Is it normal/expected?
Second, about dump_coef. I've collected a bunch of samples from
/proc/asound/Generic_1/codec#0. Overall, there are 6 different versions across
196 samples, two versions are with working sound (and micmute LED).
Differences between _non-working_ versions:
Node 0x02 [Audio Output] wcaps 0x41d: Stereo Amp-Out
Amp-Out vals: [0x3c 0x3c] // (!) OR [0x53 0x53]
Converter: stream=5, channel=0 // (!) OR stream=0, channel=0
Node 0x03 [Audio Output] wcaps 0x41d: Stereo Amp-Out
Amp-Out vals: [0x3c 0x3c] // (!) OR [0x53 0x53]
Converter: stream=5, channel=0 // (!) OR stream=0, channel=0
Node 0x20 [Vendor Defined Widget] wcaps 0xf00040: Mono
Processing caps: benign=0, ncoeff=142
Coeff 0x0b: 0x8003 // (!) OR 0x7770
Coeff 0x19: 0x01e3 // (!) OR 0x21e3
Node 0x08 [Audio Input] wcaps 0x10051b: Stereo Amp-In
Amp-In vals: [0x27 0x27] // (!) OR [0xa7 0xa7]
Differences between _working_ versions:
Node 0x20 [Vendor Defined Widget] wcaps 0xf00040: Mono
Processing caps: benign=0, ncoeff=142
Coeff 0x0b: 0x8003 // (!) OR 0x7770
Differences between _non_working_ and _working_ versions:
Node 0x20 [Vendor Defined Widget] wcaps 0xf00040: Mono
Processing caps: benign=0, ncoeff=142
Coeff 0x19: 0x01e3 OR 0x21e3 // (!) 0x8e11 for working versions
In short:
1) Coeff 0x0b is flapping between 0x8003 and 0x7770 and does not seem to have
any effect in both non-working and working versions. Not sure about this, maybe
microphone is not operational since I haven't checked it yet.
2) Coeff 0x19 with 0x8e11 value makes speakers work. Non-working values are
0x01e3 and 0x21e3.
Running the following commands makes speakers and micmute LED work (0x20 is the
node id, which is mentioned in the snippets above):
hda-verb /dev/snd/hwC1D0 0x20 SET_COEF_INDEX 0x19
hda-verb /dev/snd/hwC1D0 0x20 SET_PROC_COEF 0x8e11
Is it possible to somehow trace this particular coefficient to hunt the timing
issue? It would be great to have a proper fix.
On Wed, 12 Jan 2022 21:18:24 +0100,
Alexander Sergeyev wrote:
>
> On Wed, Jan 12, 2022 at 01:48:28PM +0300, Alexander Sergeyev wrote:
> >On Wed, Jan 12, 2022 at 11:13:44AM +0100, Takashi Iwai wrote:
> >> You may try to get the codec proc dump with COEF by passing
> >> snd_hda_codec.dump_coef=1 module option for both working and
> >> non-working cases.
> >>You can unbind and re-bind the PCI (HD-audio controller) device via sysfs.
> >
> >I'll try both options later today when able, thank you!
>
> First, about unbind and bind via sysfs -- attempts to unbind the
> HD-audio controller immediately trigger BUGs:
>
> 05:00.6 Audio device [0403]: Advanced Micro Devices, Inc. [AMD] Family
> 17h (Models 10h-1fh) HD Audio Controller [1022:15e3]
>
> echo -n '0000:05:00.6' > /sys/bus/pci/drivers/snd_hda_intel/unbind
>
> BUG: unable to handle page fault for address 000000000000173c
> #PF: supervisor read access in kernel mode
> #PF: error_code(0x0000) - not-present page
> PGD 0 P4D 0
> Oops: 0000 [#1] SMP NOPTI
> CPU: 2 PID: 167 Comm: kworker/2:3 Tainted: G T 5.16.0-dirty #3
> Workqueue: events set_brightness_delayed
> RIP: 0010:coef_micmute_led_set+0xf/0x60
> ...
> Call Trace:
> <TASK>
> set_brightness_delayed+0x6f/0xb0
> process_one_work+0x1e1/0x380
> worker_thread+0x4b/0x3b0
> ? rescuer_thread+0x370/0x370
> kthread+0x142/0x160
> ? set_kthread_struct+0x50/0x50
> ret_from_work+0x22/0x30
> </TASK>
>
> Is it normal/expected?
A sort of. The sysfs unbind is little tested and may be still buggy
if done during the stream operation.
To be sure, could you check with my latest sound.git tree for-linus
branch? There are a few fixes that harden the dynamic unbind.
Though, the code path is from the leds class, and it might not be
covered yet. It's managed via devm, so it should have been cleared,
but there may be still some ordering problem...
> Second, about dump_coef. I've collected a bunch of samples from
> /proc/asound/Generic_1/codec#0. Overall, there are 6 different
> versions across 196 samples, two versions are with working sound (and
> micmute LED).
>
>
> Differences between _non-working_ versions:
>
> Node 0x02 [Audio Output] wcaps 0x41d: Stereo Amp-Out
> Amp-Out vals: [0x3c 0x3c] // (!) OR [0x53 0x53]
> Converter: stream=5, channel=0 // (!) OR stream=0, channel=0
>
> Node 0x03 [Audio Output] wcaps 0x41d: Stereo Amp-Out
> Amp-Out vals: [0x3c 0x3c] // (!) OR [0x53 0x53]
> Converter: stream=5, channel=0 // (!) OR stream=0, channel=0
>
> Node 0x20 [Vendor Defined Widget] wcaps 0xf00040: Mono
> Processing caps: benign=0, ncoeff=142
> Coeff 0x0b: 0x8003 // (!) OR 0x7770
> Coeff 0x19: 0x01e3 // (!) OR 0x21e3
>
> Node 0x08 [Audio Input] wcaps 0x10051b: Stereo Amp-In
> Amp-In vals: [0x27 0x27] // (!) OR [0xa7 0xa7]
>
>
> Differences between _working_ versions:
>
> Node 0x20 [Vendor Defined Widget] wcaps 0xf00040: Mono
> Processing caps: benign=0, ncoeff=142
> Coeff 0x0b: 0x8003 // (!) OR 0x7770
>
>
> Differences between _non_working_ and _working_ versions:
>
> Node 0x20 [Vendor Defined Widget] wcaps 0xf00040: Mono
> Processing caps: benign=0, ncoeff=142
> Coeff 0x19: 0x01e3 OR 0x21e3 // (!) 0x8e11 for working versions
>
>
> In short:
> 1) Coeff 0x0b is flapping between 0x8003 and 0x7770 and does not seem
> to have any effect in both non-working and working versions. Not sure
> about this, maybe microphone is not operational since I haven't
> checked it yet.
> 2) Coeff 0x19 with 0x8e11 value makes speakers work. Non-working
> values are 0x01e3 and 0x21e3.
>
> Running the following commands makes speakers and micmute LED work
> (0x20 is the node id, which is mentioned in the snippets above):
>
> hda-verb /dev/snd/hwC1D0 0x20 SET_COEF_INDEX 0x19
> hda-verb /dev/snd/hwC1D0 0x20 SET_PROC_COEF 0x8e11
>
> Is it possible to somehow trace this particular coefficient to hunt
> the timing issue? It would be great to have a proper fix.
Those might be some codec init values, which aren't set up properly by
whatever reason, e.g. it might need a bit more wait time for init,
etc. You can fix it rather by issuing those explicitly at the fixup.
Takashi
On Thu, Jan 13, 2022 at 08:14:29AM +0100, Takashi Iwai wrote:
>> First, about unbind and bind via sysfs -- attempts to unbind the
>> HD-audio controller immediately trigger BUGs:
>> Is it normal/expected?
>
>A sort of. The sysfs unbind is little tested and may be still buggy
>if done during the stream operation.
>
>To be sure, could you check with my latest sound.git tree for-linus
>branch? There are a few fixes that harden the dynamic unbind.
I assume that the referred repository is the one at [1]. I've tried
081c73701ef0 "ALSA: hda: intel-dsp-config: reorder the config table". It
crashed with nearly identical logs.
>> 1) Coeff 0x0b is flapping between 0x8003 and 0x7770 and does not seem
>> to have any effect in both non-working and working versions. Not sure
>> about this, maybe microphone is not operational since I haven't
>> checked it yet.
I got some time to poke the internal microphone. It works, but oddities are
there as well. Initially I get "Mic Boost", "Capture" and "Internal Mic Boost"
controls in alsamixer. When I run arecord, "Digital" control appears, but it
cannot be changed until arecord is stopped. Subsequent arecord calls do not
lock "Digital" control. This control affects sensitivity of the microphone and
seems useful.
/proc/asound/card1/codec#0:
Node 0x08 [Audio Input] wcaps 0x10051b: Stereo Amp-In
Control: name="Capture Volume", index=0, device=0
ControlAmp: chs=3, dir=In, idx=0, ofs=0
Control: name="Capture Switch", index=0, device=0
ControlAmp: chs=3, dir=In, idx=0, ofs=0
Device: name="ALC285 Analog", type="Audio", device=0
Amp-In caps: ofs=0x17, nsteps=0x3f, stepsize=0x02, mute=1
Amp-In vals: [0x27 0x27]
- Converter: stream=0, channel=0
+ Converter: stream=1, channel=0
This is the only change in /proc/asound after the first arecord run. Overall,
seems like a small annoyance, but I'm curious -- is it how it's supposed to
work?
[1]https://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git/?h=for-linus
On Thu, Jan 13, 2022 at 09:31:43PM +0300, Alexander Sergeyev wrote:
>This is the only change in /proc/asound after the first arecord run. Overall,
>seems like a small annoyance, but I'm curious -- is it how it's supposed to
>work?
Just to clarify, this particular Digital control behavior is the same on live
Ubuntu (which uses modules and supposedly works correctly).
Also, I've posted a patch for review. It adds the quirk for my PCI subdevice id
which is not present in the current set of SND_PCI_QUIRK. The former micmute
quirk was picked up by the codec SSID and not PCI id. The patch fixes the
speakers problem for me (including the built-in drivers usecase).
On Thu, 13 Jan 2022 19:31:41 +0100,
Alexander Sergeyev wrote:
>
> On Thu, Jan 13, 2022 at 08:14:29AM +0100, Takashi Iwai wrote:
> >> First, about unbind and bind via sysfs -- attempts to unbind the
> >> HD-audio controller immediately trigger BUGs:
> >> Is it normal/expected?
> >
> >A sort of. The sysfs unbind is little tested and may be still buggy
> >if done during the stream operation.
> >
> >To be sure, could you check with my latest sound.git tree for-linus
> >branch? There are a few fixes that harden the dynamic unbind.
>
> I assume that the referred repository is the one at [1]. I've tried
> 081c73701ef0 "ALSA: hda: intel-dsp-config: reorder the config
> table". It crashed with nearly identical logs.
OK, then it's still something to do with the led cdev
unregisteration.
Could you try the patch below?
> >> 1) Coeff 0x0b is flapping between 0x8003 and 0x7770 and does not seem
> >> to have any effect in both non-working and working versions. Not sure
> >> about this, maybe microphone is not operational since I haven't
> >> checked it yet.
>
> I got some time to poke the internal microphone. It works, but
> oddities are there as well. Initially I get "Mic Boost", "Capture" and
> "Internal Mic Boost" controls in alsamixer. When I run arecord,
> "Digital" control appears, but it cannot be changed until arecord is
> stopped. Subsequent arecord calls do not lock "Digital" control. This
> control affects sensitivity of the microphone and seems useful.
That's presumably an alsa-lib softvol stuff. It's created
dynamically. So not really a kernel problem.
Takashi
---
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 3bf5e3410703..503cd979c908 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -84,13 +84,21 @@ static void free_kctls(struct hda_gen_spec *spec)
snd_array_free(&spec->kctls);
}
-static void snd_hda_gen_spec_free(struct hda_gen_spec *spec)
+static void snd_hda_gen_spec_free(struct hda_codec *codec)
{
+ struct hda_gen_spec *spec = codec->spec;
+
if (!spec)
return;
free_kctls(spec);
snd_array_free(&spec->paths);
snd_array_free(&spec->loopback_list);
+#ifdef CONFIG_SND_HDA_GENERIC_LEDS
+ if (spec->led_cdevs[LED_AUDIO_MUTE])
+ led_classdev_unregister(spec->led_cdevs[LED_AUDIO_MUTE]);
+ if (spec->led_cdevs[LED_AUDIO_MICMUTE])
+ led_classdev_unregister(spec->led_cdevs[LED_AUDIO_MICMUTE]);
+#endif
}
/*
@@ -3922,7 +3930,9 @@ static int create_mute_led_cdev(struct hda_codec *codec,
enum led_brightness),
bool micmute)
{
+ struct hda_gen_spec *spec = codec->spec;
struct led_classdev *cdev;
+ int err;
cdev = devm_kzalloc(&codec->core.dev, sizeof(*cdev), GFP_KERNEL);
if (!cdev)
@@ -3935,7 +3945,11 @@ static int create_mute_led_cdev(struct hda_codec *codec,
cdev->brightness = ledtrig_audio_get(micmute ? LED_AUDIO_MICMUTE : LED_AUDIO_MUTE);
cdev->flags = LED_CORE_SUSPENDRESUME;
- return devm_led_classdev_register(&codec->core.dev, cdev);
+ err = led_classdev_register(&codec->core.dev, cdev);
+ if (err < 0)
+ return err;
+ spec->led_cdevs[micmute ? LED_AUDIO_MICMUTE : LED_AUDIO_MUTE] = cdev;
+ return 0;
}
/**
@@ -5998,7 +6012,7 @@ EXPORT_SYMBOL_GPL(snd_hda_gen_init);
void snd_hda_gen_free(struct hda_codec *codec)
{
snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_FREE);
- snd_hda_gen_spec_free(codec->spec);
+ snd_hda_gen_spec_free(codec);
kfree(codec->spec);
codec->spec = NULL;
}
diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h
index 8e1bc8ea74fc..34eba40cc6e6 100644
--- a/sound/pci/hda/hda_generic.h
+++ b/sound/pci/hda/hda_generic.h
@@ -294,6 +294,9 @@ struct hda_gen_spec {
struct hda_jack_callback *cb);
void (*mic_autoswitch_hook)(struct hda_codec *codec,
struct hda_jack_callback *cb);
+
+ /* leds */
+ struct led_classdev *led_cdevs[NUM_AUDIO_LEDS];
};
/* values for add_stereo_mix_input flag */
On Fri, Jan 14, 2022 at 05:37:42PM +0100, Takashi Iwai wrote:
> > I assume that the referred repository is the one at [1]. I've tried
> > 081c73701ef0 "ALSA: hda: intel-dsp-config: reorder the config
> > table". It crashed with nearly identical logs.
>
> OK, then it's still something to do with the led cdev
> unregisteration.
>
> Could you try the patch below?
This patch solved the BUG problem. But after unbind/bind micmute LED stopped
working. Speakers and mute LED are fine though.
Dmesg:
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff800 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff800 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff800 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff800 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff820 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff820 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff820 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff820 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff840 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff840 flags=0x0020]
...
snd_hda_codec_realtek hdaudioC1D0: out of range cmd 0:20:400:90170118
snd_hda_codec_realtek hdaudioC1D0: out of range cmd 0:20:400:411111f0
snd_hda_codec_realtek hdaudioC1D0: out of range cmd 0:20:400:270300
...
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffffb80 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffffb80 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffffba0 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffffba0 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffffba0 flags=0x0020]
snd_hda_codec_realtek hdaudioC1D0: out of range cmd 0:20:400:80000000
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffffba0 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffffbc0 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffffbc0 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffffbc0 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffffbc0 flags=0x0020]
snd_hda_codec_realtek hdaudioC1D0: out of range cmd 0:20:400:20010b
> > I got some time to poke the internal microphone. It works, but
> > oddities are there as well. Initially I get "Mic Boost", "Capture" and
> > "Internal Mic Boost" controls in alsamixer. When I run arecord,
> > "Digital" control appears, but it cannot be changed until arecord is
> > stopped. Subsequent arecord calls do not lock "Digital" control. This
> > control affects sensitivity of the microphone and seems useful.
>
> That's presumably an alsa-lib softvol stuff. It's created
> dynamically. So not really a kernel problem.
Okay, that's good to know.
On Fri, 14 Jan 2022 19:37:20 +0100,
Alexander Sergeyev wrote:
>
> On Fri, Jan 14, 2022 at 05:37:42PM +0100, Takashi Iwai wrote:
> > > I assume that the referred repository is the one at [1]. I've tried
> > > 081c73701ef0 "ALSA: hda: intel-dsp-config: reorder the config
> > > table". It crashed with nearly identical logs.
> >
> > OK, then it's still something to do with the led cdev
> > unregisteration.
> >
> > Could you try the patch below?
>
> This patch solved the BUG problem. But after unbind/bind micmute LED stopped
> working. Speakers and mute LED are fine though.
Does the corresponding sysfs entry exist in /sys/class/leds/*?
And can you control LED over there?
> Dmesg:
> snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff800 flags=0x0020]
Hmm, that looks bad. Something must be accessing out of bound.
> snd_hda_codec_realtek hdaudioC1D0: out of range cmd 0:20:400:90170118
> snd_hda_codec_realtek hdaudioC1D0: out of range cmd 0:20:400:411111f0
> snd_hda_codec_realtek hdaudioC1D0: out of range cmd 0:20:400:270300
This seems to be a bogus COEF. But I have no idea from where this
comes. The values look completely wrong.
I guess you'd need to put some debug prints to trace down how those
are triggered...
Takashi
On Sat, Jan 15, 2022 at 08:55:28AM +0100, Takashi Iwai wrote:
> > This patch solved the BUG problem. But after unbind/bind micmute LED
> > stopped working. Speakers and mute LED are fine though.
> Does the corresponding sysfs entry exist in /sys/class/leds/*?
Yes.
> And can you control LED over there?
After "out of range cmd" messages the sysfs entry remains present, but writing
ones to the brightness file stops to produce any effect.
Actually, the timing issues are present here as well. Sometimes unbind & bind
works fine. But fails on the second round.
> This seems to be a bogus COEF. But I have no idea from where this
> comes. The values look completely wrong.
> I guess you'd need to put some debug prints to trace down how those
> are triggered...
Okay, I'll try. It's going to take some time though.
On Sat, 15 Jan 2022 16:22:15 +0100,
Alexander Sergeyev wrote:
>
> On Sat, Jan 15, 2022 at 08:55:28AM +0100, Takashi Iwai wrote:
> > > This patch solved the BUG problem. But after unbind/bind micmute LED
> > > stopped working. Speakers and mute LED are fine though.
> > Does the corresponding sysfs entry exist in /sys/class/leds/*?
>
> Yes.
>
> > And can you control LED over there?
>
> After "out of range cmd" messages the sysfs entry remains present, but writing
> ones to the brightness file stops to produce any effect.
>
> Actually, the timing issues are present here as well. Sometimes unbind & bind
> works fine. But fails on the second round.
Here "fails" means the kernel Oops / crash? If yes, the back trace
would be helpful.
thanks,
Takashi
On Wed, Jan 19, 2022 at 10:12:43AM +0100, Takashi Iwai wrote:
> > Actually, the timing issues are present here as well. Sometimes unbind &
> > bind works fine. But fails on the second round.
>
> Here "fails" means the kernel Oops / crash? If yes, the back trace
> would be helpful.
No, I mean "IO_PAGE_FAULT" and "out of range cmd" don't appear on every unbind
& bind. Sometimes it works cleanly.
Backtraces for the source of "out of range cmd" messages:
--- a/sound/hda/hdac_device.c
+++ b/sound/hda/hdac_device.c
@@ -231,6 +231,7 @@ static unsigned int snd_hdac_make_cmd(struct hdac_device *codec, hda_nid_t nid,
(verb & ~0xfff) || (parm & ~0xffff)) {
dev_err(&codec->dev, "out of range cmd %x:%x:%x:%x\n",
addr, nid, verb, parm);
+ dump_stack();
return -1;
}
give me the following:
snd_hda_codec_realtek hdaudioC1D0: out of range cmd 0:20:400:90170118
Workqueue: events set_brightness_delayed
Call Trace:
<TASK>
dump_stack_lvl+0x34/0x4c
snd_hdac_make_cmd.cold+0x17/0x2c
snd_hdac_codec_write+0x16/0x60
coef_mute_led_set+0x3a/0x60
set_brightness_delayed+0x6f/0xb0
process_one_work+0x1e1/0x380
worker_thread+0x4e/0x3b0
? rescuer_thread+0x370/0x370
kthread+0x145/0x170
? set_kthread_struct+0x50/0x50
ret_from_fork+0x22/0x30
</TASK>
I'll have more time to look into this deeper later this week.
On Wed, Jan 19, 2022 at 12:32:51PM +0300, Alexander Sergeyev wrote:
> No, I mean "IO_PAGE_FAULT" and "out of range cmd" don't appear on every
> unbind & bind. Sometimes it works cleanly.
Unbind & bind generally works without error messages on the first attempt. The
second unbind & bind tends to generate io page faults like "AMD-Vi: Event
logged [IO_PAGE_FAULT ...]", but might work fine as well.
In some cases "snd_hda_codec_realtek: out of range cmd" errors are triggered in
addition to io page faults.
> This seems to be a bogus COEF. But I have no idea from where this
> comes. The values look completely wrong.
In such cases unexpected COEF values are coming from COEF reads during
read/write pairs that implement update operations.
For example (these traces are from added printk statements):
alc_update_coef_led(codec, led {.idx=0x19, .mask=0x2000, .on=0x2000, .off=0x0}, polarity=0, on=1)
alc_update_coefex_idx(codec, nid, coef_idx=0x19, mask=0x2000, bits_set=0x2000)
alc_update_coef_led(codec, led {.idx=0xb, .mask=0x8, .on=0x8, .off=0x0}, polarity=0, on=1)
alc_update_coefex_idx(codec, nid, coef_idx=0xb, mask=0x8, bits_set=0x8)
snd_hdac_codec_write(hdac, nid, flags=0, verb=0x500, parm=0x19) = 0x0
snd_hdac_codec_write(hdac, nid, flags=0, verb=0x500, parm=0xb) = 0x0
snd_hdac_codec_read(hdac, nid, flags=0, verb=0xc00, parm=0x0) = 0x90170110
alc_update_coefex_idx: alc_read_coefex_idx(codec, nid, coef_idx=0xb) returned 0x90170110
alc_update_coefex_idx: calling alc_write_coefex_idx(codec, nid, coef_idx=0xb, coef_val=0x90170118)
snd_hdac_codec_read(hdac, nid, flags=0, verb=0xc00, parm=0x0) = 0x0
alc_update_coefex_idx: alc_read_coefex_idx(codec, nid, coef_idx=0x19) returned 0x0
alc_update_coefex_idx: calling alc_write_coefex_idx(codec, nid, coef_idx=0x19, coef_val=0x2000)
snd_hdac_codec_write(hdac, nid, flags=0, verb=0x500, parm=0xb) = 0x0
snd_hda_codec_realtek hdaudioC1D0: out of range cmd 0:20:400:90170118
snd_hdac_codec_write(hdac, nid, flags=0, verb=0x400, parm=0x90170118) = 0xffffffff
Then I've specifically added printk on alc_update_coefex_idx entry and exit:
[4.036211] alc_update_coefex_idx(codec, nid, coef_idx=0x10, mask=0x200, bits_set=0x0): entering
[4.036503] alc_update_coefex_idx(codec, nid, coef_idx=0x10, mask=0x200, bits_set=0x0): exiting
[4.036543] alc_update_coefex_idx(codec, nid, coef_idx=0xb, mask=0x8, bits_set=0x0): entering
[4.036546] alc_update_coefex_idx(codec, nid, coef_idx=0x19, mask=0x2000, bits_set=0x0): entering
[4.037139] alc_update_coefex_idx(codec, nid, coef_idx=0xb, mask=0x8, bits_set=0x0): exiting
[4.037609] alc_update_coefex_idx(codec, nid, coef_idx=0x19, mask=0x2000, bits_set=0x0): exiting
I'm not sure about kernel log buffering or maybe the device support for
pipelining, but is it okay that alc_update_coefex_idx() seem to overlap?
On Sat, Jan 22, 2022 at 10:05:24PM +0300, Alexander Sergeyev wrote:
> I'm not sure about kernel log buffering or maybe the device support for
> pipelining, but is it okay that alc_update_coefex_idx() seem to overlap?
Given that the CPU number is the same in alc_update_coefex_idx(), it seems
these calls execution is interrupted and interleaved on the same core.
And, actually, there are two LEDs to set (mute and micmute). Am I onto
something here?
[4.043235] alc_update_coefex_idx(codec, nid, coef_idx=0xb, mask=0x8, bits_set=0x0): entering
[4.043237] CPU: 0 PID: 18 Comm: kworker/0:1 Tainted: G T 5.16.0-rc1-00001-g5c38c8c84e47-dirty #18
[4.043910] Hardware name: HP HP EliteBook 855 G8 Notebook PC/8895, BIOS T82 Ver. 01.06.03 09/17/2021
[4.044559] Workqueue: events set_brightness_delayed
[4.044559] Call Trace:
[4.044559] <TASK>
[4.046289] dump_stack_lvl+0x34/0x4c
[4.046289] alc_update_coefex_idx+0x34/0x7a
[4.046289] coef_mute_led_set+0x48/0x56
[4.046289] set_brightness_delayed+0x6f/0xb0
[4.046289] process_one_work+0x1e1/0x380
[4.046289] worker_thread+0x4e/0x3b0
[4.046289] ? rescuer_thread+0x370/0x370
[4.046289] kthread+0x145/0x170
[4.046289] ? set_kthread_struct+0x50/0x50
[4.046289] ret_from_fork+0x22/0x30
[4.046289] </TASK>
[4.052793] alc_update_coefex_idx(codec, nid, coef_idx=0x19, mask=0x2000, bits_set=0x0): entering
[4.052794] CPU: 0 PID: 171 Comm: kworker/0:2 Tainted: G T 5.16.0-rc1-00001-g5c38c8c84e47-dirty #18
[4.053363] Hardware name: HP HP EliteBook 855 G8 Notebook PC/8895, BIOS T82 Ver. 01.06.03 09/17/2021
[4.053364] Workqueue: events set_brightness_delayed
[4.053366] Call Trace:
[4.053366] <TASK>
[4.053367] dump_stack_lvl+0x34/0x4c
[4.053790] alc_update_coefex_idx+0x34/0x7a
[4.053790] coef_micmute_led_set+0x48/0x56
[4.053790] set_brightness_delayed+0x6f/0xb0
[4.053790] process_one_work+0x1e1/0x380
[4.053790] worker_thread+0x4e/0x3b0
[4.053790] ? rescuer_thread+0x370/0x370
[4.053790] kthread+0x145/0x170
[4.053790] ? set_kthread_struct+0x50/0x50
[4.053790] ret_from_fork+0x22/0x30
[4.053790] </TASK>
On Sat, 22 Jan 2022 21:56:37 +0100,
Alexander Sergeyev wrote:
>
> On Sat, Jan 22, 2022 at 10:05:24PM +0300, Alexander Sergeyev wrote:
> > I'm not sure about kernel log buffering or maybe the device support for
> > pipelining, but is it okay that alc_update_coefex_idx() seem to overlap?
>
> Given that the CPU number is the same in alc_update_coefex_idx(), it seems
> these calls execution is interrupted and interleaved on the same core.
>
> And, actually, there are two LEDs to set (mute and micmute). Am I onto
> something here?
That's an interesting finding, and yes, such a race is quite
possible. Below is a quick fix as an attempt to cover it.
Could you give it a try?
BTW, the fix for the unbind problem was submitted. It's a slightly
more simplified version than what I posted here beforehand.
thanks,
Takashi
-- 8< --
From: Takashi Iwai <[email protected]>
Subject: [PATCH] ALSA: hda: realtek: Fix race at concurrent COEF updates
The COEF access is done with two steps: setting the index then read or
write the data. When multiple COEF accesses are performed
concurrently, the index and data might be paired unexpectedly.
In most cases, this isn't a big problem as the COEF setup is done at
the initialization, but some dynamic changes like the mute LED may hit
such a race.
For avoiding the racy COEF accesses, this patch introduces a new
mutex coef_mutex to alc_spec, and wrap the COEF accessing functions
with it.
Signed-off-by: Takashi Iwai <[email protected]>
---
sound/pci/hda/patch_realtek.c | 61 ++++++++++++++++++++++++++++-------
1 file changed, 50 insertions(+), 11 deletions(-)
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 668274e52674..a5677be0a405 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -98,6 +98,7 @@ struct alc_spec {
unsigned int gpio_mic_led_mask;
struct alc_coef_led mute_led_coef;
struct alc_coef_led mic_led_coef;
+ struct mutex coef_mutex;
hda_nid_t headset_mic_pin;
hda_nid_t headphone_mic_pin;
@@ -137,8 +138,8 @@ struct alc_spec {
* COEF access helper functions
*/
-static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
- unsigned int coef_idx)
+static int __alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+ unsigned int coef_idx)
{
unsigned int val;
@@ -147,28 +148,61 @@ static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
return val;
}
+static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+ unsigned int coef_idx)
+{
+ struct alc_spec *spec = codec->spec;
+ unsigned int val;
+
+ mutex_lock(&spec->coef_mutex);
+ val = __alc_read_coefex_idx(codec, nid, coef_idx);
+ mutex_unlock(&spec->coef_mutex);
+ return val;
+}
+
#define alc_read_coef_idx(codec, coef_idx) \
alc_read_coefex_idx(codec, 0x20, coef_idx)
-static void alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
- unsigned int coef_idx, unsigned int coef_val)
+static void __alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+ unsigned int coef_idx, unsigned int coef_val)
{
snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_COEF_INDEX, coef_idx);
snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PROC_COEF, coef_val);
}
+static void alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+ unsigned int coef_idx, unsigned int coef_val)
+{
+ struct alc_spec *spec = codec->spec;
+
+ mutex_lock(&spec->coef_mutex);
+ __alc_write_coefex_idx(codec, nid, coef_idx, coef_val);
+ mutex_unlock(&spec->coef_mutex);
+}
+
#define alc_write_coef_idx(codec, coef_idx, coef_val) \
alc_write_coefex_idx(codec, 0x20, coef_idx, coef_val)
+static void __alc_update_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
+ unsigned int coef_idx, unsigned int mask,
+ unsigned int bits_set)
+{
+ unsigned int val = __alc_read_coefex_idx(codec, nid, coef_idx);
+
+ if (val != -1)
+ __alc_write_coefex_idx(codec, nid, coef_idx,
+ (val & ~mask) | bits_set);
+}
+
static void alc_update_coefex_idx(struct hda_codec *codec, hda_nid_t nid,
unsigned int coef_idx, unsigned int mask,
unsigned int bits_set)
{
- unsigned int val = alc_read_coefex_idx(codec, nid, coef_idx);
+ struct alc_spec *spec = codec->spec;
- if (val != -1)
- alc_write_coefex_idx(codec, nid, coef_idx,
- (val & ~mask) | bits_set);
+ mutex_lock(&spec->coef_mutex);
+ __alc_update_coefex_idx(codec, nid, coef_idx, mask, bits_set);
+ mutex_unlock(&spec->coef_mutex);
}
#define alc_update_coef_idx(codec, coef_idx, mask, bits_set) \
@@ -201,13 +235,17 @@ struct coef_fw {
static void alc_process_coef_fw(struct hda_codec *codec,
const struct coef_fw *fw)
{
+ struct alc_spec *spec = codec->spec;
+
+ mutex_lock(&spec->coef_mutex);
for (; fw->nid; fw++) {
if (fw->mask == (unsigned short)-1)
- alc_write_coefex_idx(codec, fw->nid, fw->idx, fw->val);
+ __alc_write_coefex_idx(codec, fw->nid, fw->idx, fw->val);
else
- alc_update_coefex_idx(codec, fw->nid, fw->idx,
- fw->mask, fw->val);
+ __alc_update_coefex_idx(codec, fw->nid, fw->idx,
+ fw->mask, fw->val);
}
+ mutex_unlock(&spec->coef_mutex);
}
/*
@@ -1153,6 +1191,7 @@ static int alc_alloc_spec(struct hda_codec *codec, hda_nid_t mixer_nid)
codec->spdif_status_reset = 1;
codec->forced_resume = 1;
codec->patch_ops = alc_patch_ops;
+ mutex_init(&spec->coef_mutex);
err = alc_codec_rename_from_preset(codec);
if (err < 0) {
--
2.31.1
On Wed, Jan 26, 2022 at 04:24:36PM +0100, Takashi Iwai wrote:
> > Given that the CPU number is the same in alc_update_coefex_idx(), it seems
> > these calls execution is interrupted and interleaved on the same core.
> > And, actually, there are two LEDs to set (mute and micmute). Am I onto
> > something here?
> That's an interesting finding, and yes, such a race is quite
> possible. Below is a quick fix as an attempt to cover it.
> Could you give it a try?
Well, results are somewhat mixed.
With the supplied patch (with a mutex), the original fixup 91502a9a0b0d ("ALSA:
hda/realtek: fix speakers and micmute on HP 855 G8") is no longer needed for
speakers to work. So, the original timing issue is identified now.
But unbind-bind problems with IO_PAGE_FAULT and "out of range cmd" are not
eliminated. IO_PAGE_FAULT are often logged without accompanying "out of range
cmd". And after adding debugging printk() I haven't managed to trigger "out of
range cmd" yet. But IO_PAGE_FAULT are more easily triggered.
Are there ways to trace origins of IO_PAGE_FAULT itself?
On Sat, Jan 29, 2022 at 05:47:07PM +0300, Alexander Sergeyev wrote:
> But unbind-bind problems with IO_PAGE_FAULT and "out of range cmd" are not
> eliminated. IO_PAGE_FAULT are often logged without accompanying "out of range
> cmd". And after adding debugging printk() I haven't managed to trigger "out
> of range cmd" yet. But IO_PAGE_FAULT are more easily triggered.
IO_PAGE_FAULTs go away with CONFIG_IOMMU_DEFAULT_PASSTHROUGH enabled. As I
understand, this leads to reduced DMA device isolation which is generally not
desirable. I was initially thinking about races between some delayed code and
io-memory pages unmapping, but first IO_PAGE_FAULTs (running non-passthrough
iommu) happen during bind operations as well.
What is also interesting, unbind & bind consistently fails on 31th bind:
echo -n '0000:05:00.6' > /sys/bus/pci/drivers/snd_hda_intel/bind
-bash: echo: write error: No such device
And does not recover from there until a reboot.
On Sun, 30 Jan 2022 12:10:20 +0100,
Alexander Sergeyev wrote:
>
> On Sat, Jan 29, 2022 at 05:47:07PM +0300, Alexander Sergeyev wrote:
> > But unbind-bind problems with IO_PAGE_FAULT and "out of range cmd" are not
> > eliminated. IO_PAGE_FAULT are often logged without accompanying "out of range
> > cmd". And after adding debugging printk() I haven't managed to trigger "out
> > of range cmd" yet. But IO_PAGE_FAULT are more easily triggered.
>
> IO_PAGE_FAULTs go away with CONFIG_IOMMU_DEFAULT_PASSTHROUGH enabled. As I
> understand, this leads to reduced DMA device isolation which is generally not
> desirable. I was initially thinking about races between some delayed code and
> io-memory pages unmapping, but first IO_PAGE_FAULTs (running non-passthrough
> iommu) happen during bind operations as well.
That's logical, as IOMMU is bypassed for DMA with that option.
I still don't get what really triggers it. This won't happen when you
build modules and load/unload the driver instead of dynamic binding?
And the out-of-range access is puzzling, too. I guess this comes from
the update of a COEF, i.e. it reads a strange value and tries to
update the value based on it. The problem is that it's no -1 but some
random value. This might be tied with the IOMMU error, and it might
reading unexpected address, which would be really bad.
In anyway, we need to track down exactly which access triggers those
errors...
> What is also interesting, unbind & bind consistently fails on 31th bind:
>
> echo -n '0000:05:00.6' > /sys/bus/pci/drivers/snd_hda_intel/bind
> -bash: echo: write error: No such device
>
> And does not recover from there until a reboot.
This is intended behavior. The driver has a static device index that
is incremented at each probe, so that the driver may probe multiple
instances. It'll be tricky to reset this for dynamic binding,
though. This problem is not only for HD-audio but for most of other
drivers. But I leave this as is for now, since the dynamic binding is
rarely used for PCI and other buses, so far.
Takashi
On Mon, Jan 31, 2022 at 03:57:04PM +0100, Takashi Iwai wrote:
> > IO_PAGE_FAULTs go away with CONFIG_IOMMU_DEFAULT_PASSTHROUGH enabled. As I
> > understand, this leads to reduced DMA device isolation which is generally
> > not desirable. I was initially thinking about races between some delayed
> > code and io-memory pages unmapping, but first IO_PAGE_FAULTs (running
> > non-passthrough iommu) happen during bind operations as well.
> I still don't get what really triggers it. This won't happen when you
> build modules and load/unload the driver instead of dynamic binding?
I've built snd_hda_intel as a module, everything else is left built-in.
The initial modprobe and rmmod were clean, but the subsequent modprobe gave
IO_PAGE_FAULT.
# modprobe snd-hda-intel
snd_hda_intel 0000:05:00.1: bound 0000:05:00.0 (ops amdgpu_dm_audio_component_bind_ops)
input: HD-Audio Generic HDMI/DP,pcm=3 as /devices/pci0000:00/0000:00:08.1/0000:05:00.1/sound/card0/input24
input: HD-Audio Generic HDMI/DP,pcm=7 as /devices/pci0000:00/0000:00:08.1/0000:05:00.1/sound/card0/input25
input: HD-Audio Generic HDMI/DP,pcm=8 as /devices/pci0000:00/0000:00:08.1/0000:05:00.1/sound/card0/input26
input: HD-Audio Generic HDMI/DP,pcm=9 as /devices/pci0000:00/0000:00:08.1/0000:05:00.1/sound/card0/input27
snd_hda_codec_realtek hdaudioC1D0: autoconfig for ALC285: line_outs=1 (0x14/0x0/0x0/0x0/0x0) type:speaker
snd_hda_codec_realtek hdaudioC1D0: speaker_outs=0 (0x0/0x0/0x0/0x0/0x0)
snd_hda_codec_realtek hdaudioC1D0: hp_outs=1 (0x21/0x0/0x0/0x0/0x0)
snd_hda_codec_realtek hdaudioC1D0: mono: mono_out=0x0
snd_hda_codec_realtek hdaudioC1D0: inputs:
snd_hda_codec_realtek hdaudioC1D0: Mic=0x19
snd_hda_codec_realtek hdaudioC1D0: Internal Mic=0x12
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff800 flags=0x0020]
On Sat, 05 Feb 2022 18:51:32 +0100,
Alexander Sergeyev wrote:
>
> On Mon, Jan 31, 2022 at 03:57:04PM +0100, Takashi Iwai wrote:
> > In anyway, we need to track down exactly which access triggers those
> > errors...
>
> I went deeper into codec reads and writes:
> - snd_hda_codec_write
> - snd_hdac_codec_write
> - codec_write
> - snd_hdac_exec_verb
> - codec_exec_verb
> - snd_hdac_bus_exec_verb_unlocked
> - azx_send_cmd / azx_get_response
> - snd_hdac_bus_send_cmd / azx_rirb_get_response
>
> In the last functions a circular buffer is used to write commands. The
> problem is that "bus->corb.buf[wp]" and "bus->rirb.res[addr]" are
> nowhere close to the IOMMU-reported address of the offending memory
> access. It's likely that I've missed other communication channels. But
> is it possible that IOMMU-reported address and buffers addresses are of
> different kinds (physical/virtual) or different regions mapped to the
> same physical pages?
>
> Example:
> snd_hdac_bus_send_cmd: bus->corb.buf[wp] = cpu_to_le32(val) // = 0x3b8000, wp=0xfb, &buf[wp]=00000000f1fd4592
> snd_hdac_bus_get_response: reading result from 0000000059c4003d
> snd_hdac_bus_send_cmd: bus->corb.buf[wp] = cpu_to_le32(val) // = 0x339000, wp=0xfc, &buf[wp]=000000007f14c128
> snd_hdac_bus_get_response: reading result from 0000000059c4003d
> snd_hdac_bus_send_cmd: bus->corb.buf[wp] = cpu_to_le32(val) // = 0x1470740, wp=0xfd, &buf[wp]=00000000a6b14901
> snd_hdac_bus_get_response: reading result from 0000000059c4003d
> snd_hdac_bus_send_cmd: bus->corb.buf[wp] = cpu_to_le32(val) // = 0x14ba000, wp=0xfe, &buf[wp]=00000000d8d1672a
> snd_hdac_bus_get_response: reading result from 0000000059c4003d
> snd_hdac_bus_send_cmd: bus->corb.buf[wp] = cpu_to_le32(val) // = 0x14b8000, wp=0xff, &buf[wp]=00000000b87b3287
> snd_hdac_bus_get_response: reading result from 0000000059c4003d
> snd_hdac_bus_send_cmd: bus->corb.buf[wp] = cpu_to_le32(val) // = 0x2ba000, wp=0x0, &buf[wp]=000000002162c728
> snd_hdac_bus_get_response: reading result from 0000000059c4003d
> snd_hdac_bus_send_cmd: bus->corb.buf[wp] = cpu_to_le32(val) // = 0x2b8000, wp=0x1, &buf[wp]=0000000095f61061
> snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff800 flags=0x0020]
Hm, I'm not sure, either. But let's try to avoid some possible
confusion at first, e.g. a patch like below.
Takashi
-- 8< --
diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c
index f7bd6e2db085..074199aa73ea 100644
--- a/sound/hda/hdac_controller.c
+++ b/sound/hda/hdac_controller.c
@@ -618,7 +618,7 @@ int snd_hdac_bus_alloc_stream_pages(struct hdac_bus *bus)
if (WARN_ON(!num_streams))
return -EINVAL;
/* allocate memory for the position buffer */
- err = snd_dma_alloc_pages(dma_type, bus->dev,
+ err = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, bus->dev,
num_streams * 8, &bus->posbuf);
if (err < 0)
return -ENOMEM;
@@ -626,7 +626,7 @@ int snd_hdac_bus_alloc_stream_pages(struct hdac_bus *bus)
s->posbuf = (__le32 *)(bus->posbuf.area + s->index * 8);
/* single page (at least 4096 bytes) must suffice for both ringbuffes */
- return snd_dma_alloc_pages(dma_type, bus->dev, PAGE_SIZE, &bus->rb);
+ return snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, bus->dev, PAGE_SIZE, &bus->rb);
}
EXPORT_SYMBOL_GPL(snd_hdac_bus_alloc_stream_pages);
On Mon, Jan 31, 2022 at 03:57:04PM +0100, Takashi Iwai wrote:
> In anyway, we need to track down exactly which access triggers those
> errors...
I went deeper into codec reads and writes:
- snd_hda_codec_write
- snd_hdac_codec_write
- codec_write
- snd_hdac_exec_verb
- codec_exec_verb
- snd_hdac_bus_exec_verb_unlocked
- azx_send_cmd / azx_get_response
- snd_hdac_bus_send_cmd / azx_rirb_get_response
In the last functions a circular buffer is used to write commands. The
problem is that "bus->corb.buf[wp]" and "bus->rirb.res[addr]" are
nowhere close to the IOMMU-reported address of the offending memory
access. It's likely that I've missed other communication channels. But
is it possible that IOMMU-reported address and buffers addresses are of
different kinds (physical/virtual) or different regions mapped to the
same physical pages?
Example:
snd_hdac_bus_send_cmd: bus->corb.buf[wp] = cpu_to_le32(val) // = 0x3b8000, wp=0xfb, &buf[wp]=00000000f1fd4592
snd_hdac_bus_get_response: reading result from 0000000059c4003d
snd_hdac_bus_send_cmd: bus->corb.buf[wp] = cpu_to_le32(val) // = 0x339000, wp=0xfc, &buf[wp]=000000007f14c128
snd_hdac_bus_get_response: reading result from 0000000059c4003d
snd_hdac_bus_send_cmd: bus->corb.buf[wp] = cpu_to_le32(val) // = 0x1470740, wp=0xfd, &buf[wp]=00000000a6b14901
snd_hdac_bus_get_response: reading result from 0000000059c4003d
snd_hdac_bus_send_cmd: bus->corb.buf[wp] = cpu_to_le32(val) // = 0x14ba000, wp=0xfe, &buf[wp]=00000000d8d1672a
snd_hdac_bus_get_response: reading result from 0000000059c4003d
snd_hdac_bus_send_cmd: bus->corb.buf[wp] = cpu_to_le32(val) // = 0x14b8000, wp=0xff, &buf[wp]=00000000b87b3287
snd_hdac_bus_get_response: reading result from 0000000059c4003d
snd_hdac_bus_send_cmd: bus->corb.buf[wp] = cpu_to_le32(val) // = 0x2ba000, wp=0x0, &buf[wp]=000000002162c728
snd_hdac_bus_get_response: reading result from 0000000059c4003d
snd_hdac_bus_send_cmd: bus->corb.buf[wp] = cpu_to_le32(val) // = 0x2b8000, wp=0x1, &buf[wp]=0000000095f61061
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff800 flags=0x0020]
On Tue, Feb 08, 2022 at 03:36:08PM +0100, Takashi Iwai wrote:
> ... and here is a fix patch for allowing more rebinds.
> Give it a try.
It works, no problems with large numbers of rebinds.
> -- 8< --
> From: Takashi Iwai <[email protected]>
> Subject: [PATCH] ALSA: hda: Fix driver index handling at re-binding
>
> HD-audio driver handles the multiple instances and keeps the static
> index that is incremented at each probe. This becomes a problem when
> user tries to re-bind the device via sysfs multiple times; as the
> device index isn't cleared unlike rmmod case, it points to the next
> element at re-binding, and eventually later you can't probe any more
> when it reaches to SNDRV_CARDS_MAX (usually 32).
>
> This patch is an attempt to improve the handling at rebinding.
> Instead of a static device index, now we keep a bitmap and assigns to
> the first zero bit position. At the driver remove, in return, the
> bitmap slot is cleared again, so that it'll be available for the next
> probe.
>
> Reported-by: Alexander Sergeyev <[email protected]>
> Signed-off-by: Takashi Iwai <[email protected]>
> ---
> sound/pci/hda/hda_intel.c | 10 +++++++---
> 1 file changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
> index 4b0338c4c543..a2922233e85f 100644
> --- a/sound/pci/hda/hda_intel.c
> +++ b/sound/pci/hda/hda_intel.c
> @@ -2064,14 +2064,16 @@ static const struct hda_controller_ops pci_hda_ops = {
> .position_check = azx_position_check,
> };
>
> +static DECLARE_BITMAP(probed_devs, SNDRV_CARDS);
> +
> static int azx_probe(struct pci_dev *pci,
> const struct pci_device_id *pci_id)
> {
> - static int dev;
> struct snd_card *card;
> struct hda_intel *hda;
> struct azx *chip;
> bool schedule_probe;
> + int dev;
> int err;
>
> if (pci_match_id(driver_denylist, pci)) {
> @@ -2079,10 +2081,11 @@ static int azx_probe(struct pci_dev *pci,
> return -ENODEV;
> }
>
> + dev = find_first_zero_bit(probed_devs, SNDRV_CARDS);
> if (dev >= SNDRV_CARDS)
> return -ENODEV;
> if (!enable[dev]) {
> - dev++;
> + set_bit(dev, probed_devs);
> return -ENOENT;
> }
>
> @@ -2149,7 +2152,7 @@ static int azx_probe(struct pci_dev *pci,
> if (schedule_probe)
> schedule_delayed_work(&hda->probe_work, 0);
>
> - dev++;
> + set_bit(dev, probed_devs);
> if (chip->disabled)
> complete_all(&hda->probe_wait);
> return 0;
> @@ -2372,6 +2375,7 @@ static void azx_remove(struct pci_dev *pci)
> cancel_delayed_work_sync(&hda->probe_work);
> device_lock(&pci->dev);
>
> + clear_bit(chip->dev_index, probed_devs);
> pci_set_drvdata(pci, NULL);
> snd_card_free(card);
> }
> --
> 2.34.1
>
On Mon, Feb 07, 2022 at 03:21:58PM +0100, Takashi Iwai wrote:
> > In the last functions a circular buffer is used to write commands. The
> > problem is that "bus->corb.buf[wp]" and "bus->rirb.res[addr]" are nowhere
> > close to the IOMMU-reported address of the offending memory access. It's
> > likely that I've missed other communication channels. But is it possible
> > that IOMMU-reported address and buffers addresses are of different kinds
> > (physical/virtual) or different regions mapped to the same physical pages?
> Hm, I'm not sure, either. But let's try to avoid some possible
> confusion at first, e.g. a patch like below.
No changes with the patch applied. Also, I added logs for dma_type used:
snd_hdac_bus_alloc_stream_pages: dma_type = 2
snd_hdac_bus_alloc_stream_pages: dma_type = 2
snd_hdac_bus_alloc_stream_pages: dma_type = 2
Which matches SNDRV_DMA_TYPE_DEV, so the same behavior is expected.
I've noticed that the IO_PAGE_FAULT regularly comes shortly after the write
position overflows and restarts from 0, while after the driver binding the wp
starts from 1 and not 0. Correlation does not mean causation, through. A
similar overflow happens during the initial kernel bootup with no error
messages. An another way of looking on it -- the fault comes on wp=0x1, which
corresponds to the first re-used address in the buffer.
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x14ba000, wp=0xfe, &buf[wp]=000000005b92167d
snd_hdac_bus_get_response: reading result from 0000000096c36d67
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x14b8000, wp=0xff, &buf[wp]=00000000a91a3679
snd_hdac_bus_get_response: reading result from 0000000096c36d67
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x2ba000, wp=0x0, &buf[wp]=000000002fda9222
snd_hdac_bus_get_response: reading result from 0000000096c36d67
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x2b8000, wp=0x1, &buf[wp]=000000009747a629
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff800 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff800 flags=0x0020]
And I finally got "out of range cmd", but logging is limited to IO addresses.
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x14ba000, wp=0xfe, &buf[wp]=0000000036a02eae
snd_hdac_bus_get_response: reading result from 0000000037aa0724
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x14b8000, wp=0xff, &buf[wp]=00000000ce140303
snd_hdac_bus_get_response: reading result from 0000000037aa0724
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x2ba000, wp=0x0, &buf[wp]=000000004c6aa283
snd_hdac_bus_get_response: reading result from 0000000037aa0724
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x2b8000, wp=0x1, &buf[wp]=000000002a825cc8
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff800 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff800 flags=0x0020]
snd_hdac_bus_get_response: reading result from 0000000037aa0724
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x239000, wp=0x2, &buf[wp]=0000000078eca2cf
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff800 flags=0x0020]
snd_hdac_bus_get_response: reading result from 0000000037aa0724
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x1970724, wp=0x3, &buf[wp]=00000000613071da
snd_hdac_bus_get_response: reading result from 0000000037aa0724
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x1270720, wp=0x4, &buf[wp]=000000006db33d93
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff800 flags=0x0020]
snd_hdac_bus_get_response: reading result from 0000000037aa0724
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff820 flags=0x0020]
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x8b2000, wp=0x5, &buf[wp]=000000002a3c7e90
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff820 flags=0x0020]
snd_hdac_bus_get_response: reading result from 0000000037aa0724
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x836080, wp=0x6, &buf[wp]=00000000571d53bf
snd_hdac_bus_get_response: reading result from 0000000037aa0724
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x8b0000, wp=0x7, &buf[wp]=000000000a52a2af
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff820 flags=0x0020]
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff820 flags=0x0020]
snd_hdac_bus_get_response: reading result from 0000000037aa0724
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x835080, wp=0x8, &buf[wp]=00000000f139c302
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff840 flags=0x0020]
snd_hdac_bus_get_response: reading result from 0000000037aa0724
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x23f000d, wp=0x9, &buf[wp]=000000003c565021
snd_hda_intel 0000:05:00.6: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0015 address=0x1fffff840 flags=0x0020]
snd_hdac_bus_get_response: reading result from 0000000037aa0724
...
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x205000b, wp=0x3e, &buf[wp]=000000002ce016ac
snd_hdac_bus_get_response: reading result from 0000000037aa0724
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x20c0000, wp=0x3f, &buf[wp]=000000003ad48d6f
snd_hdac_bus_get_response: reading result from 0000000037aa0724
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x8350a7, wp=0x40, &buf[wp]=0000000098c2fb2d
snd_hdac_bus_get_response: reading result from 0000000037aa0724
bus->corb.buf[wp] = cpu_to_le32(val) // = 0x205000b, wp=0x41, &buf[wp]=000000006e281f5b
snd_hdac_bus_get_response: reading result from 0000000037aa0724
snd_hda_codec_realtek hdaudioC1D0: out of range cmd 0:20:400:40600001
On Mon, 31 Jan 2022 15:57:04 +0100,
Takashi Iwai wrote:
>
> On Sun, 30 Jan 2022 12:10:20 +0100,
> Alexander Sergeyev wrote:
> > What is also interesting, unbind & bind consistently fails on 31th bind:
> >
> > echo -n '0000:05:00.6' > /sys/bus/pci/drivers/snd_hda_intel/bind
> > -bash: echo: write error: No such device
> >
> > And does not recover from there until a reboot.
>
> This is intended behavior. The driver has a static device index that
> is incremented at each probe, so that the driver may probe multiple
> instances. It'll be tricky to reset this for dynamic binding,
> though. This problem is not only for HD-audio but for most of other
> drivers. But I leave this as is for now, since the dynamic binding is
> rarely used for PCI and other buses, so far.
... and here is a fix patch for allowing more rebinds.
Give it a try.
Takashi
-- 8< --
From: Takashi Iwai <[email protected]>
Subject: [PATCH] ALSA: hda: Fix driver index handling at re-binding
HD-audio driver handles the multiple instances and keeps the static
index that is incremented at each probe. This becomes a problem when
user tries to re-bind the device via sysfs multiple times; as the
device index isn't cleared unlike rmmod case, it points to the next
element at re-binding, and eventually later you can't probe any more
when it reaches to SNDRV_CARDS_MAX (usually 32).
This patch is an attempt to improve the handling at rebinding.
Instead of a static device index, now we keep a bitmap and assigns to
the first zero bit position. At the driver remove, in return, the
bitmap slot is cleared again, so that it'll be available for the next
probe.
Reported-by: Alexander Sergeyev <[email protected]>
Signed-off-by: Takashi Iwai <[email protected]>
---
sound/pci/hda/hda_intel.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 4b0338c4c543..a2922233e85f 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2064,14 +2064,16 @@ static const struct hda_controller_ops pci_hda_ops = {
.position_check = azx_position_check,
};
+static DECLARE_BITMAP(probed_devs, SNDRV_CARDS);
+
static int azx_probe(struct pci_dev *pci,
const struct pci_device_id *pci_id)
{
- static int dev;
struct snd_card *card;
struct hda_intel *hda;
struct azx *chip;
bool schedule_probe;
+ int dev;
int err;
if (pci_match_id(driver_denylist, pci)) {
@@ -2079,10 +2081,11 @@ static int azx_probe(struct pci_dev *pci,
return -ENODEV;
}
+ dev = find_first_zero_bit(probed_devs, SNDRV_CARDS);
if (dev >= SNDRV_CARDS)
return -ENODEV;
if (!enable[dev]) {
- dev++;
+ set_bit(dev, probed_devs);
return -ENOENT;
}
@@ -2149,7 +2152,7 @@ static int azx_probe(struct pci_dev *pci,
if (schedule_probe)
schedule_delayed_work(&hda->probe_work, 0);
- dev++;
+ set_bit(dev, probed_devs);
if (chip->disabled)
complete_all(&hda->probe_wait);
return 0;
@@ -2372,6 +2375,7 @@ static void azx_remove(struct pci_dev *pci)
cancel_delayed_work_sync(&hda->probe_work);
device_lock(&pci->dev);
+ clear_bit(chip->dev_index, probed_devs);
pci_set_drvdata(pci, NULL);
snd_card_free(card);
}
--
2.34.1