It is possible that during system boot when there multiple devices
attempting simultaneous initialization on a slow control bus the
download of firmware and tuning data may take a user perceivable amount
of time.
Adopt a pattern used in the ASoC driver and perform this activity in a
background thread so that interactive performance is not impaired.
Signed-off-by: Simon Trimmer <[email protected]>
---
sound/pci/hda/cs35l56_hda.c | 89 ++++++++++++++++++++++++++++++-------
sound/pci/hda/cs35l56_hda.h | 4 ++
2 files changed, 76 insertions(+), 17 deletions(-)
diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c
index 1a3f84599cb5..042c2407c007 100644
--- a/sound/pci/hda/cs35l56_hda.c
+++ b/sound/pci/hda/cs35l56_hda.c
@@ -13,6 +13,7 @@
#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <linux/slab.h>
+#include <linux/workqueue.h>
#include <sound/core.h>
#include <sound/cs-amp-lib.h>
#include <sound/hda_codec.h>
@@ -50,11 +51,19 @@ static const struct reg_sequence cs35l56_hda_dai_config[] = {
};
+static void cs35l56_hda_wait_dsp_ready(struct cs35l56_hda *cs35l56)
+{
+ /* Wait for patching to complete */
+ flush_work(&cs35l56->dsp_work);
+}
+
static void cs35l56_hda_play(struct cs35l56_hda *cs35l56)
{
unsigned int val;
int ret;
+ cs35l56_hda_wait_dsp_ready(cs35l56);
+
pm_runtime_get_sync(cs35l56->base.dev);
ret = cs35l56_mbox_send(&cs35l56->base, CS35L56_MBOX_CMD_AUDIO_PLAY);
if (ret == 0) {
@@ -180,6 +189,8 @@ static int cs35l56_hda_mixer_get(struct snd_kcontrol *kcontrol,
unsigned int reg_val;
int i;
+ cs35l56_hda_wait_dsp_ready(cs35l56);
+
regmap_read(cs35l56->base.regmap, kcontrol->private_value, ®_val);
reg_val &= CS35L56_ASP_TXn_SRC_MASK;
@@ -203,6 +214,8 @@ static int cs35l56_hda_mixer_put(struct snd_kcontrol *kcontrol,
if (item >= CS35L56_NUM_INPUT_SRC)
return -EINVAL;
+ cs35l56_hda_wait_dsp_ready(cs35l56);
+
regmap_update_bits_check(cs35l56->base.regmap, kcontrol->private_value,
CS35L56_INPUT_MASK, cs35l56_tx_input_values[item],
&changed);
@@ -227,6 +240,8 @@ static int cs35l56_hda_posture_get(struct snd_kcontrol *kcontrol,
unsigned int pos;
int ret;
+ cs35l56_hda_wait_dsp_ready(cs35l56);
+
ret = regmap_read(cs35l56->base.regmap, CS35L56_MAIN_POSTURE_NUMBER, &pos);
if (ret)
return ret;
@@ -248,6 +263,8 @@ static int cs35l56_hda_posture_put(struct snd_kcontrol *kcontrol,
(pos > CS35L56_MAIN_POSTURE_MAX))
return -EINVAL;
+ cs35l56_hda_wait_dsp_ready(cs35l56);
+
ret = regmap_update_bits_check(cs35l56->base.regmap,
CS35L56_MAIN_POSTURE_NUMBER,
CS35L56_MAIN_POSTURE_MASK,
@@ -291,6 +308,8 @@ static int cs35l56_hda_vol_get(struct snd_kcontrol *kcontrol,
int vol;
int ret;
+ cs35l56_hda_wait_dsp_ready(cs35l56);
+
ret = regmap_read(cs35l56->base.regmap, CS35L56_MAIN_RENDER_USER_VOLUME, &raw_vol);
if (ret)
@@ -323,6 +342,8 @@ static int cs35l56_hda_vol_put(struct snd_kcontrol *kcontrol,
raw_vol = (vol + CS35L56_MAIN_RENDER_USER_VOLUME_MIN) <<
CS35L56_MAIN_RENDER_USER_VOLUME_SHIFT;
+ cs35l56_hda_wait_dsp_ready(cs35l56);
+
ret = regmap_update_bits_check(cs35l56->base.regmap,
CS35L56_MAIN_RENDER_USER_VOLUME,
CS35L56_MAIN_RENDER_USER_VOLUME_MASK,
@@ -539,8 +560,9 @@ static void cs35l56_hda_release_firmware_files(const struct firmware *wmfw_firmw
kfree(coeff_filename);
}
-static void cs35l56_hda_add_dsp_controls(struct cs35l56_hda *cs35l56)
+static void cs35l56_hda_create_dsp_controls_work(struct work_struct *work)
{
+ struct cs35l56_hda *cs35l56 = container_of(work, struct cs35l56_hda, control_work);
struct hda_cs_dsp_ctl_info info;
info.device_name = cs35l56->amp_name;
@@ -566,7 +588,7 @@ static void cs35l56_hda_apply_calibration(struct cs35l56_hda *cs35l56)
dev_info(cs35l56->base.dev, "Calibration applied\n");
}
-static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
+static void cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
{
const struct firmware *coeff_firmware = NULL;
const struct firmware *wmfw_firmware = NULL;
@@ -574,15 +596,27 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
char *wmfw_filename = NULL;
unsigned int preloaded_fw_ver;
bool firmware_missing;
- int ret = 0;
+ bool add_dsp_controls_required = false;
+ int ret;
- /* Prepare for a new DSP power-up */
+ /*
+ * Prepare for a new DSP power-up. If the DSP has had firmware
+ * downloaded previously then it needs to be powered down so that it
+ * can be updated and if hadn't been patched before then the controls
+ * will need to be added once firmware download succeeds.
+ */
if (cs35l56->base.fw_patched)
cs_dsp_power_down(&cs35l56->cs_dsp);
+ else
+ add_dsp_controls_required = true;
cs35l56->base.fw_patched = false;
- pm_runtime_get_sync(cs35l56->base.dev);
+ ret = pm_runtime_resume_and_get(cs35l56->base.dev);
+ if (ret < 0) {
+ dev_err(cs35l56->base.dev, "Failed to resume and get %d\n", ret);
+ return;
+ }
/*
* The firmware can only be upgraded if it is currently running
@@ -606,7 +640,6 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
*/
if (!coeff_firmware && firmware_missing) {
dev_err(cs35l56->base.dev, ".bin file required but not found\n");
- ret = -ENOENT;
goto err_fw_release;
}
@@ -657,6 +690,15 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
CS35L56_FIRMWARE_MISSING);
cs35l56->base.fw_patched = true;
+ /*
+ * Adding controls is deferred to prevent a lock inversion - ALSA takes
+ * the controls_rwsem when adding a control, the get() / put()
+ * functions of a control are called holding controls_rwsem and those
+ * that depend on running firmware wait for dsp_work() to complete.
+ */
+ if (add_dsp_controls_required)
+ queue_work(cs35l56->dsp_wq, &cs35l56->control_work);
+
ret = cs_dsp_run(&cs35l56->cs_dsp);
if (ret)
dev_dbg(cs35l56->base.dev, "%s: cs_dsp_run ret %d\n", __func__, ret);
@@ -676,15 +718,19 @@ static int cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56)
coeff_firmware, coeff_filename);
err_pm_put:
pm_runtime_put(cs35l56->base.dev);
+}
- return ret;
+static void cs35l56_hda_dsp_work(struct work_struct *work)
+{
+ struct cs35l56_hda *cs35l56 = container_of(work, struct cs35l56_hda, dsp_work);
+
+ cs35l56_hda_fw_load(cs35l56);
}
static int cs35l56_hda_bind(struct device *dev, struct device *master, void *master_data)
{
struct cs35l56_hda *cs35l56 = dev_get_drvdata(dev);
struct hda_component *comps = master_data;
- int ret;
if (!comps || cs35l56->index < 0 || cs35l56->index >= HDA_MAX_COMPONENTS)
return -EINVAL;
@@ -698,12 +744,9 @@ static int cs35l56_hda_bind(struct device *dev, struct device *master, void *mas
strscpy(comps->name, dev_name(dev), sizeof(comps->name));
comps->playback_hook = cs35l56_hda_playback_hook;
- ret = cs35l56_hda_fw_load(cs35l56);
- if (ret)
- return ret;
+ queue_work(cs35l56->dsp_wq, &cs35l56->dsp_work);
cs35l56_hda_create_controls(cs35l56);
- cs35l56_hda_add_dsp_controls(cs35l56);
#if IS_ENABLED(CONFIG_SND_DEBUG)
cs35l56->debugfs_root = debugfs_create_dir(dev_name(cs35l56->base.dev), sound_debugfs_root);
@@ -720,6 +763,9 @@ static void cs35l56_hda_unbind(struct device *dev, struct device *master, void *
struct cs35l56_hda *cs35l56 = dev_get_drvdata(dev);
struct hda_component *comps = master_data;
+ cancel_work_sync(&cs35l56->dsp_work);
+ cancel_work_sync(&cs35l56->control_work);
+
cs35l56_hda_remove_controls(cs35l56);
#if IS_ENABLED(CONFIG_SND_DEBUG)
@@ -747,6 +793,10 @@ static int cs35l56_hda_system_suspend(struct device *dev)
{
struct cs35l56_hda *cs35l56 = dev_get_drvdata(dev);
+ cs35l56_hda_wait_dsp_ready(cs35l56);
+
+ flush_work(&cs35l56->control_work);
+
if (cs35l56->playing)
cs35l56_hda_pause(cs35l56);
@@ -842,11 +892,8 @@ static int cs35l56_hda_system_resume(struct device *dev)
ret = cs35l56_is_fw_reload_needed(&cs35l56->base);
dev_dbg(cs35l56->base.dev, "fw_reload_needed: %d\n", ret);
- if (ret > 0) {
- ret = cs35l56_hda_fw_load(cs35l56);
- if (ret)
- return ret;
- }
+ if (ret > 0)
+ queue_work(cs35l56->dsp_wq, &cs35l56->dsp_work);
if (cs35l56->playing)
cs35l56_hda_play(cs35l56);
@@ -964,6 +1011,14 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id)
mutex_init(&cs35l56->base.irq_lock);
dev_set_drvdata(cs35l56->base.dev, cs35l56);
+ cs35l56->dsp_wq = create_singlethread_workqueue("cs35l56-dsp");
+ if (!cs35l56->dsp_wq) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ INIT_WORK(&cs35l56->dsp_work, cs35l56_hda_dsp_work);
+ INIT_WORK(&cs35l56->control_work, cs35l56_hda_create_dsp_controls_work);
+
ret = cs35l56_hda_read_acpi(cs35l56, hid, id);
if (ret)
goto err;
diff --git a/sound/pci/hda/cs35l56_hda.h b/sound/pci/hda/cs35l56_hda.h
index 464e4aa63cd1..d207e4e0d167 100644
--- a/sound/pci/hda/cs35l56_hda.h
+++ b/sound/pci/hda/cs35l56_hda.h
@@ -14,6 +14,7 @@
#include <linux/firmware/cirrus/cs_dsp.h>
#include <linux/firmware/cirrus/wmfw.h>
#include <linux/regulator/consumer.h>
+#include <linux/workqueue.h>
#include <sound/cs35l56.h>
struct dentry;
@@ -21,6 +22,9 @@ struct dentry;
struct cs35l56_hda {
struct cs35l56_base base;
struct hda_codec *codec;
+ struct work_struct dsp_work;
+ struct work_struct control_work;
+ struct workqueue_struct *dsp_wq;
int index;
const char *system_name;
--
2.34.1
On Wed, 01 May 2024 13:17:55 +0200,
Simon Trimmer wrote:
> @@ -964,6 +1011,14 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id)
> mutex_init(&cs35l56->base.irq_lock);
> dev_set_drvdata(cs35l56->base.dev, cs35l56);
>
> + cs35l56->dsp_wq = create_singlethread_workqueue("cs35l56-dsp");
> + if (!cs35l56->dsp_wq) {
> + ret = -ENOMEM;
> + goto err;
> + }
Do we really need a dedicated workqueue? In most usages, simple
schedule_work*() works fine and is recommended.
thanks,
Takashi
On 02/05/2024 08:34, Takashi Iwai wrote:
> On Wed, 01 May 2024 13:17:55 +0200,
> Simon Trimmer wrote:
>> @@ -964,6 +1011,14 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id)
>> mutex_init(&cs35l56->base.irq_lock);
>> dev_set_drvdata(cs35l56->base.dev, cs35l56);
>>
>> + cs35l56->dsp_wq = create_singlethread_workqueue("cs35l56-dsp");
>> + if (!cs35l56->dsp_wq) {
>> + ret = -ENOMEM;
>> + goto err;
>> + }
>
> Do we really need a dedicated workqueue? In most usages, simple
> schedule_work*() works fine and is recommended.
>
On a slow I2C bus with 4 amps this work could take over 2 seconds.
That seems too long to be blocking a global system queue. We use a
dedicated queue in the ASoC driver.
Also if we queue work on an ordered (single-threaded) system queue the
firmware won't be downloaded to multiple amps in parallel, so we don't
get the best use of the available bus bandwidth.
>
> thanks,
>
> Takashi
On Thu, 02 May 2024 11:21:36 +0200,
Richard Fitzgerald wrote:
>
> On 02/05/2024 08:34, Takashi Iwai wrote:
> > On Wed, 01 May 2024 13:17:55 +0200,
> > Simon Trimmer wrote:
> >> @@ -964,6 +1011,14 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id)
> >> mutex_init(&cs35l56->base.irq_lock);
> >> dev_set_drvdata(cs35l56->base.dev, cs35l56);
> >> + cs35l56->dsp_wq =
> >> create_singlethread_workqueue("cs35l56-dsp");
> >> + if (!cs35l56->dsp_wq) {
> >> + ret = -ENOMEM;
> >> + goto err;
> >> + }
> >
> > Do we really need a dedicated workqueue? In most usages, simple
> > schedule_work*() works fine and is recommended.
> >
>
> On a slow I2C bus with 4 amps this work could take over 2 seconds.
> That seems too long to be blocking a global system queue. We use a
> dedicated queue in the ASoC driver.
>
> Also if we queue work on an ordered (single-threaded) system queue the
> firmware won't be downloaded to multiple amps in parallel, so we don't
> get the best use of the available bus bandwidth.
OK, that sounds like a sensible argument.
But the patch has no call of a queue destructor. Won't it leak
resources?
thanks,
Takashi
On Thu, 02 May 2024 12:17:48 +0200,
Simon Trimmer wrote:
>
> > -----Original Message-----
> > From: Takashi Iwai <[email protected]>
> > Sent: Thursday, May 2, 2024 10:53 AM
> > To: Richard Fitzgerald <[email protected]>
> > Cc: Simon Trimmer <[email protected]>; [email protected]; linux-
> > [email protected]; [email protected]; linux-
> > [email protected]; [email protected]
> > Subject: Re: [PATCH] ALSA: hda: cs35l56: Perform firmware download in the
> > background
> >
> > On Thu, 02 May 2024 11:21:36 +0200,
> > Richard Fitzgerald wrote:
> > >
> > > On 02/05/2024 08:34, Takashi Iwai wrote:
> > > > On Wed, 01 May 2024 13:17:55 +0200,
> > > > Simon Trimmer wrote:
> > > >> @@ -964,6 +1011,14 @@ int cs35l56_hda_common_probe(struct
> > cs35l56_hda *cs35l56, int hid, int id)
> > > >> mutex_init(&cs35l56->base.irq_lock);
> > > >> dev_set_drvdata(cs35l56->base.dev, cs35l56);
> > > >> + cs35l56->dsp_wq =
> > > >> create_singlethread_workqueue("cs35l56-dsp");
> > > >> + if (!cs35l56->dsp_wq) {
> > > >> + ret = -ENOMEM;
> > > >> + goto err;
> > > >> + }
> > > >
> > > > Do we really need a dedicated workqueue? In most usages, simple
> > > > schedule_work*() works fine and is recommended.
> > > >
> > >
> > > On a slow I2C bus with 4 amps this work could take over 2 seconds.
> > > That seems too long to be blocking a global system queue. We use a
> > > dedicated queue in the ASoC driver.
> > >
> > > Also if we queue work on an ordered (single-threaded) system queue the
> > > firmware won't be downloaded to multiple amps in parallel, so we don't
> > > get the best use of the available bus bandwidth.
> >
> > OK, that sounds like a sensible argument.
> >
> > But the patch has no call of a queue destructor. Won't it leak
> > resources?
>
> Oops that's a good spot - I missed that and will send a v2
If you submit a newer version, it'd be appreciated to explain about
the workqueue usage in the patch description, too.
thanks,
Takashi
> -----Original Message-----
> From: Takashi Iwai <[email protected]>
> Sent: Thursday, May 2, 2024 10:53 AM
> To: Richard Fitzgerald <[email protected]>
> Cc: Simon Trimmer <[email protected]>; [email protected]; linux-
> [email protected]; [email protected]; linux-
> [email protected]; [email protected]
> Subject: Re: [PATCH] ALSA: hda: cs35l56: Perform firmware download in the
> background
>
> On Thu, 02 May 2024 11:21:36 +0200,
> Richard Fitzgerald wrote:
> >
> > On 02/05/2024 08:34, Takashi Iwai wrote:
> > > On Wed, 01 May 2024 13:17:55 +0200,
> > > Simon Trimmer wrote:
> > >> @@ -964,6 +1011,14 @@ int cs35l56_hda_common_probe(struct
> cs35l56_hda *cs35l56, int hid, int id)
> > >> mutex_init(&cs35l56->base.irq_lock);
> > >> dev_set_drvdata(cs35l56->base.dev, cs35l56);
> > >> + cs35l56->dsp_wq =
> > >> create_singlethread_workqueue("cs35l56-dsp");
> > >> + if (!cs35l56->dsp_wq) {
> > >> + ret = -ENOMEM;
> > >> + goto err;
> > >> + }
> > >
> > > Do we really need a dedicated workqueue? In most usages, simple
> > > schedule_work*() works fine and is recommended.
> > >
> >
> > On a slow I2C bus with 4 amps this work could take over 2 seconds.
> > That seems too long to be blocking a global system queue. We use a
> > dedicated queue in the ASoC driver.
> >
> > Also if we queue work on an ordered (single-threaded) system queue the
> > firmware won't be downloaded to multiple amps in parallel, so we don't
> > get the best use of the available bus bandwidth.
>
> OK, that sounds like a sensible argument.
>
> But the patch has no call of a queue destructor. Won't it leak
> resources?
Oops that's a good spot - I missed that and will send a v2
Cheers,
-Simon