Hi,
I'm running 5.10.111 LTS, so if this has been fixed already then we definitely
want to cherry pick the fix for -stable.
Anonymous union in this struct is of zero size
/* generic control data */
struct sof_ipc_ctrl_data {
struct sof_ipc_reply rhdr;
uint32_t comp_id;
/* control access and data type */
uint32_t type; /**< enum sof_ipc_ctrl_type */
uint32_t cmd; /**< enum sof_ipc_ctrl_cmd */
uint32_t index; /**< control index for comps > 1 control */
/* control data - can either be appended or DMAed from host */
struct sof_ipc_host_buffer buffer;
uint32_t num_elems; /**< in array elems or bytes for data type */
uint32_t elems_remaining; /**< elems remaining if sent in parts */
uint32_t msg_index; /**< for large messages sent in parts */
/* reserved for future use */
uint32_t reserved[6];
/* control data - add new types if needed */
union {
/* channel values can be used by volume type controls */
struct sof_ipc_ctrl_value_chan chanv[0];
/* component values used by routing controls like mux, mixer */
struct sof_ipc_ctrl_value_comp compv[0];
/* data can be used by binary controls */
struct sof_abi_hdr data[0];
};
} __packed;
sof_ipc_ctrl_value_chan and sof_ipc_ctrl_value_comp are of the same
size - 8 bytes, while sof_abi_hdr is much larger - _at least_ 32 bytes
(`__u32 data[0]` in sof_abi_hdr suggests that there should be more
payload after the header). But they all contribute 0 to sizeof(sof_ipc_ctrl_data).
Now the control data allocation looks as follows:
scontrol->size = struct_size(scontrol->control_data, chanv,
le32_to_cpu(mc->num_channels));
scontrol->control_data = kzalloc(scontrol->size, GFP_KERNEL);
Which is sizeof(sof_ipc_ctrl_data) + mc->num_channels * sizeof(sof_ipc_ctrl_value_chan)
For some reason it uses sizeof(sof_ipc_ctrl_value_chan), which is not
the largest member of the union.
And this is where the problem is: in order to make control->data.FOO loads
and stores legal we need mc->num_channels to be at least 4. So that
sizeof(sof_ipc_ctrl_data) + mc->num_channels * sizeof(sof_ipc_ctrl_value_chan)
92 + 4 * 8
will be the same as
sizeof(sof_ipc_ctrl_data) + sizeof(sof_abi_hdr).
92 + 32
Otherwise scontrol->control_data->data.FOO will access nearby/foreign
slab object.
And there is at least one such memory access. In sof_get_control_data().
wdata[i].pdata = wdata[i].control->control_data->data;
*size += wdata[i].pdata->size;
pdata->size is at offset 8, but if, say, mc->num_channels == 1 then
we allocate only 8 bytes for pdata, so pdata->size is 4 bytes outside
of allocated slab object.
Thoughts?
On 4/19/22 06:50, Péter Ujfalusi wrote:
> Hi Sergey, Pierre,
>
> On 15/04/2022 19:00, Pierre-Louis Bossart wrote:
>> Thanks Sergey for this email.
>>
>> On 4/15/22 04:23, Sergey Senozhatsky wrote:
>>> Hi,
>>>
>>> I'm running 5.10.111 LTS, so if this has been fixed already then we definitely
>>> want to cherry pick the fix for -stable.
>
> I'm afraid, that this is still valid as of today, but in real life I
> don't think it can happen.
>
>>> Anonymous union in this struct is of zero size
>>>
>>> /* generic control data */
>>> struct sof_ipc_ctrl_data {
>>> struct sof_ipc_reply rhdr;
>>> uint32_t comp_id;
>>>
>>> /* control access and data type */
>>> uint32_t type; /**< enum sof_ipc_ctrl_type */
>>> uint32_t cmd; /**< enum sof_ipc_ctrl_cmd */
>>> uint32_t index; /**< control index for comps > 1 control */
>>>
>>> /* control data - can either be appended or DMAed from host */
>>> struct sof_ipc_host_buffer buffer;
>>> uint32_t num_elems; /**< in array elems or bytes for data type */
>>> uint32_t elems_remaining; /**< elems remaining if sent in parts */
>>>
>>> uint32_t msg_index; /**< for large messages sent in parts */
>>>
>>> /* reserved for future use */
>>> uint32_t reserved[6];
>>>
>>> /* control data - add new types if needed */
>>> union {
>>> /* channel values can be used by volume type controls */
>>> struct sof_ipc_ctrl_value_chan chanv[0];
>>> /* component values used by routing controls like mux, mixer */
>>> struct sof_ipc_ctrl_value_comp compv[0];
>>> /* data can be used by binary controls */
>>> struct sof_abi_hdr data[0];
>>> };
>>> } __packed;
>>>
>>> sof_ipc_ctrl_value_chan and sof_ipc_ctrl_value_comp are of the same
>>> size - 8 bytes, while sof_abi_hdr is much larger - _at least_ 32 bytes
>>> (`__u32 data[0]` in sof_abi_hdr suggest that there should be more
>>> payload after header). But they all contribute 0 to sizeof(sof_ipc_ctrl_data).
>>>
>>> Now control data allocations looks as follows
>>>
>>> scontrol->size = struct_size(scontrol->control_data, chanv,
>>> le32_to_cpu(mc->num_channels));
>>> scontrol->control_data = kzalloc(scontrol->size, GFP_KERNEL);
>>>
>>> Which is sizeof(sof_ipc_ctrl_data) + mc->num_channels * sizeof(sof_ipc_ctrl_value_chan)
>>>
>>> For some reason it uses sizeof(sof_ipc_ctrl_value_chan), which is not
>>> the largest member of the union.
>>>
>>> And this is where the problem is: in order to make control->data.FOO loads
>>> and stores legal we need mc->num_channels to be of at least 4. So that
>>>
>>> sizeof(sof_ipc_ctrl_data) + mc->num_channels * sizeof(sof_ipc_ctrl_value_chan)
>>>
>>> 92 + 4 * 8
>>>
>>> will be the same as
>>>
>>> sizeof(sof_ipc_ctrl_data) + sizeof(sof_abi_hdr).
>>>
>>> 92 + 32
>>>
>>> Otherwise scontrol->control_data->data.FOO will access nearby/foreign
>>> slab object.
>>>
>>> And there is at least one such memory access. In sof_get_control_data().
>>>
>>> wdata[i].pdata = wdata[i].control->control_data->data;
>>> *size += wdata[i].pdata->size;
>>>
>>>
>>> pdata->size is at offset 8, but if, say, mc->num_channels == 1 then
>>> we allocate only 8 bytes for pdata, so pdata->size is 4 bytes outside
>>> of allocated slab object.
>>>
>>> Thoughts?
>
> Your analysis is spot on, unfortunately. But...
>
> As of today, the sof_get_control_data() is in the call path of
> (ipc3-topology.c):
>
> sof_widget_update_ipc_comp_process() -> sof_process_load() ->
> sof_get_control_data()
>
> sof_widget_update_ipc_comp_process() is the ipc_setup callback for
> snd_soc_dapm_effect. If I'm not mistaken these only carry binary payloads
> and never MIXER/ENUM/SWITCH/VOLUME.
> This means that the sof_get_control_data() is only called with
> SND_SOC_TPLG_TYPE_BYTES and for that the allocated data area is correct.
>
> This can explain why we have not seen any issues so far. This does not
> make the code right, though: as it is written at the moment, it is wrong.
Sergey's results with KASAN show that there's a real-life problem though. I also don't understand how that might happen.
Could it be that these results are with a specific topology where our assumptions are incorrect?
> Now control data allocations looks as follows
>
> scontrol->size = struct_size(scontrol->control_data, chanv,
> le32_to_cpu(mc->num_channels));
> scontrol->control_data = kzalloc(scontrol->size, GFP_KERNEL);
>
> Which is sizeof(sof_ipc_ctrl_data) + mc->num_channels * sizeof(sof_ipc_ctrl_value_chan)
>
> For some reason it uses sizeof(sof_ipc_ctrl_value_chan), which is not
> the largest member of the union.
>
For the record, this could be hitting as far back as 5.4 as I have
been trying to debug an invalid IPC write in JSL.
On Tue, Apr 19, 2022 at 10:55 AM Pierre-Louis Bossart
<[email protected]> wrote:
>
>
>
> On 4/19/22 06:50, Péter Ujfalusi wrote:
> > Hi Sergey, Pierre,
> >
> > On 15/04/2022 19:00, Pierre-Louis Bossart wrote:
> >> Thanks Sergey for this email.
> >>
> >> On 4/15/22 04:23, Sergey Senozhatsky wrote:
> >>> Hi,
> >>>
> >>> I'm running 5.10.111 LTS, so if this has been fixed already then we definitely
> >>> want to cherry pick the fix for -stable.
> >
> > I'm afraid, that this is still valid as of today, but in real life I
> > don't think it can happen.
> >
> >>> Anonymous union in this struct is of zero size
> >>>
> >>> /* generic control data */
> >>> struct sof_ipc_ctrl_data {
> >>> struct sof_ipc_reply rhdr;
> >>> uint32_t comp_id;
> >>>
> >>> /* control access and data type */
> >>> uint32_t type; /**< enum sof_ipc_ctrl_type */
> >>> uint32_t cmd; /**< enum sof_ipc_ctrl_cmd */
> >>> uint32_t index; /**< control index for comps > 1 control */
> >>>
> >>> /* control data - can either be appended or DMAed from host */
> >>> struct sof_ipc_host_buffer buffer;
> >>> uint32_t num_elems; /**< in array elems or bytes for data type */
> >>> uint32_t elems_remaining; /**< elems remaining if sent in parts */
> >>>
> >>> uint32_t msg_index; /**< for large messages sent in parts */
> >>>
> >>> /* reserved for future use */
> >>> uint32_t reserved[6];
> >>>
> >>> /* control data - add new types if needed */
> >>> union {
> >>> /* channel values can be used by volume type controls */
> >>> struct sof_ipc_ctrl_value_chan chanv[0];
> >>> /* component values used by routing controls like mux, mixer */
> >>> struct sof_ipc_ctrl_value_comp compv[0];
> >>> /* data can be used by binary controls */
> >>> struct sof_abi_hdr data[0];
> >>> };
> >>> } __packed;
> >>>
> >>> sof_ipc_ctrl_value_chan and sof_ipc_ctrl_value_comp are of the same
> >>> size - 8 bytes, while sof_abi_hdr is much larger - _at least_ 32 bytes
> >>> (`__u32 data[0]` in sof_abi_hdr suggest that there should be more
> >>> payload after header). But they all contribute 0 to sizeof(sof_ipc_ctrl_data).
> >>>
> >>> Now control data allocations looks as follows
> >>>
> >>> scontrol->size = struct_size(scontrol->control_data, chanv,
> >>> le32_to_cpu(mc->num_channels));
> >>> scontrol->control_data = kzalloc(scontrol->size, GFP_KERNEL);
> >>>
> >>> Which is sizeof(sof_ipc_ctrl_data) + mc->num_channels * sizeof(sof_ipc_ctrl_value_chan)
> >>>
> >>> For some reason it uses sizeof(sof_ipc_ctrl_value_chan), which is not
> >>> the largest member of the union.
> >>>
> >>> And this is where the problem is: in order to make control->data.FOO loads
> >>> and stores legal we need mc->num_channels to be of at least 4. So that
> >>>
> >>> sizeof(sof_ipc_ctrl_data) + mc->num_channels * sizeof(sof_ipc_ctrl_value_chan)
> >>>
> >>> 92 + 4 * 8
> >>>
> >>> will be the same as
> >>>
> >>> sizeof(sof_ipc_ctrl_data) + sizeof(sof_abi_hdr).
> >>>
> >>> 92 + 32
> >>>
> >>> Otherwise scontrol->control_data->data.FOO will access nearby/foreign
> >>> slab object.
> >>>
> >>> And there is at least one such memory access. In sof_get_control_data().
> >>>
> >>> wdata[i].pdata = wdata[i].control->control_data->data;
> >>> *size += wdata[i].pdata->size;
> >>>
> >>>
> >>> pdata->size is at offset 8, but if, say, mc->num_channels == 1 then
> >>> we allocate only 8 bytes for pdata, so pdata->size is 4 bytes outside
> >>> of allocated slab object.
> >>>
> >>> Thoughts?
> >
> > Your analyzes are spot on, unfortunately. But...
> >
> > As of today, the sof_get_control_data() is in the call path of
> > (ipc3-topology.c):
> >
> > sof_widget_update_ipc_comp_process() -> sof_process_load() ->
> > sof_get_control_data()
> >
> > sof_widget_update_ipc_comp_process() is the ipc_setup callback for
> > snd_soc_dapm_effect. If I'm not mistaken these only carries bin payload
> > and never MIXER/ENUM/SWITCH/VOLUME.
> > This means that the sof_get_control_data() is only called with
> > SND_SOC_TPLG_TYPE_BYTES and for that the allocated data area is correct.
> >
> > This can explain why we have not seen any issues so far. This does not
> > renders the code right, as how it is written atm is wrong.
>
>
> Sergey's results with KASAN show that there's a real-life problem though. I also don't understand how that might happen.
>
> Could it be that these results are with a specific topology where our assumptions are incorrect?
>
That would align with our testing, as we are seeing the failure on
exactly one device with a custom topology with a bytes payload. See
sof-jsl-rt5682.m4 with -DWAVES configured for
sof-jsl-rt5682-rt1015.tplg
On (22/04/19 08:07), Pierre-Louis Bossart wrote:
> > Your analyzes are spot on, unfortunately. But...
> >
> > As of today, the sof_get_control_data() is in the call path of
> > (ipc3-topology.c):
> >
> > sof_widget_update_ipc_comp_process() -> sof_process_load() ->
> > sof_get_control_data()
> >
> > sof_widget_update_ipc_comp_process() is the ipc_setup callback for
> > snd_soc_dapm_effect. If I'm not mistaken these only carries bin payload
> > and never MIXER/ENUM/SWITCH/VOLUME.
> > This means that the sof_get_control_data() is only called with
> > SND_SOC_TPLG_TYPE_BYTES and for that the allocated data area is correct.
> >
> > This can explain why we have not seen any issues so far. This does not
> > renders the code right, as how it is written atm is wrong.
>
>
> Sergey's results with KASAN show that there's a real-life problem though. I also don't understand how that might happen.
>
> Could it be that these results are with a specific topology where our assumptions are incorrect?
Is there anything I can do to help?
On 27/04/2022 09:55, Sergey Senozhatsky wrote:
> On (22/04/19 08:07), Pierre-Louis Bossart wrote:
>>> Your analyzes are spot on, unfortunately. But...
>>>
>>> As of today, the sof_get_control_data() is in the call path of
>>> (ipc3-topology.c):
>>>
>>> sof_widget_update_ipc_comp_process() -> sof_process_load() ->
>>> sof_get_control_data()
>>>
>>> sof_widget_update_ipc_comp_process() is the ipc_setup callback for
>>> snd_soc_dapm_effect. If I'm not mistaken these only carries bin payload
>>> and never MIXER/ENUM/SWITCH/VOLUME.
>>> This means that the sof_get_control_data() is only called with
>>> SND_SOC_TPLG_TYPE_BYTES and for that the allocated data area is correct.
>>>
>>> This can explain why we have not seen any issues so far. This does not
>>> renders the code right, as how it is written atm is wrong.
>>
>>
>> Sergey's results with KASAN show that there's a real-life problem though. I also don't understand how that might happen.
>>
>> Could it be that these results are with a specific topology where our assumptions are incorrect?
>
> Is there anything I can do to help?
I will send a patch shortly, I think it is going to be easy to backport
for you and test it.
--
Péter