When CONFIG_MTD_XIP=y, local_irq_disable() is called in xip_disable().
To avoid sleep in interrupt context, we need to call local_irq_enable()
before schedule().
The problem call stack is as follows:
bug1:
do_write_oneword_retry()
xip_disable()
local_irq_disable()
do_write_oneword_once()
schedule()
bug2:
do_write_buffer()
xip_disable()
local_irq_disable()
do_write_buffer_wait()
schedule()
bug3:
do_erase_chip()
xip_disable()
local_irq_disable()
schedule()
bug4:
do_erase_oneblock()
xip_disable()
local_irq_disable()
schedule()
Fixes: 02b15e343aee ("[MTD] XIP for AMD CFI flash.")
Cc: [email protected] # v2.6.13
Signed-off-by: Xiaoming Ni <[email protected]>
---
drivers/mtd/chips/cfi_cmdset_0002.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index a1f3e1031c3d..12c3776f093a 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -1682,7 +1682,11 @@ static int __xipram do_write_oneword_once(struct map_info *map,
set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(&chip->wq, &wait);
mutex_unlock(&chip->mutex);
+ if (IS_ENABLED(CONFIG_MTD_XIP))
+ local_irq_enable();
schedule();
+ if (IS_ENABLED(CONFIG_MTD_XIP))
+ local_irq_disable();
remove_wait_queue(&chip->wq, &wait);
timeo = jiffies + (HZ / 2); /* FIXME */
mutex_lock(&chip->mutex);
@@ -1962,7 +1966,11 @@ static int __xipram do_write_buffer_wait(struct map_info *map,
set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(&chip->wq, &wait);
mutex_unlock(&chip->mutex);
+ if (IS_ENABLED(CONFIG_MTD_XIP))
+ local_irq_enable();
schedule();
+ if (IS_ENABLED(CONFIG_MTD_XIP))
+ local_irq_disable();
remove_wait_queue(&chip->wq, &wait);
timeo = jiffies + (HZ / 2); /* FIXME */
mutex_lock(&chip->mutex);
@@ -2461,7 +2469,11 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(&chip->wq, &wait);
mutex_unlock(&chip->mutex);
+ if (IS_ENABLED(CONFIG_MTD_XIP))
+ local_irq_enable();
schedule();
+ if (IS_ENABLED(CONFIG_MTD_XIP))
+ local_irq_disable();
remove_wait_queue(&chip->wq, &wait);
mutex_lock(&chip->mutex);
continue;
@@ -2560,7 +2572,11 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(&chip->wq, &wait);
mutex_unlock(&chip->mutex);
+ if (IS_ENABLED(CONFIG_MTD_XIP))
+ local_irq_enable();
schedule();
+ if (IS_ENABLED(CONFIG_MTD_XIP))
+ local_irq_disable();
remove_wait_queue(&chip->wq, &wait);
mutex_lock(&chip->mutex);
continue;
--
2.27.0
ping
On 2020/11/27 21:07, Xiaoming Ni wrote:
> When CONFIG_MTD_XIP=y, local_irq_disable() is called in xip_disable().
> To avoid sleep in interrupt context, we need to call local_irq_enable()
> before schedule().
>
> The problem call stack is as follows:
> bug1:
> do_write_oneword_retry()
> xip_disable()
> local_irq_disable()
> do_write_oneword_once()
> schedule()
> bug2:
> do_write_buffer()
> xip_disable()
> local_irq_disable()
> do_write_buffer_wait()
> schedule()
> bug3:
> do_erase_chip()
> xip_disable()
> local_irq_disable()
> schedule()
> bug4:
> do_erase_oneblock()
> xip_disable()
> local_irq_disable()
> schedule()
>
> Fixes: 02b15e343aee ("[MTD] XIP for AMD CFI flash.")
> Cc: [email protected] # v2.6.13
> Signed-off-by: Xiaoming Ni <[email protected]>
> ---
> drivers/mtd/chips/cfi_cmdset_0002.c | 16 ++++++++++++++++
> 1 file changed, 16 insertions(+)
>
> diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
> index a1f3e1031c3d..12c3776f093a 100644
> --- a/drivers/mtd/chips/cfi_cmdset_0002.c
> +++ b/drivers/mtd/chips/cfi_cmdset_0002.c
> @@ -1682,7 +1682,11 @@ static int __xipram do_write_oneword_once(struct map_info *map,
> set_current_state(TASK_UNINTERRUPTIBLE);
> add_wait_queue(&chip->wq, &wait);
> mutex_unlock(&chip->mutex);
> + if (IS_ENABLED(CONFIG_MTD_XIP))
> + local_irq_enable();
> schedule();
> + if (IS_ENABLED(CONFIG_MTD_XIP))
> + local_irq_disable();
> remove_wait_queue(&chip->wq, &wait);
> timeo = jiffies + (HZ / 2); /* FIXME */
> mutex_lock(&chip->mutex);
> @@ -1962,7 +1966,11 @@ static int __xipram do_write_buffer_wait(struct map_info *map,
> set_current_state(TASK_UNINTERRUPTIBLE);
> add_wait_queue(&chip->wq, &wait);
> mutex_unlock(&chip->mutex);
> + if (IS_ENABLED(CONFIG_MTD_XIP))
> + local_irq_enable();
> schedule();
> + if (IS_ENABLED(CONFIG_MTD_XIP))
> + local_irq_disable();
> remove_wait_queue(&chip->wq, &wait);
> timeo = jiffies + (HZ / 2); /* FIXME */
> mutex_lock(&chip->mutex);
> @@ -2461,7 +2469,11 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
> set_current_state(TASK_UNINTERRUPTIBLE);
> add_wait_queue(&chip->wq, &wait);
> mutex_unlock(&chip->mutex);
> + if (IS_ENABLED(CONFIG_MTD_XIP))
> + local_irq_enable();
> schedule();
> + if (IS_ENABLED(CONFIG_MTD_XIP))
> + local_irq_disable();
> remove_wait_queue(&chip->wq, &wait);
> mutex_lock(&chip->mutex);
> continue;
> @@ -2560,7 +2572,11 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
> set_current_state(TASK_UNINTERRUPTIBLE);
> add_wait_queue(&chip->wq, &wait);
> mutex_unlock(&chip->mutex);
> + if (IS_ENABLED(CONFIG_MTD_XIP))
> + local_irq_enable();
> schedule();
> + if (IS_ENABLED(CONFIG_MTD_XIP))
> + local_irq_disable();
> remove_wait_queue(&chip->wq, &wait);
> mutex_lock(&chip->mutex);
> continue;
>
Hi Xiaoming,
Xiaoming Ni <[email protected]> wrote on Mon, 7 Dec 2020 18:48:33
+0800:
> ping
>
> On 2020/11/27 21:07, Xiaoming Ni wrote:
> > When CONFIG_MTD_XIP=y, local_irq_disable() is called in xip_disable().
> > To avoid sleep in interrupt context, we need to call local_irq_enable()
> > before schedule().
> >
> > The problem call stack is as follows:
> > bug1:
> > do_write_oneword_retry()
> > xip_disable()
> > local_irq_disable()
> > do_write_oneword_once()
> > schedule()
> > bug2:
> > do_write_buffer()
> > xip_disable()
> > local_irq_disable()
> > do_write_buffer_wait()
> > schedule()
> > bug3:
> > do_erase_chip()
> > xip_disable()
> > local_irq_disable()
> > schedule()
> > bug4:
> > do_erase_oneblock()
> > xip_disable()
> > local_irq_disable()
> > schedule()
> >
> > Fixes: 02b15e343aee ("[MTD] XIP for AMD CFI flash.")
> > Cc: [email protected] # v2.6.13
> > Signed-off-by: Xiaoming Ni <[email protected]>
> > ---
> > drivers/mtd/chips/cfi_cmdset_0002.c | 16 ++++++++++++++++
> > 1 file changed, 16 insertions(+)
> >
> > diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
> > index a1f3e1031c3d..12c3776f093a 100644
> > --- a/drivers/mtd/chips/cfi_cmdset_0002.c
> > +++ b/drivers/mtd/chips/cfi_cmdset_0002.c
> > @@ -1682,7 +1682,11 @@ static int __xipram do_write_oneword_once(struct map_info *map,
> > set_current_state(TASK_UNINTERRUPTIBLE);
> > add_wait_queue(&chip->wq, &wait);
> > mutex_unlock(&chip->mutex);
> > + if (IS_ENABLED(CONFIG_MTD_XIP))
> > + local_irq_enable();
> > schedule();
> > + if (IS_ENABLED(CONFIG_MTD_XIP))
> > + local_irq_disable();
The fix really seems strange to me. I will let Vignesh decide but I
think we should consider updating/fixing xip_disable instead.
> > remove_wait_queue(&chip->wq, &wait);
> > timeo = jiffies + (HZ / 2); /* FIXME */
> > mutex_lock(&chip->mutex);
> > @@ -1962,7 +1966,11 @@ static int __xipram do_write_buffer_wait(struct map_info *map,
> > set_current_state(TASK_UNINTERRUPTIBLE);
> > add_wait_queue(&chip->wq, &wait);
> > mutex_unlock(&chip->mutex);
> > + if (IS_ENABLED(CONFIG_MTD_XIP))
> > + local_irq_enable();
> > schedule();
> > + if (IS_ENABLED(CONFIG_MTD_XIP))
> > + local_irq_disable();
> > remove_wait_queue(&chip->wq, &wait);
> > timeo = jiffies + (HZ / 2); /* FIXME */
> > mutex_lock(&chip->mutex);
> > @@ -2461,7 +2469,11 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
> > set_current_state(TASK_UNINTERRUPTIBLE);
> > add_wait_queue(&chip->wq, &wait);
> > mutex_unlock(&chip->mutex);
> > + if (IS_ENABLED(CONFIG_MTD_XIP))
> > + local_irq_enable();
> > schedule();
> > + if (IS_ENABLED(CONFIG_MTD_XIP))
> > + local_irq_disable();
> > remove_wait_queue(&chip->wq, &wait);
> > mutex_lock(&chip->mutex);
> > continue;
> > @@ -2560,7 +2572,11 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
> > set_current_state(TASK_UNINTERRUPTIBLE);
> > add_wait_queue(&chip->wq, &wait);
> > mutex_unlock(&chip->mutex);
> > + if (IS_ENABLED(CONFIG_MTD_XIP))
> > + local_irq_enable();
> > schedule();
> > + if (IS_ENABLED(CONFIG_MTD_XIP))
> > + local_irq_disable();
> > remove_wait_queue(&chip->wq, &wait);
> > mutex_lock(&chip->mutex);
> > continue;
> >
>
Thanks,
Miquèl
Hi Xiaoming,
On 12/7/20 4:23 PM, Miquel Raynal wrote:
> Hi Xiaoming,
>
> Xiaoming Ni <[email protected]> wrote on Mon, 7 Dec 2020 18:48:33
> +0800:
>
>> ping
>>
>> On 2020/11/27 21:07, Xiaoming Ni wrote:
>>> When CONFIG_MTD_XIP=y, local_irq_disable() is called in xip_disable().
>>> To avoid sleep in interrupt context, we need to call local_irq_enable()
>>> before schedule().
>>>
>>> The problem call stack is as follows:
>>> bug1:
>>> do_write_oneword_retry()
>>> xip_disable()
>>> local_irq_disable()
>>> do_write_oneword_once()
>>> schedule()
>>> bug2:
>>> do_write_buffer()
>>> xip_disable()
>>> local_irq_disable()
>>> do_write_buffer_wait()
>>> schedule()
>>> bug3:
>>> do_erase_chip()
>>> xip_disable()
>>> local_irq_disable()
>>> schedule()
>>> bug4:
>>> do_erase_oneblock()
>>> xip_disable()
>>> local_irq_disable()
>>> schedule()
>>>
>>> Fixes: 02b15e343aee ("[MTD] XIP for AMD CFI flash.")
>>> Cc: [email protected] # v2.6.13
>>> Signed-off-by: Xiaoming Ni <[email protected]>
>>> ---
>>> drivers/mtd/chips/cfi_cmdset_0002.c | 16 ++++++++++++++++
>>> 1 file changed, 16 insertions(+)
>>>
>>> diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
>>> index a1f3e1031c3d..12c3776f093a 100644
>>> --- a/drivers/mtd/chips/cfi_cmdset_0002.c
>>> +++ b/drivers/mtd/chips/cfi_cmdset_0002.c
>>> @@ -1682,7 +1682,11 @@ static int __xipram do_write_oneword_once(struct map_info *map,
>>> set_current_state(TASK_UNINTERRUPTIBLE);
>>> add_wait_queue(&chip->wq, &wait);
>>> mutex_unlock(&chip->mutex);
>>> + if (IS_ENABLED(CONFIG_MTD_XIP))
>>> + local_irq_enable();
>>> schedule();
>>> + if (IS_ENABLED(CONFIG_MTD_XIP))
>>> + local_irq_disable();
>
> The fix really seems strange to me. I will let Vignesh decide but I
> think we should consider updating/fixing xip_disable instead.
Agree with Miquel. Have you done any testing
or is this purely based on code inspection?
What about comment before xip_disable() function:
/*
* No interrupt what so ever can be serviced while the flash isn't in array
* mode. This is ensured by the xip_disable() and xip_enable() functions
* enclosing any code path where the flash is known not to be in array mode.
* And within a XIP disabled code path, only functions marked with __xipram
* may be called and nothing else (it's a good thing to inspect generated
* assembly to make sure inline functions were actually inlined and that gcc
* didn't emit calls to its own support functions). Also configuring MTD CFI
* support to a single buswidth and a single interleave is also recommended.
*/
So, I don't think the fix is as simple as this patch.
Regards
Vignesh
On 2020/12/8 2:59, Vignesh Raghavendra wrote:
> Hi Xiaoming,
>
> On 12/7/20 4:23 PM, Miquel Raynal wrote:
>> Hi Xiaoming,
>>
>> Xiaoming Ni <[email protected]> wrote on Mon, 7 Dec 2020 18:48:33
>> +0800:
>>
>>> ping
>>>
>>> On 2020/11/27 21:07, Xiaoming Ni wrote:
>>>> When CONFIG_MTD_XIP=y, local_irq_disable() is called in xip_disable().
>>>> To avoid sleep in interrupt context, we need to call local_irq_enable()
>>>> before schedule().
>>>>
>>>> The problem call stack is as follows:
>>>> bug1:
>>>> do_write_oneword_retry()
>>>> xip_disable()
>>>> local_irq_disable()
>>>> do_write_oneword_once()
>>>> schedule()
>>>> bug2:
>>>> do_write_buffer()
>>>> xip_disable()
>>>> local_irq_disable()
>>>> do_write_buffer_wait()
>>>> schedule()
>>>> bug3:
>>>> do_erase_chip()
>>>> xip_disable()
>>>> local_irq_disable()
>>>> schedule()
>>>> bug4:
>>>> do_erase_oneblock()
>>>> xip_disable()
>>>> local_irq_disable()
>>>> schedule()
>>>>
>>>> Fixes: 02b15e343aee ("[MTD] XIP for AMD CFI flash.")
>>>> Cc: [email protected] # v2.6.13
>>>> Signed-off-by: Xiaoming Ni <[email protected]>
>>>> ---
>>>> drivers/mtd/chips/cfi_cmdset_0002.c | 16 ++++++++++++++++
>>>> 1 file changed, 16 insertions(+)
>>>>
>>>> diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
>>>> index a1f3e1031c3d..12c3776f093a 100644
>>>> --- a/drivers/mtd/chips/cfi_cmdset_0002.c
>>>> +++ b/drivers/mtd/chips/cfi_cmdset_0002.c
>>>> @@ -1682,7 +1682,11 @@ static int __xipram do_write_oneword_once(struct map_info *map,
>>>> set_current_state(TASK_UNINTERRUPTIBLE);
>>>> add_wait_queue(&chip->wq, &wait);
>>>> mutex_unlock(&chip->mutex);
>>>> + if (IS_ENABLED(CONFIG_MTD_XIP))
>>>> + local_irq_enable();
>>>> schedule();
>>>> + if (IS_ENABLED(CONFIG_MTD_XIP))
>>>> + local_irq_disable();
>>
>> The fix really seems strange to me. I will let Vignesh decide but I
>> think we should consider updating/fixing xip_disable instead.
>
> Agree with Miquel. Have you done any testing
> or is this purely based on code inspection?
>
I don't have the corresponding device test environment.
I found the problem through code review.
> What about comment before xip_disable() function:
>
> /*
> * No interrupt what so ever can be serviced while the flash isn't in array
> * mode. This is ensured by the xip_disable() and xip_enable() functions
> * enclosing any code path where the flash is known not to be in array mode.
> * And within a XIP disabled code path, only functions marked with __xipram
> * may be called and nothing else (it's a good thing to inspect generated
> * assembly to make sure inline functions were actually inlined and that gcc
> * didn't emit calls to its own support functions). Also configuring MTD CFI
> * support to a single buswidth and a single interleave is also recommended.
> */
>
> So, I don't think the fix is as simple as this patch.
>
+xip_enable();
schedule();
+xip_disable();
Do I need to change it to this?
> Regards
> Vignesh
> .
>
Thanks
Xiaoming Ni
On 12/8/20 6:53 AM, Xiaoming Ni wrote:
> On 2020/12/8 2:59, Vignesh Raghavendra wrote:
>> Hi Xiaoming,
>>
[...]
>>>>> ---
>>>>> drivers/mtd/chips/cfi_cmdset_0002.c | 16 ++++++++++++++++
>>>>> 1 file changed, 16 insertions(+)
>>>>>
>>>>> diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c
>>>>> b/drivers/mtd/chips/cfi_cmdset_0002.c
>>>>> index a1f3e1031c3d..12c3776f093a 100644
>>>>> --- a/drivers/mtd/chips/cfi_cmdset_0002.c
>>>>> +++ b/drivers/mtd/chips/cfi_cmdset_0002.c
>>>>> @@ -1682,7 +1682,11 @@ static int __xipram
>>>>> do_write_oneword_once(struct map_info *map,
>>>>> set_current_state(TASK_UNINTERRUPTIBLE);
>>>>> add_wait_queue(&chip->wq, &wait);
>>>>> mutex_unlock(&chip->mutex);
>>>>> + if (IS_ENABLED(CONFIG_MTD_XIP))
>>>>> + local_irq_enable();
>>>>> schedule();
>>>>> + if (IS_ENABLED(CONFIG_MTD_XIP))
>>>>> + local_irq_disable();
>>>
>>> The fix really seems strange to me. I will let Vignesh decide but I
>>> think we should consider updating/fixing xip_disable instead.
>>
>> Agree with Miquel. Have you done any testing
>> or is this purely based on code inspection?
>>
> I don't have the corresponding device test environment.
> I found the problem through code review.
>
Sorry, I am not comfortable applying this patch without proper testing
that given the unknowns and legacy nature of the code.
>
>> What about comment before xip_disable() function:
>>
>> /*
>> * No interrupt what so ever can be serviced while the flash isn't in
>> array
>> * mode. This is ensured by the xip_disable() and xip_enable()
>> functions
>> * enclosing any code path where the flash is known not to be in
>> array mode.
>> * And within a XIP disabled code path, only functions marked with
>> __xipram
>> * may be called and nothing else (it's a good thing to inspect
>> generated
>> * assembly to make sure inline functions were actually inlined and
>> that gcc
>> * didn't emit calls to its own support functions). Also configuring
>> MTD CFI
>> * support to a single buswidth and a single interleave is also
>> recommended.
>> */
>>
>> So, I don't think the fix is as simple as this patch.
>>
> +xip_enable();
> schedule();
> +xip_disable();
>
> Do I need to change it to this?
>
This just narrows the window, but an IRQ is still possible just after
xip_enable() but before schedule().
Regards
Vignesh