hello Bjorn,
Sorry for these late remarks/questions
On 11/30/2017 02:16 AM, Bjorn Andersson wrote:
> remoteproc instances can be stopped either by invoking shutdown or by an
> attempt to recover from a crash. For some subdev types it's expected to
> clean up gracefully during a shutdown, but are unable to do so during a
> crash - so pass this information to the subdev remove functions.
>
> Signed-off-by: Bjorn Andersson <[email protected]>
> ---
>
> Changes since v3:
> - None
>
> Changes since v2:
> - None
>
> Changes since v1:
> - New patch
>
> drivers/remoteproc/qcom_common.c | 6 +++---
> drivers/remoteproc/remoteproc_core.c | 18 +++++++++---------
> include/linux/remoteproc.h | 4 ++--
> 3 files changed, 14 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/remoteproc/qcom_common.c b/drivers/remoteproc/qcom_common.c
> index d487040b528b..7a862d41503e 100644
> --- a/drivers/remoteproc/qcom_common.c
> +++ b/drivers/remoteproc/qcom_common.c
> @@ -60,7 +60,7 @@ static int glink_subdev_probe(struct rproc_subdev *subdev)
> return IS_ERR(glink->edge) ? PTR_ERR(glink->edge) : 0;
> }
>
> -static void glink_subdev_remove(struct rproc_subdev *subdev)
> +static void glink_subdev_remove(struct rproc_subdev *subdev, bool graceful)
> {
> struct qcom_rproc_glink *glink = to_glink_subdev(subdev);
>
> @@ -107,7 +107,7 @@ static int smd_subdev_probe(struct rproc_subdev *subdev)
> return PTR_ERR_OR_ZERO(smd->edge);
> }
>
> -static void smd_subdev_remove(struct rproc_subdev *subdev)
> +static void smd_subdev_remove(struct rproc_subdev *subdev, bool graceful)
> {
> struct qcom_rproc_subdev *smd = to_smd_subdev(subdev);
>
> @@ -176,7 +176,7 @@ static int ssr_notify_start(struct rproc_subdev *subdev)
> return 0;
> }
>
> -static void ssr_notify_stop(struct rproc_subdev *subdev)
> +static void ssr_notify_stop(struct rproc_subdev *subdev, bool graceful)
> {
> struct qcom_rproc_ssr *ssr = to_ssr_subdev(subdev);
>
> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> index eab14b414bf0..3146e965ca47 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -307,7 +307,7 @@ static int rproc_vdev_do_probe(struct rproc_subdev *subdev)
> return rproc_add_virtio_dev(rvdev, rvdev->id);
> }
>
> -static void rproc_vdev_do_remove(struct rproc_subdev *subdev)
> +static void rproc_vdev_do_remove(struct rproc_subdev *subdev, bool graceful)
> {
> struct rproc_vdev *rvdev = container_of(subdev, struct rproc_vdev, subdev);
>
> @@ -785,17 +785,17 @@ static int rproc_probe_subdevices(struct rproc *rproc)
>
> unroll_registration:
> list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node)
> - subdev->remove(subdev);
> + subdev->remove(subdev, false);
Why do you need to do a non graceful remove in this case? This could
lead to side effect like memory leakage...
>
> return ret;
> }
>
> -static void rproc_remove_subdevices(struct rproc *rproc)
> +static void rproc_remove_subdevices(struct rproc *rproc, bool graceful)
> {
> struct rproc_subdev *subdev;
>
> list_for_each_entry_reverse(subdev, &rproc->subdevs, node)
> - subdev->remove(subdev);
> + subdev->remove(subdev, graceful);
> }
>
> /**
> @@ -1013,13 +1013,13 @@ static int rproc_trigger_auto_boot(struct rproc *rproc)
> return ret;
> }
>
> -static int rproc_stop(struct rproc *rproc)
> +static int rproc_stop(struct rproc *rproc, bool graceful)
> {
> struct device *dev = &rproc->dev;
> int ret;
>
> /* remove any subdevices for the remote processor */
> - rproc_remove_subdevices(rproc);
> + rproc_remove_subdevices(rproc, graceful);
>
> /* power off the remote processor */
> ret = rproc->ops->stop(rproc);
> @@ -1063,7 +1063,7 @@ int rproc_trigger_recovery(struct rproc *rproc)
> if (ret)
> return ret;
>
> - ret = rproc_stop(rproc);
> + ret = rproc_stop(rproc, false);
> if (ret)
> goto unlock_mutex;
>
> @@ -1216,7 +1216,7 @@ void rproc_shutdown(struct rproc *rproc)
> if (!atomic_dec_and_test(&rproc->power))
> goto out;
>
> - ret = rproc_stop(rproc);
> + ret = rproc_stop(rproc, true);
> if (ret) {
> atomic_inc(&rproc->power);
> goto out;
> @@ -1550,7 +1550,7 @@ EXPORT_SYMBOL(rproc_del);
> void rproc_add_subdev(struct rproc *rproc,
> struct rproc_subdev *subdev,
> int (*probe)(struct rproc_subdev *subdev),
> - void (*remove)(struct rproc_subdev *subdev))
> + void (*remove)(struct rproc_subdev *subdev, bool graceful))
> {
> subdev->probe = probe;
> subdev->remove = remove;
> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> index 44e630eb3d94..20a9467744ea 100644
> --- a/include/linux/remoteproc.h
> +++ b/include/linux/remoteproc.h
> @@ -456,7 +456,7 @@ struct rproc_subdev {
> struct list_head node;
>
> int (*probe)(struct rproc_subdev *subdev);
> - void (*remove)(struct rproc_subdev *subdev);
> + void (*remove)(struct rproc_subdev *subdev, bool graceful);
What about adding a new ops instead of a parameter, like a recovery
callback?
Regard
Arnaud
> };
>
> /* we currently support only two vrings per rvdev */
> @@ -539,7 +539,7 @@ static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev)
> void rproc_add_subdev(struct rproc *rproc,
> struct rproc_subdev *subdev,
> int (*probe)(struct rproc_subdev *subdev),
> - void (*remove)(struct rproc_subdev *subdev));
> + void (*remove)(struct rproc_subdev *subdev, bool graceful));
>
> void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev);
>
>
On Fri 01 Dec 06:50 PST 2017, Arnaud Pouliquen wrote:
> hello Bjorn,
>
> Sorry for these late remarks/questions
>
No worries, I'm happy to see you reading the patch!
>
> On 11/30/2017 02:16 AM, Bjorn Andersson wrote:
[..]
> > diff --git a/drivers/remoteproc/qcom_common.c b/drivers/remoteproc/qcom_common.c
[..]
> > @@ -785,17 +785,17 @@ static int rproc_probe_subdevices(struct rproc *rproc)
> >
> > unroll_registration:
> > list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node)
> > - subdev->remove(subdev);
> > + subdev->remove(subdev, false);
> Why do you need to do a non graceful remove in this case? This could
> lead to side effect like memory leakage...
>
Regardless of this being true or false resources should always be
reclaimed.
The reason for introducing this is that the modem in the Qualcomm
platforms implements persistent storage and it's preferred to tell it to
flush the latest data to the storage server (on the Linux side) before
pulling the plug. But in the case of a firmware crash this mechanism
will not be operational and there's no point in attempting this
"graceful shutdown".
[..]
> > diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> > index 44e630eb3d94..20a9467744ea 100644
> > --- a/include/linux/remoteproc.h
> > +++ b/include/linux/remoteproc.h
> > @@ -456,7 +456,7 @@ struct rproc_subdev {
> > struct list_head node;
> >
> > int (*probe)(struct rproc_subdev *subdev);
> > - void (*remove)(struct rproc_subdev *subdev);
> > + void (*remove)(struct rproc_subdev *subdev, bool graceful);
> What about adding a new ops instead of a parameter, like a recovery
> callback?
>
I think that for symmetry purposes it should be probe/remove in both
code paths. A possible alternative to the proposal would be to introduce
an operation "request_shutdown()" the would be called in the proposed
graceful code path.
However, in the Qualcomm SMD and GLINK (conceptually equivalent to
virtio-rpmsg) it is possible to open and close communication channels
and it's conceivable to see that the graceful case would close all
channels cleanly while the non-graceful case would just remove the rpmsg
devices (and leave the channel states/memory as is).
In this case a "request_shutdown()" would complicate things, compared to
the boolean.
Regards,
Bjorn
On 12/05/2017 07:46 AM, Bjorn Andersson wrote:
> On Fri 01 Dec 06:50 PST 2017, Arnaud Pouliquen wrote:
>
>> hello Bjorn,
>>
>> Sorry for these late remarks/questions
>>
>
> No worries, I'm happy to see you reading the patch!
>
>>
>> On 11/30/2017 02:16 AM, Bjorn Andersson wrote:
> [..]
>>> diff --git a/drivers/remoteproc/qcom_common.c b/drivers/remoteproc/qcom_common.c
> [..]
>>> @@ -785,17 +785,17 @@ static int rproc_probe_subdevices(struct rproc *rproc)
>>>
>>> unroll_registration:
>>> list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node)
>>> - subdev->remove(subdev);
>>> + subdev->remove(subdev, false);
>> Why do you need to do a non graceful remove in this case? This could
>> lead to side effect like memory leakage...
>>
>
> Regardless of this being true or false resources should always be
> reclaimed.
>
> The reason for introducing this is that the modem in the Qualcomm
> platforms implements persistent storage and it's preferred to tell it to
> flush the latest data to the storage server (on the Linux side) before
> pulling the plug. But in the case of a firmware crash this mechanism
> will not be operational and there's no point in attempting this
> "graceful shutdown".
I understand your usecase for Qualcomm, but in rproc_probe_subdevices
there is not crash of the remote firmware , so remove should be graceful.
>
> [..]
>>> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
>>> index 44e630eb3d94..20a9467744ea 100644
>>> --- a/include/linux/remoteproc.h
>>> +++ b/include/linux/remoteproc.h
>>> @@ -456,7 +456,7 @@ struct rproc_subdev {
>>> struct list_head node;
>>>
>>> int (*probe)(struct rproc_subdev *subdev);
>>> - void (*remove)(struct rproc_subdev *subdev);
>>> + void (*remove)(struct rproc_subdev *subdev, bool graceful);
>> What about adding a new ops instead of a parameter, like a recovery
>> callback?
>>
>
> I think that for symmetry purposes it should be probe/remove in both
> code paths. A possible alternative to the proposal would be to introduce
> an operation "request_shutdown()" the would be called in the proposed
> graceful code path.
>
>
> However, in the Qualcomm SMD and GLINK (conceptually equivalent to
> virtio-rpmsg) it is possible to open and close communication channels
> and it's conceivable to see that the graceful case would close all
> channels cleanly while the non-graceful case would just remove the rpmsg
> devices (and leave the channel states/memory as is).
>
> In this case a "request_shutdown()" would complicate things, compared to
> the boolean.
>
I would be more for a specific ops that inform sub-dev on a crash. This
would allow sub-dev to perform specific action (for instance dump) and
store crash information, then on remove, sub_dev would perform specific
action.
This could be something like "trigger_recovery" that is propagated to
the sub-dev.
That would sound more flexible from my point of view.
Regards
Arnaud
> Regards,
> Bjorn
>
On Tue 05 Dec 02:54 PST 2017, Arnaud Pouliquen wrote:
>
>
> On 12/05/2017 07:46 AM, Bjorn Andersson wrote:
> > On Fri 01 Dec 06:50 PST 2017, Arnaud Pouliquen wrote:
> >
> >> hello Bjorn,
> >>
> >> Sorry for these late remarks/questions
> >>
> >
> > No worries, I'm happy to see you reading the patch!
> >
> >>
> >> On 11/30/2017 02:16 AM, Bjorn Andersson wrote:
> > [..]
> >>> diff --git a/drivers/remoteproc/qcom_common.c b/drivers/remoteproc/qcom_common.c
> > [..]
> >>> @@ -785,17 +785,17 @@ static int rproc_probe_subdevices(struct rproc *rproc)
> >>>
> >>> unroll_registration:
> >>> list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node)
> >>> - subdev->remove(subdev);
> >>> + subdev->remove(subdev, false);
> >> Why do you need to do a non graceful remove in this case? This could
> >> lead to side effect like memory leakage...
> >>
> >
> > Regardless of this being true or false resources should always be
> > reclaimed.
> >
> > The reason for introducing this is that the modem in the Qualcomm
> > platforms implements persistent storage and it's preferred to tell it to
> > flush the latest data to the storage server (on the Linux side) before
> > pulling the plug. But in the case of a firmware crash this mechanism
> > will not be operational and there's no point in attempting this
> > "graceful shutdown".
> I understand your usecase for Qualcomm, but in rproc_probe_subdevices
> there is not crash of the remote firmware , so remove should be graceful.
>
Now I get your point, sorry. I agree with you, as this is triggering a
clean stop of the system this should be marked "graceful".
Will update, thanks.
> >
> > [..]
> >>> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> >>> index 44e630eb3d94..20a9467744ea 100644
> >>> --- a/include/linux/remoteproc.h
> >>> +++ b/include/linux/remoteproc.h
> >>> @@ -456,7 +456,7 @@ struct rproc_subdev {
> >>> struct list_head node;
> >>>
> >>> int (*probe)(struct rproc_subdev *subdev);
> >>> - void (*remove)(struct rproc_subdev *subdev);
> >>> + void (*remove)(struct rproc_subdev *subdev, bool graceful);
> >> What about adding a new ops instead of a parameter, like a recovery
> >> callback?
> >>
> >
> > I think that for symmetry purposes it should be probe/remove in both
> > code paths. A possible alternative to the proposal would be to introduce
> > an operation "request_shutdown()" the would be called in the proposed
> > graceful code path.
> >
> >
> > However, in the Qualcomm SMD and GLINK (conceptually equivalent to
> > virtio-rpmsg) it is possible to open and close communication channels
> > and it's conceivable to see that the graceful case would close all
> > channels cleanly while the non-graceful case would just remove the rpmsg
> > devices (and leave the channel states/memory as is).
> >
> > In this case a "request_shutdown()" would complicate things, compared to
> > the boolean.
> >
> I would be more for a specific ops that inform sub-dev on a crash. This
> would allow sub-dev to perform specific action (for instance dump) and
> store crash information, then on remove, sub_dev would perform specific
> action.
There is a separate discussion (although dormant) on how to gather core
dumps, which should cover these cases.
> This could be something like "trigger_recovery" that is propagated to
> the sub-dev.
>
Right, this step does make sense, but is the opposite of what I need -
i.e. a means to trigger a clean shutdown.
> That would sound more flexible from my point of view.
>
At this point I see this flexibility as unnecessary complexity, if such
need show up (beyond the core dump gathering) we should bring this up
again.
Regards,
Bjorn
Hello,
I saw your new version but i 'm answering to this one to continue
discussion.
On 12/05/2017 06:17 PM, Bjorn Andersson wrote:
> On Tue 05 Dec 02:54 PST 2017, Arnaud Pouliquen wrote:
>
>>
>>
>> On 12/05/2017 07:46 AM, Bjorn Andersson wrote:
>>> On Fri 01 Dec 06:50 PST 2017, Arnaud Pouliquen wrote:
>>>
>>>> hello Bjorn,
>>>>
>>>> Sorry for these late remarks/questions
>>>>
>>>
>>> No worries, I'm happy to see you reading the patch!
>>>
>>>>
>>>> On 11/30/2017 02:16 AM, Bjorn Andersson wrote:
>>> [..]
>>>>> diff --git a/drivers/remoteproc/qcom_common.c b/drivers/remoteproc/qcom_common.c
>>> [..]
>>>>> @@ -785,17 +785,17 @@ static int rproc_probe_subdevices(struct rproc *rproc)
>>>>>
>>>>> unroll_registration:
>>>>> list_for_each_entry_continue_reverse(subdev, &rproc->subdevs, node)
>>>>> - subdev->remove(subdev);
>>>>> + subdev->remove(subdev, false);
>>>> Why do you need to do a non graceful remove in this case? This could
>>>> lead to side effect like memory leakage...
>>>>
>>>
>>> Regardless of this being true or false resources should always be
>>> reclaimed.
>>>
>>> The reason for introducing this is that the modem in the Qualcomm
>>> platforms implements persistent storage and it's preferred to tell it to
>>> flush the latest data to the storage server (on the Linux side) before
>>> pulling the plug. But in the case of a firmware crash this mechanism
>>> will not be operational and there's no point in attempting this
>>> "graceful shutdown".
>> I understand your usecase for Qualcomm, but in rproc_probe_subdevices
>> there is not crash of the remote firmware , so remove should be graceful.
>>
>
> Now I get your point, sorry. I agree with you, as this is triggering a
> clean stop of the system this should be marked "graceful".
>
> Will update, thanks.
>
>>>
>>> [..]
>>>>> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
>>>>> index 44e630eb3d94..20a9467744ea 100644
>>>>> --- a/include/linux/remoteproc.h
>>>>> +++ b/include/linux/remoteproc.h
>>>>> @@ -456,7 +456,7 @@ struct rproc_subdev {
>>>>> struct list_head node;
>>>>>
>>>>> int (*probe)(struct rproc_subdev *subdev);
>>>>> - void (*remove)(struct rproc_subdev *subdev);
>>>>> + void (*remove)(struct rproc_subdev *subdev, bool graceful);
>>>> What about adding a new ops instead of a parameter, like a recovery
>>>> callback?
>>>>
>>>
>>> I think that for symmetry purposes it should be probe/remove in both
>>> code paths. A possible alternative to the proposal would be to introduce
>>> an operation "request_shutdown()" the would be called in the proposed
>>> graceful code path.
>>>
>>>
>>> However, in the Qualcomm SMD and GLINK (conceptually equivalent to
>>> virtio-rpmsg) it is possible to open and close communication channels
>>> and it's conceivable to see that the graceful case would close all
>>> channels cleanly while the non-graceful case would just remove the rpmsg
>>> devices (and leave the channel states/memory as is).
>>>
>>> In this case a "request_shutdown()" would complicate things, compared to
>>> the boolean.
>>>
>> I would be more for a specific ops that inform sub-dev on a crash. This
>> would allow sub-dev to perform specific action (for instance dump) and
>> store crash information, then on remove, sub_dev would perform specific
>> action.
>
> There is a separate discussion (although dormant) on how to gather core
> dumps, which should cover these cases.
>
>> This could be something like "trigger_recovery" that is propagated to
>> the sub-dev.
>>
>
> Right, this step does make sense, but is the opposite of what I need -
> i.e. a means to trigger a clean shutdown.
Could you clarify this point? i do not see my proposal as the opposite.
In your proposal:
- rproc_trigger_recovery: graceful is set to false
- rproc_shutdown: Graceful is set to true
My proposal is to call an new ops (if defined) before the stop in
rproc_trigger_recovery. If you set a local variable in Qualcomm subdev
drivers this should do the job for your need.
I tried to have a look in Qualcomm part to understand implementation.
but seems that you just add the parameter for time being.
I think that main point that bother me here, is the that the "graceful"
mode should be the normal mode. And in your implementation this look
like the exception mode. Perhaps more a feeling than anything else...but
if you decide to keep argument i would propose to inverse logic using
something like "rproc_crashed" instead of "graceful".
>
>> That would sound more flexible from my point of view.
>>
>
> At this point I see this flexibility as unnecessary complexity, if such
> need show up (beyond the core dump gathering) we should bring this up
> again.
I let you decide what is the best solution.
My concerns is to be sure that your solution is enough generic and not
too Qualcomm platform oriented. As you mentioned in OpenAMP meeting it
is quite difficult to come back on an implementation , especially if it
impacts the API.
Regards
Arnaud
>
> Regards,
> Bjorn
>
On Wed 06 Dec 00:55 PST 2017, Arnaud Pouliquen wrote:
> Hello,
>
> I saw your new version but i 'm answering to this one to continue
> discussion.
>
That's fine.
> On 12/05/2017 06:17 PM, Bjorn Andersson wrote:
> > On Tue 05 Dec 02:54 PST 2017, Arnaud Pouliquen wrote:
> >> On 12/05/2017 07:46 AM, Bjorn Andersson wrote:
> >>> On Fri 01 Dec 06:50 PST 2017, Arnaud Pouliquen wrote:
> >>>>> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
> >>>>> index 44e630eb3d94..20a9467744ea 100644
> >>>>> --- a/include/linux/remoteproc.h
> >>>>> +++ b/include/linux/remoteproc.h
> >>>>> @@ -456,7 +456,7 @@ struct rproc_subdev {
> >>>>> struct list_head node;
> >>>>>
> >>>>> int (*probe)(struct rproc_subdev *subdev);
> >>>>> - void (*remove)(struct rproc_subdev *subdev);
> >>>>> + void (*remove)(struct rproc_subdev *subdev, bool graceful);
> >>>> What about adding a new ops instead of a parameter, like a recovery
> >>>> callback?
> >>>>
> >>>
> >>> I think that for symmetry purposes it should be probe/remove in both
> >>> code paths. A possible alternative to the proposal would be to introduce
> >>> an operation "request_shutdown()" the would be called in the proposed
> >>> graceful code path.
> >>>
> >>>
> >>> However, in the Qualcomm SMD and GLINK (conceptually equivalent to
> >>> virtio-rpmsg) it is possible to open and close communication channels
> >>> and it's conceivable to see that the graceful case would close all
> >>> channels cleanly while the non-graceful case would just remove the rpmsg
> >>> devices (and leave the channel states/memory as is).
> >>>
> >>> In this case a "request_shutdown()" would complicate things, compared to
> >>> the boolean.
> >>>
> >> I would be more for a specific ops that inform sub-dev on a crash. This
> >> would allow sub-dev to perform specific action (for instance dump) and
> >> store crash information, then on remove, sub_dev would perform specific
> >> action.
> >
> > There is a separate discussion (although dormant) on how to gather core
> > dumps, which should cover these cases.
> >
> >> This could be something like "trigger_recovery" that is propagated to
> >> the sub-dev.
> >>
> >
> > Right, this step does make sense, but is the opposite of what I need -
> > i.e. a means to trigger a clean shutdown.
> Could you clarify this point? i do not see my proposal as the opposite.
> In your proposal:
> - rproc_trigger_recovery: graceful is set to false
> - rproc_shutdown: Graceful is set to true
>
Correct
> My proposal is to call an new ops (if defined) before the stop in
> rproc_trigger_recovery. If you set a local variable in Qualcomm subdev
> drivers this should do the job for your need.
>
In all use cases that comes to mind the gracefulness makes one step of
the teardown operation kick in/be optional, it's not a separate
operation. I don't see the benefit of enforcing the subdev to keep this
state.
> I tried to have a look in Qualcomm part to understand implementation.
> but seems that you just add the parameter for time being.
>
The following patch, adding sysmon, make use of this. But I have yet to
post patches that affects the SMD and GLINK implementations.
> I think that main point that bother me here, is the that the "graceful"
> mode should be the normal mode. And in your implementation this look
> like the exception mode.
I agree, I consider the recovery path to be the exception, but I'm not
seeing the necessity of making the "true" state of this parameter mean
"yes we have an exception".
Regardless of the value here, the remove() function's purpose is to
clean up resources/state. But in the case of a graceful shutdown (i.e.
not recovery path) the subdevices can be expected to tear things down in
a fashion that permits the remote side to act, so if anything this
"true" would imply that this extra steps should be taken.
> Perhaps more a feeling than anything else...but if you decide to keep
> argument i would propose to inverse logic using something like
> "rproc_crashed" instead of "graceful".
>
The difference between "this is a graceful shutdown, let's inform the
remote" and "this is not a crash, let's inform the remote". The prior
sounds much more to the point in my view.
> >
> >> That would sound more flexible from my point of view.
> >>
> >
> > At this point I see this flexibility as unnecessary complexity, if such
> > need show up (beyond the core dump gathering) we should bring this up
> > again.
>
> I let you decide what is the best solution.
> My concerns is to be sure that your solution is enough generic and not
> too Qualcomm platform oriented.
That's a fair concern. I'm very interested in finding more complex use
cases that requires this type of logic, to see if this is generic
enough.
Note that the discussions that we've had related to e.g. clocks that
should be on during the life of the remote would not fit into the
current subdev life cycle anyways, so this either needs to be
complemented or extended.
> As you mentioned in OpenAMP meeting it is quite difficult to come back
> on an implementation , especially if it impacts the API.
>
No, what I said is that it's impossible to go back once we have changed
the file format, the interface towards the remote or the interface
towards user space. All it takes to change an interface within the
kernel is a single patch.
Regards,
Bjorn
On 12/06/2017 10:53 PM, Bjorn Andersson wrote:
> On Wed 06 Dec 00:55 PST 2017, Arnaud Pouliquen wrote:
>
>> Hello,
>>
>> I saw your new version but i 'm answering to this one to continue
>> discussion.
>>
>
> That's fine.
>
>> On 12/05/2017 06:17 PM, Bjorn Andersson wrote:
>>> On Tue 05 Dec 02:54 PST 2017, Arnaud Pouliquen wrote:
>>>> On 12/05/2017 07:46 AM, Bjorn Andersson wrote:
>>>>> On Fri 01 Dec 06:50 PST 2017, Arnaud Pouliquen wrote:
>>>>>>> diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
>>>>>>> index 44e630eb3d94..20a9467744ea 100644
>>>>>>> --- a/include/linux/remoteproc.h
>>>>>>> +++ b/include/linux/remoteproc.h
>>>>>>> @@ -456,7 +456,7 @@ struct rproc_subdev {
>>>>>>> struct list_head node;
>>>>>>>
>>>>>>> int (*probe)(struct rproc_subdev *subdev);
>>>>>>> - void (*remove)(struct rproc_subdev *subdev);
>>>>>>> + void (*remove)(struct rproc_subdev *subdev, bool graceful);
>>>>>> What about adding a new ops instead of a parameter, like a recovery
>>>>>> callback?
>>>>>>
>>>>>
>>>>> I think that for symmetry purposes it should be probe/remove in both
>>>>> code paths. A possible alternative to the proposal would be to introduce
>>>>> an operation "request_shutdown()" the would be called in the proposed
>>>>> graceful code path.
>>>>>
>>>>>
>>>>> However, in the Qualcomm SMD and GLINK (conceptually equivalent to
>>>>> virtio-rpmsg) it is possible to open and close communication channels
>>>>> and it's conceivable to see that the graceful case would close all
>>>>> channels cleanly while the non-graceful case would just remove the rpmsg
>>>>> devices (and leave the channel states/memory as is).
>>>>>
>>>>> In this case a "request_shutdown()" would complicate things, compared to
>>>>> the boolean.
>>>>>
>>>> I would be more for a specific ops that inform sub-dev on a crash. This
>>>> would allow sub-dev to perform specific action (for instance dump) and
>>>> store crash information, then on remove, sub_dev would perform specific
>>>> action.
>>>
>>> There is a separate discussion (although dormant) on how to gather core
>>> dumps, which should cover these cases.
>>>
>>>> This could be something like "trigger_recovery" that is propagated to
>>>> the sub-dev.
>>>>
>>>
>>> Right, this step does make sense, but is the opposite of what I need -
>>> i.e. a means to trigger a clean shutdown.
>> Could you clarify this point? i do not see my proposal as the opposite.
>> In your proposal:
>> - rproc_trigger_recovery: graceful is set to false
>> - rproc_shutdown: Graceful is set to true
>>
>
> Correct
>
>> My proposal is to call an new ops (if defined) before the stop in
>> rproc_trigger_recovery. If you set a local variable in Qualcomm subdev
>> drivers this should do the job for your need.
>>
>
> In all use cases that comes to mind the gracefulness makes one step of
> the teardown operation kick in/be optional, it's not a separate
> operation. I don't see the benefit of enforcing the subdev to keep this
> state.
>
>> I tried to have a look in Qualcomm part to understand implementation.
>> but seems that you just add the parameter for time being.
>>
>
> The following patch, adding sysmon, make use of this. But I have yet to
> post patches that affects the SMD and GLINK implementations.
>
>> I think that main point that bother me here, is the that the "graceful"
>> mode should be the normal mode. And in your implementation this look
>> like the exception mode.
>
> I agree, I consider the recovery path to be the exception, but I'm not
> seeing the necessity of making the "true" state of this parameter mean
> "yes we have an exception".
>
> Regardless of the value here, the remove() function's purpose is to
> clean up resources/state. But in the case of a graceful shutdown (i.e.
> not recovery path) the subdevices can be expected to tear things down in
> a fashion that permits the remote side to act, so if anything this
> "true" would imply that this extra steps should be taken.
>
>> Perhaps more a feeling than anything else...but if you decide to keep
>> argument i would propose to inverse logic using something like
>> "rproc_crashed" instead of "graceful".
>>
>
> The difference between "this is a graceful shutdown, let's inform the
> remote" and "this is not a crash, let's inform the remote". The prior
> sounds much more to the point in my view.
On the other hand, i consider "graceful" as a normal mode that is
implemented in kernel. By informing sub-dev that "this is a graceful
shutdown", you just precise a behavior that is already implemented by
default.
And in case of recovery, you inform sub-dev that "this is not a graceful
shutdown". What does it means "this is not a graceful shutdown"?...This
is confusing, from my point of view.
for this reason, I still don't like the "graceful" wording too much, but
this is a personal feeling, not a strong argument. :)
So, if you are not convince by my last argument and nobody else
comments, consider it as OK for me.
>
>>>
>>>> That would sound more flexible from my point of view.
>>>>
>>>
>>> At this point I see this flexibility as unnecessary complexity, if such
>>> need show up (beyond the core dump gathering) we should bring this up
>>> again.
>>
>> I let you decide what is the best solution.
>> My concerns is to be sure that your solution is enough generic and not
>> too Qualcomm platform oriented.
>
> That's a fair concern. I'm very interested in finding more complex use
> cases that requires this type of logic, to see if this is generic
> enough.
>
> Note that the discussions that we've had related to e.g. clocks that
> should be on during the life of the remote would not fit into the
> current subdev life cycle anyways, so this either needs to be
> complemented or extended.
>
Today the only other use case, i would have in mind (except dump) is the
management of a "hot" restart on a crash... but no concrete example.
>> As you mentioned in OpenAMP meeting it is quite difficult to come back
>> on an implementation , especially if it impacts the API.
>>
>
> No, what I said is that it's impossible to go back once we have changed
> the file format, the interface towards the remote or the interface
> towards user space. All it takes to change an interface within the
> kernel is a single patch.
>
Thanks for this clarification.
Regards,
Arnaud
> Regards,
> Bjorn
>