To enable support for performance scaling (DVFS) for generic devices with
the SCMI performance protocol, let's add an SCMI performance domain. This
is being modelled as a genpd provider, with support for performance scaling
through genpd's ->set_performance_state() callback.
Note that, this adds the initial support that allows consumer drivers for
attached devices, to vote for a new performance state via calling the
dev_pm_genpd_set_performance_state(). However, this should be avoided as
it's in most cases preferred to use the OPP library to vote for a new OPP
instead. The support using the OPP library isn't part of this change, but
needs to be implemented from subsequent changes.
Signed-off-by: Ulf Hansson <[email protected]>
---
Changes in v2:
- Converted to use the new ->domain_info_get() callback.
---
drivers/firmware/arm_scmi/Kconfig | 12 ++
drivers/firmware/arm_scmi/Makefile | 1 +
drivers/firmware/arm_scmi/scmi_perf_domain.c | 155 +++++++++++++++++++
3 files changed, 168 insertions(+)
create mode 100644 drivers/firmware/arm_scmi/scmi_perf_domain.c
diff --git a/drivers/firmware/arm_scmi/Kconfig b/drivers/firmware/arm_scmi/Kconfig
index ea0f5083ac47..706d1264d038 100644
--- a/drivers/firmware/arm_scmi/Kconfig
+++ b/drivers/firmware/arm_scmi/Kconfig
@@ -181,6 +181,18 @@ config ARM_SCMI_POWER_DOMAIN
will be called scmi_pm_domain. Note this may needed early in boot
before rootfs may be available.
+config ARM_SCMI_PERF_DOMAIN
+ tristate "SCMI performance domain driver"
+ depends on ARM_SCMI_PROTOCOL || (COMPILE_TEST && OF)
+ default y
+ select PM_GENERIC_DOMAINS if PM
+ help
+ This enables support for the SCMI performance domains which can be
+ enabled or disabled via the SCP firmware.
+
+ This driver can also be built as a module. If so, the module will be
+ called scmi_perf_domain.
+
config ARM_SCMI_POWER_CONTROL
tristate "SCMI system power control driver"
depends on ARM_SCMI_PROTOCOL || (COMPILE_TEST && OF)
diff --git a/drivers/firmware/arm_scmi/Makefile b/drivers/firmware/arm_scmi/Makefile
index b31d78fa66cc..afee66a65dcb 100644
--- a/drivers/firmware/arm_scmi/Makefile
+++ b/drivers/firmware/arm_scmi/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_ARM_SCMI_PROTOCOL) += scmi-core.o
obj-$(CONFIG_ARM_SCMI_PROTOCOL) += scmi-module.o
obj-$(CONFIG_ARM_SCMI_POWER_DOMAIN) += scmi_pm_domain.o
+obj-$(CONFIG_ARM_SCMI_PERF_DOMAIN) += scmi_perf_domain.o
obj-$(CONFIG_ARM_SCMI_POWER_CONTROL) += scmi_power_control.o
ifeq ($(CONFIG_THUMB2_KERNEL)$(CONFIG_CC_IS_CLANG),yy)
diff --git a/drivers/firmware/arm_scmi/scmi_perf_domain.c b/drivers/firmware/arm_scmi/scmi_perf_domain.c
new file mode 100644
index 000000000000..4ed2401995be
--- /dev/null
+++ b/drivers/firmware/arm_scmi/scmi_perf_domain.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * SCMI performance domain support.
+ *
+ * Copyright (C) 2023 Linaro Ltd.
+ */
+
+#include <linux/err.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pm_domain.h>
+#include <linux/scmi_protocol.h>
+#include <linux/slab.h>
+
+struct scmi_perf_domain {
+ struct generic_pm_domain genpd;
+ const struct scmi_perf_proto_ops *perf_ops;
+ const struct scmi_protocol_handle *ph;
+ const struct scmi_perf_domain_info *info;
+ u32 domain_id;
+};
+
+#define to_scmi_pd(pd) container_of(pd, struct scmi_perf_domain, genpd)
+
+static int
+scmi_pd_set_perf_state(struct generic_pm_domain *genpd, unsigned int state)
+{
+ struct scmi_perf_domain *pd = to_scmi_pd(genpd);
+ int ret;
+
+ if (!pd->info->set_perf)
+ return 0;
+
+ ret = pd->perf_ops->level_set(pd->ph, pd->domain_id, state, true);
+ if (ret)
+ dev_warn(&genpd->dev, "Failed with %d when trying to set %d perf level",
+ ret, state);
+
+ return ret;
+}
+
+static int scmi_perf_domain_probe(struct scmi_device *sdev)
+{
+ struct device *dev = &sdev->dev;
+ const struct scmi_handle *handle = sdev->handle;
+ const struct scmi_perf_proto_ops *perf_ops;
+ struct scmi_protocol_handle *ph;
+ struct scmi_perf_domain *scmi_pd;
+ struct genpd_onecell_data *scmi_pd_data;
+ struct generic_pm_domain **domains;
+ int num_domains, i, ret = 0;
+ u32 perf_level;
+
+ if (!handle)
+ return -ENODEV;
+
+ /* The OF node must specify us as a power-domain provider. */
+ if (!of_find_property(dev->of_node, "#power-domain-cells", NULL))
+ return 0;
+
+ perf_ops = handle->devm_protocol_get(sdev, SCMI_PROTOCOL_PERF, &ph);
+ if (IS_ERR(perf_ops))
+ return PTR_ERR(perf_ops);
+
+ num_domains = perf_ops->num_domains_get(ph);
+ if (num_domains < 0) {
+ dev_warn(dev, "Failed with %d when getting num perf domains\n",
+ num_domains);
+ return num_domains;
+ } else if (!num_domains) {
+ return 0;
+ }
+
+ scmi_pd = devm_kcalloc(dev, num_domains, sizeof(*scmi_pd), GFP_KERNEL);
+ if (!scmi_pd)
+ return -ENOMEM;
+
+ scmi_pd_data = devm_kzalloc(dev, sizeof(*scmi_pd_data), GFP_KERNEL);
+ if (!scmi_pd_data)
+ return -ENOMEM;
+
+ domains = devm_kcalloc(dev, num_domains, sizeof(*domains), GFP_KERNEL);
+ if (!domains)
+ return -ENOMEM;
+
+ for (i = 0; i < num_domains; i++, scmi_pd++) {
+ scmi_pd->info = perf_ops->domain_info_get(ph, i);
+
+ scmi_pd->domain_id = i;
+ scmi_pd->perf_ops = perf_ops;
+ scmi_pd->ph = ph;
+ scmi_pd->genpd.name = scmi_pd->info->name;
+ scmi_pd->genpd.flags = GENPD_FLAG_OPP_TABLE_FW;
+ scmi_pd->genpd.set_performance_state = scmi_pd_set_perf_state;
+
+ ret = perf_ops->level_get(ph, i, &perf_level, false);
+ if (ret) {
+ dev_dbg(dev, "Failed to get perf level for %s",
+ scmi_pd->genpd.name);
+ perf_level = 0;
+ }
+
+ /* Let the perf level indicate the power-state too. */
+ ret = pm_genpd_init(&scmi_pd->genpd, NULL, perf_level == 0);
+ if (ret)
+ goto err;
+
+ domains[i] = &scmi_pd->genpd;
+ }
+
+ scmi_pd_data->domains = domains;
+ scmi_pd_data->num_domains = num_domains;
+
+ ret = of_genpd_add_provider_onecell(dev->of_node, scmi_pd_data);
+ if (ret)
+ goto err;
+
+ dev_set_drvdata(dev, scmi_pd_data);
+ dev_info(dev, "Initialized %d performance domains", num_domains);
+ return 0;
+err:
+ for (i--; i >= 0; i--)
+ pm_genpd_remove(domains[i]);
+ return ret;
+}
+
+static void scmi_perf_domain_remove(struct scmi_device *sdev)
+{
+ struct device *dev = &sdev->dev;
+ struct genpd_onecell_data *scmi_pd_data = dev_get_drvdata(dev);
+ int i;
+
+ of_genpd_del_provider(dev->of_node);
+
+ for (i = 0; i < scmi_pd_data->num_domains; i++)
+ pm_genpd_remove(scmi_pd_data->domains[i]);
+}
+
+static const struct scmi_device_id scmi_id_table[] = {
+ { SCMI_PROTOCOL_PERF, "perf" },
+ { },
+};
+MODULE_DEVICE_TABLE(scmi, scmi_id_table);
+
+static struct scmi_driver scmi_perf_domain_driver = {
+ .name = "scmi-perf-domain",
+ .probe = scmi_perf_domain_probe,
+ .remove = scmi_perf_domain_remove,
+ .id_table = scmi_id_table,
+};
+module_scmi_driver(scmi_perf_domain_driver);
+
+MODULE_AUTHOR("Ulf Hansson <[email protected]>");
+MODULE_DESCRIPTION("ARM SCMI perf domain driver");
+MODULE_LICENSE("GPL v2");
--
2.34.1
On Thu, Jul 13, 2023 at 04:17:37PM +0200, Ulf Hansson wrote:
> To enable support for performance scaling (DVFS) for generic devices with
> the SCMI performance protocol, let's add an SCMI performance domain. This
> is being modelled as a genpd provider, with support for performance scaling
> through genpd's ->set_performance_state() callback.
>
> Note that, this adds the initial support that allows consumer drivers for
> attached devices, to vote for a new performance state via calling the
> dev_pm_genpd_set_performance_state(). However, this should be avoided as
> it's in most cases preferred to use the OPP library to vote for a new OPP
> instead. The support using the OPP library isn't part of this change, but
> needs to be implemented from subsequent changes.
>
Hi Ulf,
a couple of remarks down below.
> Signed-off-by: Ulf Hansson <[email protected]>
> ---
>
> Changes in v2:
> - Converted to use the new ->domain_info_get() callback.
>
> ---
> drivers/firmware/arm_scmi/Kconfig | 12 ++
> drivers/firmware/arm_scmi/Makefile | 1 +
> drivers/firmware/arm_scmi/scmi_perf_domain.c | 155 +++++++++++++++++++
> 3 files changed, 168 insertions(+)
> create mode 100644 drivers/firmware/arm_scmi/scmi_perf_domain.c
[snip]
> +static int scmi_perf_domain_probe(struct scmi_device *sdev)
> +{
> + struct device *dev = &sdev->dev;
> + const struct scmi_handle *handle = sdev->handle;
> + const struct scmi_perf_proto_ops *perf_ops;
> + struct scmi_protocol_handle *ph;
> + struct scmi_perf_domain *scmi_pd;
> + struct genpd_onecell_data *scmi_pd_data;
> + struct generic_pm_domain **domains;
> + int num_domains, i, ret = 0;
> + u32 perf_level;
> +
> + if (!handle)
> + return -ENODEV;
> +
> + /* The OF node must specify us as a power-domain provider. */
> + if (!of_find_property(dev->of_node, "#power-domain-cells", NULL))
> + return 0;
> +
> + perf_ops = handle->devm_protocol_get(sdev, SCMI_PROTOCOL_PERF, &ph);
> + if (IS_ERR(perf_ops))
> + return PTR_ERR(perf_ops);
> +
> + num_domains = perf_ops->num_domains_get(ph);
> + if (num_domains < 0) {
> + dev_warn(dev, "Failed with %d when getting num perf domains\n",
> + num_domains);
> + return num_domains;
> + } else if (!num_domains) {
> + return 0;
> + }
> +
> + scmi_pd = devm_kcalloc(dev, num_domains, sizeof(*scmi_pd), GFP_KERNEL);
> + if (!scmi_pd)
> + return -ENOMEM;
> +
> + scmi_pd_data = devm_kzalloc(dev, sizeof(*scmi_pd_data), GFP_KERNEL);
> + if (!scmi_pd_data)
> + return -ENOMEM;
> +
> + domains = devm_kcalloc(dev, num_domains, sizeof(*domains), GFP_KERNEL);
> + if (!domains)
> + return -ENOMEM;
> +
> + for (i = 0; i < num_domains; i++, scmi_pd++) {
> + scmi_pd->info = perf_ops->domain_info_get(ph, i);
So here you are grabbing all the performance domains exposed by the
platform via PERF protocol and then a few lines down below you are
registering them with pm_genpd_init(), but the list of domains obtained
from the platform will contain NOT only devices but also CPUs possibly,
already managed by the SCMI CPUFreq driver.
In fact the SCMI CPUFreq driver, on his side, takes care to pick only
domains that are bound in the DT to a CPU (via scmi_cpu_domain_id DT
parsing) but here you are registering all domains with GenPD upfront.
Is it not possible that, once registered, GenPD can decide, at some point
in the future, to try act on some of these domains associated with a CPU ?
(like Clock framework does at the end of boot trying to disable unused
clocks...not familiar with internals of GenPD, though)
> + scmi_pd->domain_id = i;
> + scmi_pd->perf_ops = perf_ops;
> + scmi_pd->ph = ph;
> + scmi_pd->genpd.name = scmi_pd->info->name;
> + scmi_pd->genpd.flags = GENPD_FLAG_OPP_TABLE_FW;
> + scmi_pd->genpd.set_performance_state = scmi_pd_set_perf_state;
> +
> + ret = perf_ops->level_get(ph, i, &perf_level, false);
> + if (ret) {
> + dev_dbg(dev, "Failed to get perf level for %s",
> + scmi_pd->genpd.name);
> + perf_level = 0;
> + }
> +
> + /* Let the perf level indicate the power-state too. */
> + ret = pm_genpd_init(&scmi_pd->genpd, NULL, perf_level == 0);
In SCMI world PERF levels should have nothing to do with the Power
state of a domain: you have the POWER protocol for that, so you should
not assume that perf level 0 means OFF, but you can use the POWER protocol
operation .state_get() to lookup the power state. (and you can grab both
perf and power ops from the same driver)
The tricky part would be to match the PERF domain at hand with the
related POWER domain to query the state for, I suppose.
Indeed, recently, while looking at SCMI v3.2 PERF evolutions, I was
tempted to just start rejecting any level_set() or set_freq() request
for ZERO since they really can be abused to power off a domain. (if the
platform complies...)
Apologize if I missed something about how GenPD behaviour...
Thanks,
Cristian
On Wed, Jul 19, 2023 at 03:51:45PM +0100, Cristian Marussi wrote:
> On Thu, Jul 13, 2023 at 04:17:37PM +0200, Ulf Hansson wrote:
[...]
> > + scmi_pd_data = devm_kzalloc(dev, sizeof(*scmi_pd_data), GFP_KERNEL);
> > + if (!scmi_pd_data)
> > + return -ENOMEM;
> > +
> > + domains = devm_kcalloc(dev, num_domains, sizeof(*domains), GFP_KERNEL);
> > + if (!domains)
> > + return -ENOMEM;
> > +
> > + for (i = 0; i < num_domains; i++, scmi_pd++) {
> > + scmi_pd->info = perf_ops->domain_info_get(ph, i);
>
> So here you are grabbing all the performance domains exposed by the
> platform via PERF protocol and then a few lines down below you are
> registering them with pm_genpd_init(), but the list of domains obtained
> from the platform will contain NOT only devices but also CPUs possibly,
> already managed by the SCMI CPUFreq driver.
>
Agreed, I pointed out briefly in the previous patch I think. I am not sure
how will that work if the performance and power domains are not 1-1 mapping
or if they are CPUs then this might confusing ? Not sure but looks like
we might be creating a spaghetti here :(.
> In fact the SCMI CPUFreq driver, on his side, takes care to pick only
> domains that are bound in the DT to a CPU (via scmi_cpu_domain_id DT
> parsing) but here you are registering all domains with GenPD upfront.
>
+1
> Is it not possible that, once registered, GenPD can decide, at some point
> in the future, to try act on some of these domains associated with a CPU ?
IIRC, all unused genpd are turned off right. It may not impact here but still
super confusing as we will be creating power domains for the set of domains
actually advertised as power domains by the firmware. This will add another
set.
> (like Clock framework does at the end of boot trying to disable unused
> clocks...not familiar with internals of GenPD, though)
>
Ah, I am reading too much serialised. Just agreed and wrote the same above.
> > + scmi_pd->domain_id = i;
> > + scmi_pd->perf_ops = perf_ops;
> > + scmi_pd->ph = ph;
> > + scmi_pd->genpd.name = scmi_pd->info->name;
> > + scmi_pd->genpd.flags = GENPD_FLAG_OPP_TABLE_FW;
> > + scmi_pd->genpd.set_performance_state = scmi_pd_set_perf_state;
> > +
> > + ret = perf_ops->level_get(ph, i, &perf_level, false);
> > + if (ret) {
> > + dev_dbg(dev, "Failed to get perf level for %s",
> > + scmi_pd->genpd.name);
> > + perf_level = 0;
> > + }
> > +
> > + /* Let the perf level indicate the power-state too. */
> > + ret = pm_genpd_init(&scmi_pd->genpd, NULL, perf_level == 0);
>
> In SCMI world PERF levels should have nothing to do with the Power
> state of a domain: you have the POWER protocol for that, so you should
> not assume that perf level 0 means OFF, but you can use the POWER protocol
> operation .state_get() to lookup the power state. (and you can grab both
> perf and power ops from the same driver)
>
> The tricky part would be to match the PERF domain at hand with the
> related POWER domain to query the state for, I suppose.
>
I wanted to ask the same. E.g. on juno, GPU has perf domain 2 and power domain
9. It would be good if we can how it would work there ? What is expected
from the gpu driver in terms of managing perf and power ? Does it need
to specify 2 power domains now and specify which is perf and which power in
its bindings ?
> Indeed, recently, while looking at SCMI v3.2 PERF evolutions, I was
> tempted to just start rejecting any level_set() or set_freq() request
> for ZERO since they really can be abused to power off a domain. (if the
> platform complies...)
>
Good point I need to dig the spec before I can comment on this.
--
Regards,
Sudeep
Hi Cristian,
On Wed, 19 Jul 2023 at 16:51, Cristian Marussi <[email protected]> wrote:
>
> On Thu, Jul 13, 2023 at 04:17:37PM +0200, Ulf Hansson wrote:
> > To enable support for performance scaling (DVFS) for generic devices with
> > the SCMI performance protocol, let's add an SCMI performance domain. This
> > is being modelled as a genpd provider, with support for performance scaling
> > through genpd's ->set_performance_state() callback.
> >
> > Note that, this adds the initial support that allows consumer drivers for
> > attached devices, to vote for a new performance state via calling the
> > dev_pm_genpd_set_performance_state(). However, this should be avoided as
> > it's in most cases preferred to use the OPP library to vote for a new OPP
> > instead. The support using the OPP library isn't part of this change, but
> > needs to be implemented from subsequent changes.
> >
>
> Hi Ulf,
>
> a couple of remarks down below.
>
> > Signed-off-by: Ulf Hansson <[email protected]>
> > ---
> >
> > Changes in v2:
> > - Converted to use the new ->domain_info_get() callback.
> >
> > ---
> > drivers/firmware/arm_scmi/Kconfig | 12 ++
> > drivers/firmware/arm_scmi/Makefile | 1 +
> > drivers/firmware/arm_scmi/scmi_perf_domain.c | 155 +++++++++++++++++++
> > 3 files changed, 168 insertions(+)
> > create mode 100644 drivers/firmware/arm_scmi/scmi_perf_domain.c
>
> [snip]
>
> > +static int scmi_perf_domain_probe(struct scmi_device *sdev)
> > +{
> > + struct device *dev = &sdev->dev;
> > + const struct scmi_handle *handle = sdev->handle;
> > + const struct scmi_perf_proto_ops *perf_ops;
> > + struct scmi_protocol_handle *ph;
> > + struct scmi_perf_domain *scmi_pd;
> > + struct genpd_onecell_data *scmi_pd_data;
> > + struct generic_pm_domain **domains;
> > + int num_domains, i, ret = 0;
> > + u32 perf_level;
> > +
> > + if (!handle)
> > + return -ENODEV;
> > +
> > + /* The OF node must specify us as a power-domain provider. */
> > + if (!of_find_property(dev->of_node, "#power-domain-cells", NULL))
> > + return 0;
> > +
> > + perf_ops = handle->devm_protocol_get(sdev, SCMI_PROTOCOL_PERF, &ph);
> > + if (IS_ERR(perf_ops))
> > + return PTR_ERR(perf_ops);
> > +
> > + num_domains = perf_ops->num_domains_get(ph);
> > + if (num_domains < 0) {
> > + dev_warn(dev, "Failed with %d when getting num perf domains\n",
> > + num_domains);
> > + return num_domains;
> > + } else if (!num_domains) {
> > + return 0;
> > + }
> > +
> > + scmi_pd = devm_kcalloc(dev, num_domains, sizeof(*scmi_pd), GFP_KERNEL);
> > + if (!scmi_pd)
> > + return -ENOMEM;
> > +
> > + scmi_pd_data = devm_kzalloc(dev, sizeof(*scmi_pd_data), GFP_KERNEL);
> > + if (!scmi_pd_data)
> > + return -ENOMEM;
> > +
> > + domains = devm_kcalloc(dev, num_domains, sizeof(*domains), GFP_KERNEL);
> > + if (!domains)
> > + return -ENOMEM;
> > +
> > + for (i = 0; i < num_domains; i++, scmi_pd++) {
> > + scmi_pd->info = perf_ops->domain_info_get(ph, i);
>
> So here you are grabbing all the performance domains exposed by the
> platform via PERF protocol and then a few lines down below you are
> registering them with pm_genpd_init(), but the list of domains obtained
> from the platform will contain NOT only devices but also CPUs possibly,
> already managed by the SCMI CPUFreq driver.
Correct.
>
> In fact the SCMI CPUFreq driver, on his side, takes care to pick only
> domains that are bound in the DT to a CPU (via scmi_cpu_domain_id DT
> parsing) but here you are registering all domains with GenPD upfront.
Right, genpds are acting as providers, which need to be registered
upfront to allow consumer devices to be attached when they get probed.
This isn't specific to this case, but how the genpd infrastructure is
working per design.
>
> Is it not possible that, once registered, GenPD can decide, at some point
> in the future, to try act on some of these domains associated with a CPU ?
> (like Clock framework does at the end of boot trying to disable unused
> clocks...not familiar with internals of GenPD, though)
The "magic" that exists in genpd is to save/restore the performance
state at genpd_runtime_suspend|resume().
That means the consumer device needs to be attached and runtime PM
enabled, otherwise genpd will just leave the performance level
unchanged. In other words, the control is entirely at the consumer
driver (scmi cpufreq driver).
>
> > + scmi_pd->domain_id = i;
> > + scmi_pd->perf_ops = perf_ops;
> > + scmi_pd->ph = ph;
> > + scmi_pd->genpd.name = scmi_pd->info->name;
> > + scmi_pd->genpd.flags = GENPD_FLAG_OPP_TABLE_FW;
> > + scmi_pd->genpd.set_performance_state = scmi_pd_set_perf_state;
> > +
> > + ret = perf_ops->level_get(ph, i, &perf_level, false);
> > + if (ret) {
> > + dev_dbg(dev, "Failed to get perf level for %s",
> > + scmi_pd->genpd.name);
> > + perf_level = 0;
> > + }
> > +
> > + /* Let the perf level indicate the power-state too. */
> > + ret = pm_genpd_init(&scmi_pd->genpd, NULL, perf_level == 0);
>
> In SCMI world PERF levels should have nothing to do with the Power
> state of a domain: you have the POWER protocol for that, so you should
> not assume that perf level 0 means OFF, but you can use the POWER protocol
> operation .state_get() to lookup the power state. (and you can grab both
> perf and power ops from the same driver)
Well, I think this may be SCMI FW implementation specific, but
honestly I don't know exactly what the spec says about this. In any
case, I don't think it's a good idea to mix this with the POWER
domain, as that's something that is entirely different. We have no
clue of those relationships (domain IDs).
My main idea behind this, is just to give the genpd internals a
reasonably defined value for its power state.
>
> The tricky part would be to match the PERF domain at hand with the
> related POWER domain to query the state for, I suppose.
>
> Indeed, recently, while looking at SCMI v3.2 PERF evolutions, I was
> tempted to just start rejecting any level_set() or set_freq() request
> for ZERO since they really can be abused to power off a domain. (if the
> platform complies...)
I didn't know that this was against the spec, but in a way what you
say, seems reasonable.
>
> Apologize if I missed something about how GenPD behaviour...
Np, thanks a lot for reviewing! Much appreciated!
>
> Thanks,
> Cristian
Kind regards
Uffe
On Wed, 19 Jul 2023 at 17:59, Sudeep Holla <[email protected]> wrote:
>
> On Wed, Jul 19, 2023 at 03:51:45PM +0100, Cristian Marussi wrote:
> > On Thu, Jul 13, 2023 at 04:17:37PM +0200, Ulf Hansson wrote:
>
> [...]
>
> > > + scmi_pd_data = devm_kzalloc(dev, sizeof(*scmi_pd_data), GFP_KERNEL);
> > > + if (!scmi_pd_data)
> > > + return -ENOMEM;
> > > +
> > > + domains = devm_kcalloc(dev, num_domains, sizeof(*domains), GFP_KERNEL);
> > > + if (!domains)
> > > + return -ENOMEM;
> > > +
> > > + for (i = 0; i < num_domains; i++, scmi_pd++) {
> > > + scmi_pd->info = perf_ops->domain_info_get(ph, i);
> >
> > So here you are grabbing all the performance domains exposed by the
> > platform via PERF protocol and then a few lines down below you are
> > registering them with pm_genpd_init(), but the list of domains obtained
> > from the platform will contain NOT only devices but also CPUs possibly,
> > already managed by the SCMI CPUFreq driver.
> >
>
> Agreed, I pointed out briefly in the previous patch I think. I am not sure
> how will that work if the performance and power domains are not 1-1 mapping
> or if they are CPUs then this might confusing ? Not sure but looks like
> we might be creating a spaghetti here :(.
I assume the discussions around the DT bindings are making it more
clear on how this should work. The scmi performance-domain and the
scmi power-domain are two separate providers.
>
> > In fact the SCMI CPUFreq driver, on his side, takes care to pick only
> > domains that are bound in the DT to a CPU (via scmi_cpu_domain_id DT
> > parsing) but here you are registering all domains with GenPD upfront.
> >
>
> +1
>
> > Is it not possible that, once registered, GenPD can decide, at some point
> > in the future, to try act on some of these domains associated with a CPU ?
>
> IIRC, all unused genpd are turned off right. It may not impact here but still
> super confusing as we will be creating power domains for the set of domains
> actually advertised as power domains by the firmware. This will add another
> set.
>
> > (like Clock framework does at the end of boot trying to disable unused
> > clocks...not familiar with internals of GenPD, though)
> >
>
> Ah, I am reading too much serialised. Just agreed and wrote the same above.
>
> > > + scmi_pd->domain_id = i;
> > > + scmi_pd->perf_ops = perf_ops;
> > > + scmi_pd->ph = ph;
> > > + scmi_pd->genpd.name = scmi_pd->info->name;
> > > + scmi_pd->genpd.flags = GENPD_FLAG_OPP_TABLE_FW;
> > > + scmi_pd->genpd.set_performance_state = scmi_pd_set_perf_state;
> > > +
> > > + ret = perf_ops->level_get(ph, i, &perf_level, false);
> > > + if (ret) {
> > > + dev_dbg(dev, "Failed to get perf level for %s",
> > > + scmi_pd->genpd.name);
> > > + perf_level = 0;
> > > + }
> > > +
> > > + /* Let the perf level indicate the power-state too. */
> > > + ret = pm_genpd_init(&scmi_pd->genpd, NULL, perf_level == 0);
> >
> > In SCMI world PERF levels should have nothing to do with the Power
> > state of a domain: you have the POWER protocol for that, so you should
> > not assume that perf level 0 means OFF, but you can use the POWER protocol
> > operation .state_get() to lookup the power state. (and you can grab both
> > perf and power ops from the same driver)
> >
> > The tricky part would be to match the PERF domain at hand with the
> > related POWER domain to query the state for, I suppose.
> >
>
> I wanted to ask the same. E.g. on juno, GPU has perf domain 2 and power domain
> 9. It would be good if we can how it would work there ? What is expected
> from the gpu driver in terms of managing perf and power ? Does it need
> to specify 2 power domains now and specify which is perf and which power in
> its bindings ?
Yes, correct.
Note that, we already have plenty of consumer devices/drivers that are
managing multiple PM domains. They use
dev_pm_domain_attach_by_id|name() to attach their devices to their
corresponding domain(s). In addition, they often use device_link_add()
to simplify runtime PM management.
That said, we should expect to see some boilerplate code in consumer
drivers that deals with this attaching/detaching of multiple PM
domains. That's a separate problem we may want to address later on. In
fact, it's already been discussed earlier at LKML (I can't find the
link right now).
[...]
Kind regards
Uffe
On Fri, Jul 21, 2023 at 05:19:51PM +0200, Ulf Hansson wrote:
> Hi Cristian,
>
Hi,
> On Wed, 19 Jul 2023 at 16:51, Cristian Marussi <[email protected]> wrote:
> >
> > On Thu, Jul 13, 2023 at 04:17:37PM +0200, Ulf Hansson wrote:
> > > To enable support for performance scaling (DVFS) for generic devices with
> > > the SCMI performance protocol, let's add an SCMI performance domain. This
> > > is being modelled as a genpd provider, with support for performance scaling
> > > through genpd's ->set_performance_state() callback.
> > >
> > > Note that, this adds the initial support that allows consumer drivers for
> > > attached devices, to vote for a new performance state via calling the
> > > dev_pm_genpd_set_performance_state(). However, this should be avoided as
> > > it's in most cases preferred to use the OPP library to vote for a new OPP
> > > instead. The support using the OPP library isn't part of this change, but
> > > needs to be implemented from subsequent changes.
> > >
> >
> > Hi Ulf,
> >
> > a couple of remarks down below.
> >
> > > Signed-off-by: Ulf Hansson <[email protected]>
> > > ---
> > >
> > > Changes in v2:
> > > - Converted to use the new ->domain_info_get() callback.
> > >
> > > ---
> > > drivers/firmware/arm_scmi/Kconfig | 12 ++
> > > drivers/firmware/arm_scmi/Makefile | 1 +
> > > drivers/firmware/arm_scmi/scmi_perf_domain.c | 155 +++++++++++++++++++
> > > 3 files changed, 168 insertions(+)
> > > create mode 100644 drivers/firmware/arm_scmi/scmi_perf_domain.c
> >
> > [snip]
> >
> > > +static int scmi_perf_domain_probe(struct scmi_device *sdev)
> > > +{
> > > + struct device *dev = &sdev->dev;
> > > + const struct scmi_handle *handle = sdev->handle;
> > > + const struct scmi_perf_proto_ops *perf_ops;
> > > + struct scmi_protocol_handle *ph;
> > > + struct scmi_perf_domain *scmi_pd;
> > > + struct genpd_onecell_data *scmi_pd_data;
> > > + struct generic_pm_domain **domains;
> > > + int num_domains, i, ret = 0;
> > > + u32 perf_level;
> > > +
> > > + if (!handle)
> > > + return -ENODEV;
> > > +
> > > + /* The OF node must specify us as a power-domain provider. */
> > > + if (!of_find_property(dev->of_node, "#power-domain-cells", NULL))
> > > + return 0;
> > > +
> > > + perf_ops = handle->devm_protocol_get(sdev, SCMI_PROTOCOL_PERF, &ph);
> > > + if (IS_ERR(perf_ops))
> > > + return PTR_ERR(perf_ops);
> > > +
> > > + num_domains = perf_ops->num_domains_get(ph);
> > > + if (num_domains < 0) {
> > > + dev_warn(dev, "Failed with %d when getting num perf domains\n",
> > > + num_domains);
> > > + return num_domains;
> > > + } else if (!num_domains) {
> > > + return 0;
> > > + }
> > > +
> > > + scmi_pd = devm_kcalloc(dev, num_domains, sizeof(*scmi_pd), GFP_KERNEL);
> > > + if (!scmi_pd)
> > > + return -ENOMEM;
> > > +
> > > + scmi_pd_data = devm_kzalloc(dev, sizeof(*scmi_pd_data), GFP_KERNEL);
> > > + if (!scmi_pd_data)
> > > + return -ENOMEM;
> > > +
> > > + domains = devm_kcalloc(dev, num_domains, sizeof(*domains), GFP_KERNEL);
> > > + if (!domains)
> > > + return -ENOMEM;
> > > +
> > > + for (i = 0; i < num_domains; i++, scmi_pd++) {
> > > + scmi_pd->info = perf_ops->domain_info_get(ph, i);
> >
> > So here you are grabbing all the performance domains exposed by the
> > platform via PERF protocol and then a few lines down below you are
> > registering them with pm_genpd_init(), but the list of domains obtained
> > from the platform will contain NOT only devices but also CPUs possibly,
> > already managed by the SCMI CPUFreq driver.
>
> Correct.
>
> >
> > In fact the SCMI CPUFreq driver, on his side, takes care to pick only
> > domains that are bound in the DT to a CPU (via scmi_cpu_domain_id DT
> > parsing) but here you are registering all domains with GenPD upfront.
>
> Right, genpds are acting as providers, which need to be registered
> upfront to allow consumer devices to be attached when they get probed.
>
> This isn't specific to this case, but how the genpd infrastructure is
> working per design.
>
> >
> > Is it not possible that, once registered, GenPD can decide, at some point
> > in the future, to try act on some of these domains associated with a CPU ?
> > (like Clock framework does at the end of boot trying to disable unused
> > clocks...not familiar with internals of GenPD, though)
>
> The "magic" that exists in genpd is to save/restore the performance
> state at genpd_runtime_suspend|resume().
>
> That means the consumer device needs to be attached and runtime PM
> enabled, otherwise genpd will just leave the performance level
> unchanged. In other words, the control is entirely at the consumer
> driver (scmi cpufreq driver).
>
Ok, so if the DT is well formed and a CPU-related perf domain is not
wrongly referred from a driver looking for a device perf-domain, the
genPD subsystem wont act on the CPUs domains.
> >
> > > + scmi_pd->domain_id = i;
> > > + scmi_pd->perf_ops = perf_ops;
> > > + scmi_pd->ph = ph;
> > > + scmi_pd->genpd.name = scmi_pd->info->name;
> > > + scmi_pd->genpd.flags = GENPD_FLAG_OPP_TABLE_FW;
> > > + scmi_pd->genpd.set_performance_state = scmi_pd_set_perf_state;
> > > +
> > > + ret = perf_ops->level_get(ph, i, &perf_level, false);
> > > + if (ret) {
> > > + dev_dbg(dev, "Failed to get perf level for %s",
> > > + scmi_pd->genpd.name);
> > > + perf_level = 0;
> > > + }
> > > +
> > > + /* Let the perf level indicate the power-state too. */
> > > + ret = pm_genpd_init(&scmi_pd->genpd, NULL, perf_level == 0);
> >
> > In SCMI world PERF levels should have nothing to do with the Power
> > state of a domain: you have the POWER protocol for that, so you should
> > not assume that perf level 0 means OFF, but you can use the POWER protocol
> > operation .state_get() to lookup the power state. (and you can grab both
> > perf and power ops from the same driver)
>
> Well, I think this may be SCMI FW implementation specific, but
> honestly I don't know exactly what the spec says about this. In any
> case, I don't think it's a good idea to mix this with the POWER
> domain, as that's something that is entirely different. We have no
> clue of those relationships (domain IDs).
>
> My main idea behind this, is just to give the genpd internals a
> reasonably defined value for its power state.
>
The thing is that in the SCMI world you cannot assume that perf_level 0
means powered off, the current SCP/SCMI platform fw, as an example, wont
advertise a 0-perf-level and wont act on such a request, because you are
supposed to use POWER protocol to get/set the power-state of a device.
So it could be fine, as long as genPD wont try to set the level to 0
expecting the domain to be as a consequence also powered off and as
long as it is fine for these genpd domains to be always initialized
as ON. (since perf_level could never be found as zero..)
Thanks,
Cristian
[...]
> > >
> > > Is it not possible that, once registered, GenPD can decide, at some point
> > > in the future, to try act on some of these domains associated with a CPU ?
> > > (like Clock framework does at the end of boot trying to disable unused
> > > clocks...not familiar with internals of GenPD, though)
> >
> > The "magic" that exists in genpd is to save/restore the performance
> > state at genpd_runtime_suspend|resume().
> >
> > That means the consumer device needs to be attached and runtime PM
> > enabled, otherwise genpd will just leave the performance level
> > unchanged. In other words, the control is entirely at the consumer
> > driver (scmi cpufreq driver).
> >
>
> Ok, so if the DT is well formed and a CPU-related perf domain is not
> wrongly referred from a driver looking for a device perf-domain, the
> genPD subsystem wont act on the CPUs domains.
Yes, correct!
>
> > >
> > > > + scmi_pd->domain_id = i;
> > > > + scmi_pd->perf_ops = perf_ops;
> > > > + scmi_pd->ph = ph;
> > > > + scmi_pd->genpd.name = scmi_pd->info->name;
> > > > + scmi_pd->genpd.flags = GENPD_FLAG_OPP_TABLE_FW;
> > > > + scmi_pd->genpd.set_performance_state = scmi_pd_set_perf_state;
> > > > +
> > > > + ret = perf_ops->level_get(ph, i, &perf_level, false);
> > > > + if (ret) {
> > > > + dev_dbg(dev, "Failed to get perf level for %s",
> > > > + scmi_pd->genpd.name);
> > > > + perf_level = 0;
> > > > + }
> > > > +
> > > > + /* Let the perf level indicate the power-state too. */
> > > > + ret = pm_genpd_init(&scmi_pd->genpd, NULL, perf_level == 0);
> > >
> > > In SCMI world PERF levels should have nothing to do with the Power
> > > state of a domain: you have the POWER protocol for that, so you should
> > > not assume that perf level 0 means OFF, but you can use the POWER protocol
> > > operation .state_get() to lookup the power state. (and you can grab both
> > > perf and power ops from the same driver)
> >
> > Well, I think this may be SCMI FW implementation specific, but
> > honestly I don't know exactly what the spec says about this. In any
> > case, I don't think it's a good idea to mix this with the POWER
> > domain, as that's something that is entirely different. We have no
> > clue of those relationships (domain IDs).
> >
> > My main idea behind this, is just to give the genpd internals a
> > reasonably defined value for its power state.
> >
>
> The thing is that in the SCMI world you cannot assume that perf_level 0
> means powered off, the current SCP/SCMI platform fw, as an example, wont
> advertise a 0-perf-level and wont act on such a request, because you are
> supposed to use POWER protocol to get/set the power-state of a device.
Right, thanks for clarifying this!
>
> So it could be fine, as long as genPD wont try to set the level to 0
> expecting the domain to be as a consequence also powered off and as
> long as it is fine for these genpd domains to be always initialized
> as ON. (since perf_level could never be found as zero..)
Okay, so to me, it sounds like that we should set GENPD_FLAG_ALWAYS_ON
for the genpd. This makes it clear that the domain can't be powered
off.
Moreover, we should prevent genpd internals from setting the
performance state to 0 for the scmi performance domain. Let me look
into how to best deal with that.
Kind regards
Uffe