LinuxLists.cc - [PATCH 0/5] Introduce LMh driver for Qualcomm SoCs

2021-06-08 22:32:09

by Thara Gopinath

[permalink] [raw]

Subject: [PATCH 0/5] Introduce LMh driver for Qualcomm SoCs

Limits Management Hardware(LMh) is a hardware infrastructure on some
Qualcomm SoCs that can enforce temperature and current limits as programmed
by software for certain IPs like CPU. On many newer SoCs LMh is configured
by firmware/TZ and no programming is needed from the kernel side. But on
certain SoCs like sdm845 the firmware does not do a complete programming of
the h/w block. On such SoCs kernel software has to explicitly set up the
temperature limits and turn on various monitoring and enforcing algorithms
on the hardware.

Introduce support for enabling and programming various limit settings and
monitoring capabilities of Limits Management Hardware(LMh) associated with
cpu clusters. Also introduce support in cpufreq hardware driver to monitor
the interrupt associated with cpu frequency throttling so that this
information can be conveyed to the schdeuler via thermal pressure
interface.

With this patch series following cpu performance improvement(30-70%) is
observed on sdm845. The reasoning here is that without LMh being programmed
properly from the kernel, the default settings were enabling thermal
mitigation for CPUs at too low a temperature (around 70-75 degree C). This
in turn meant that many a time CPUs were never actually allowed to hit the
maximum possible/required frequencies.

UnixBench whets and dhry (./Run whets dhry)
System Benchmarks Index Score

Without LMh Support With LMh Support
1 copy test 1353.7 1773.2

8 copy tests 4473.6 7402.3

Sysbench cpu
sysbench cpu --threads=8 --time=60 --cpu-max-prime=100000 run

Without LMh Support With LMh Support
Events per
second 355 614

Avg Latency(ms) 21.84 13.02

Thara Gopinath (5):
firmware: qcom_scm: Introduce SCM calls to access LMh
thermal: qcom: Add support for LMh driver
cpufreq: qcom-cpufreq-hw: Add dcvs interrupt support
arm64: boot: dts: sdm45: Add support for LMh node
arm64: boot: dts: qcom: sdm845: Remove passive trip points for thermal
zones 0-7

arch/arm64/boot/dts/qcom/sdm845.dtsi | 246 +++------------------------
drivers/cpufreq/qcom-cpufreq-hw.c | 100 +++++++++++
drivers/firmware/qcom_scm.c | 47 +++++
drivers/firmware/qcom_scm.h | 4 +
drivers/thermal/qcom/Kconfig | 10 ++
drivers/thermal/qcom/Makefile | 1 +
drivers/thermal/qcom/lmh.c | 244 ++++++++++++++++++++++++++
include/linux/qcom_scm.h | 13 ++
8 files changed, 440 insertions(+), 225 deletions(-)
create mode 100644 drivers/thermal/qcom/lmh.c

--
2.25.1

2021-06-08 22:32:10

by Thara Gopinath

[permalink] [raw]

Subject: [PATCH 3/5] cpufreq: qcom-cpufreq-hw: Add dcvs interrupt support

Add interrupt support to notify the kernel of h/w initiated frequency
throttling by LMh. Convey this to scheduler via thermal presssure
interface.

Signed-off-by: Thara Gopinath <[email protected]>
---
drivers/cpufreq/qcom-cpufreq-hw.c | 100 ++++++++++++++++++++++++++++++
1 file changed, 100 insertions(+)

diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index f86859bf76f1..95e17330aa9d 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -13,6 +13,7 @@
#include <linux/of_platform.h>
#include <linux/pm_opp.h>
#include <linux/slab.h>
+#include <linux/interrupt.h>

#define LUT_MAX_ENTRIES 40U
#define LUT_SRC GENMASK(31, 30)
@@ -22,10 +23,13 @@
#define CLK_HW_DIV 2
#define LUT_TURBO_IND 1

+#define HZ_PER_KHZ 1000
+
struct qcom_cpufreq_soc_data {
u32 reg_enable;
u32 reg_freq_lut;
u32 reg_volt_lut;
+ u32 reg_current_vote;
u32 reg_perf_state;
u8 lut_row_size;
};
@@ -33,7 +37,11 @@ struct qcom_cpufreq_soc_data {
struct qcom_cpufreq_data {
void __iomem *base;
struct resource *res;
+ struct delayed_work lmh_dcvs_poll_work;
const struct qcom_cpufreq_soc_data *soc_data;
+ cpumask_var_t cpus;
+ unsigned long throttled_freq;
+ int lmh_dcvs_irq;
};

static unsigned long cpu_hw_rate, xo_rate;
@@ -251,10 +259,79 @@ static void qcom_get_related_cpus(int index, struct cpumask *m)
}
}

+static inline unsigned long qcom_lmh_vote_to_freq(u32 val)
+{
+ return (val & 0x3FF) * 19200;
+}
+
+static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
+{
+ struct cpufreq_policy policy;
+ struct dev_pm_opp *opp;
+ struct device *dev;
+ unsigned long max_capacity, capacity, freq_hz;
+ unsigned int val, freq;
+
+ val = readl_relaxed(data->base + data->soc_data->reg_current_vote);
+ freq = qcom_lmh_vote_to_freq(val);
+ freq_hz = freq * HZ_PER_KHZ;
+
+ /* Do I need to calculate ceil and floor ? */
+ dev = get_cpu_device(cpumask_first(data->cpus));
+ opp = dev_pm_opp_find_freq_floor(dev, &freq_hz);
+ if (IS_ERR(opp) && PTR_ERR(opp) == -ERANGE)
+ opp = dev_pm_opp_find_freq_ceil(dev, &freq_hz);
+
+ data->throttled_freq = freq_hz / HZ_PER_KHZ;
+
+ cpufreq_get_policy(&policy, cpumask_first(data->cpus));
+
+ /* Update thermal pressure */
+ max_capacity = arch_scale_cpu_capacity(cpumask_first(data->cpus));
+ capacity = data->throttled_freq * max_capacity;
+ capacity /= policy.cpuinfo.max_freq;
+ /* Don't pass boost capacity to scheduler */
+ if (capacity > max_capacity)
+ capacity = max_capacity;
+ arch_set_thermal_pressure(data->cpus, max_capacity - capacity);
+}
+
+static void qcom_lmh_dcvs_poll(struct work_struct *work)
+{
+ struct qcom_cpufreq_data *data;
+
+ data = container_of(work, struct qcom_cpufreq_data, lmh_dcvs_poll_work.work);
+
+ qcom_lmh_dcvs_notify(data);
+ /**
+ * If h/w throttled frequency is higher than what cpufreq has requested for, stop
+ * polling and switch back to interrupt mechanism
+ */
+ if (data->throttled_freq >= qcom_cpufreq_hw_get(cpumask_first(data->cpus)))
+ /* Clear the existing interrupts and enable it back */
+ enable_irq(data->lmh_dcvs_irq);
+ else
+ mod_delayed_work(system_highpri_wq, &data->lmh_dcvs_poll_work,
+ msecs_to_jiffies(10));
+}
+
+static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data)
+{
+ struct qcom_cpufreq_data *c_data = data;
+
+ /* Disable interrupt and enable polling */
+ disable_irq_nosync(c_data->lmh_dcvs_irq);
+ qcom_lmh_dcvs_notify(c_data);
+ mod_delayed_work(system_highpri_wq, &c_data->lmh_dcvs_poll_work, msecs_to_jiffies(10));
+
+ return 0;
+}
+
static const struct qcom_cpufreq_soc_data qcom_soc_data = {
.reg_enable = 0x0,
.reg_freq_lut = 0x110,
.reg_volt_lut = 0x114,
+ .reg_current_vote = 0x704,
.reg_perf_state = 0x920,
.lut_row_size = 32,
};
@@ -285,6 +362,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
void __iomem *base;
struct qcom_cpufreq_data *data;
int ret, index;
+ bool lmh_mitigation_enabled = false;

cpu_dev = get_cpu_device(policy->cpu);
if (!cpu_dev) {
@@ -305,6 +383,8 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)

index = args.args[0];

+ lmh_mitigation_enabled = of_property_read_bool(pdev->dev.of_node, "qcom,support-lmh");
+
res = platform_get_resource(pdev, IORESOURCE_MEM, index);
if (!res) {
dev_err(dev, "failed to get mem resource %d\n", index);
@@ -329,6 +409,11 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
goto unmap_base;
}

+ if (!alloc_cpumask_var(&data->cpus, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto unmap_base;
+ }
+
data->soc_data = of_device_get_match_data(&pdev->dev);
data->base = base;
data->res = res;
@@ -347,6 +432,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
goto error;
}

+ cpumask_copy(data->cpus, policy->cpus);
policy->driver_data = data;

ret = qcom_cpufreq_hw_read_lut(cpu_dev, policy);
@@ -370,6 +456,20 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
dev_warn(cpu_dev, "failed to enable boost: %d\n", ret);
}

+ if (lmh_mitigation_enabled) {
+ data->lmh_dcvs_irq = platform_get_irq(pdev, index);
+ if (data->lmh_dcvs_irq < 0) {
+ ret = data->lmh_dcvs_irq;
+ goto error;
+ }
+ ret = devm_request_irq(dev, data->lmh_dcvs_irq, qcom_lmh_dcvs_handle_irq,
+ 0, "dcvsh-irq", data);
+ if (ret) {
+ dev_err(dev, "Error %d registering irq %x\n", ret, data->lmh_dcvs_irq);
+ goto error;
+ }
+ INIT_DEFERRABLE_WORK(&data->lmh_dcvs_poll_work, qcom_lmh_dcvs_poll);
+ }
return 0;
error:
kfree(data);
--
2.25.1

2021-06-09 11:43:23

by Thara Gopinath

[permalink] [raw]

Subject: [PATCH 4/5] arm64: boot: dts: sdm45: Add support for LMh node

Add LMh nodes for cpu cluster0 and cpu cluster1. Also add interrupt
support in cpufreq node to capture the LMh interrupt and let the scheduler
know of the max frequency throttling.

Signed-off-by: Thara Gopinath <[email protected]>
---
arch/arm64/boot/dts/qcom/sdm845.dtsi | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index 0a86fe71a66d..fdd8d816f728 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -3646,6 +3646,24 @@ swm: swm@c85 {
};
};

+ lmh_cluster1: lmh@17d70800 {
+ compatible = "qcom,msm-hw-limits";
+ reg = <0 0x17d70800 0 0x401>;
+ interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
+ qcom,lmh-cpu-id = <0x4>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ lmh_cluster0: lmh@17d78800 {
+ compatible = "qcom,msm-hw-limits";
+ reg = <0 0x17d78800 0 0x401>;
+ interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>;
+ qcom,lmh-cpu-id = <0x0>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
sound: sound {
};

@@ -4911,10 +4929,13 @@ cpufreq_hw: cpufreq@17d43000 {
reg = <0 0x17d43000 0 0x1400>, <0 0x17d45800 0 0x1400>;
reg-names = "freq-domain0", "freq-domain1";

+ interrupts-extended = <&lmh_cluster0 0>, <&lmh_cluster1 0>;
+
clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>;
clock-names = "xo", "alternate";

#freq-domain-cells = <1>;
+ qcom,support-lmh = <1>;
};

wifi: wifi@18800000 {
--
2.25.1

2021-06-09 11:43:23

by Thara Gopinath

[permalink] [raw]

Subject: [PATCH 2/5] thermal: qcom: Add support for LMh driver

Driver enabling various pieces of Limits Management Hardware(LMh) for cpu
cluster0 and cpu cluster1 namely kick starting monitoring of temperature,
current, battery current violations, enabling reliability algorithm and
setting up various temperature limits.

The following has been explained in the cover letter. I am including this
here so that this remains in the commit message as well.

LMh is a hardware infrastructure on some Qualcomm SoCs that can enforce
temperature and current limits as programmed by software for certain IPs
like CPU. On many newer SoCs LMh is configured by firmware/TZ and no
programming is needed from the kernel side. But on certain SoCs like sdm845
the firmware does not do a complete programming of the h/w. On such SoCs
kernel software has to explicitly set up the temperature limits and turn on
various monitoring and enforcing algorithms on the hardware.

Signed-off-by: Thara Gopinath <[email protected]>
---
drivers/thermal/qcom/Kconfig | 10 ++
drivers/thermal/qcom/Makefile | 1 +
drivers/thermal/qcom/lmh.c | 244 ++++++++++++++++++++++++++++++++++
3 files changed, 255 insertions(+)
create mode 100644 drivers/thermal/qcom/lmh.c

diff --git a/drivers/thermal/qcom/Kconfig b/drivers/thermal/qcom/Kconfig
index 8d5ac2df26dc..c95b95e254d7 100644
--- a/drivers/thermal/qcom/Kconfig
+++ b/drivers/thermal/qcom/Kconfig
@@ -31,3 +31,13 @@ config QCOM_SPMI_TEMP_ALARM
trip points. The temperature reported by the thermal sensor reflects the
real time die temperature if an ADC is present or an estimate of the
temperature based upon the over temperature stage value.
+
+config QCOM_LMH
+ tristate "Qualcomm Limits Management Hardware"
+ depends on ARCH_QCOM
+ help
+ This enables initialization of Qualcomm limits management
+ hardware(LMh). LMh allows for h/w enforced mitigation for cpus based on
+ input from temperature and current sensors. On many newer Qualcomm SoCs
+ LMH is configure in the firmware and this feature need not be enabled.
+ However, on certain SoCs like sdm845 LMH has to be configured from HLOS.
diff --git a/drivers/thermal/qcom/Makefile b/drivers/thermal/qcom/Makefile
index 252ea7d9da0b..0fa2512042e7 100644
--- a/drivers/thermal/qcom/Makefile
+++ b/drivers/thermal/qcom/Makefile
@@ -5,3 +5,4 @@ qcom_tsens-y += tsens.o tsens-v2.o tsens-v1.o tsens-v0_1.o \
tsens-8960.o
obj-$(CONFIG_QCOM_SPMI_ADC_TM5) += qcom-spmi-adc-tm5.o
obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM) += qcom-spmi-temp-alarm.o
+obj-$(CONFIG_QCOM_LMH) += lmh.o
diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c
new file mode 100644
index 000000000000..8741a36cb674
--- /dev/null
+++ b/drivers/thermal/qcom/lmh.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (C) 2021, Linaro Limited. All rights reserved.
+ */
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/qcom_scm.h>
+
+#define LMH_NODE_DCVS 0x44435653
+#define LMH_CLUSTER0_NODE_ID 0x6370302D
+#define LMH_CLUSTER1_NODE_ID 0x6370312D
+
+#define LMH_SUB_FN_THERMAL 0x54484D4C
+#define LMH_SUB_FN_CRNT 0x43524E54
+#define LMH_SUB_FN_REL 0x52454C00
+#define LMH_SUB_FN_BCL 0x42434C00
+
+#define LMH_ALGO_MODE_ENABLE 0x454E424C
+#define LMH_TH_HI_THRESHOLD 0x48494748
+#define LMH_TH_LOW_THRESHOLD 0x4C4F5700
+#define LMH_TH_ARM_THRESHOLD 0x41524D00
+
+#define LMH_TH_HI_TEMP 95000
+#define LMH_TH_LOW_TEMP 94500
+#define LMH_TH_ARM_TEMP 65000
+
+#define LMH_REG_DCVS_INTR_CLR 0x8
+
+struct lmh_hw_data {
+ void __iomem *base;
+ struct irq_domain *domain;
+ int irq;
+ u32 payload[5];
+ u32 payload_size;
+ u32 cpu_id;
+};
+
+static void update_payload(struct lmh_hw_data *lmh_data, u32 fn, u32 reg, u32 val)
+{
+ lmh_data->payload[0] = fn;
+ lmh_data->payload[1] = 0;
+ lmh_data->payload[2] = reg;
+ lmh_data->payload[3] = 1;
+ lmh_data->payload[4] = val;
+}
+
+static irqreturn_t lmh_handle_irq(int hw_irq, void *data)
+{
+ struct lmh_hw_data *lmh_data = data;
+ int irq = irq_find_mapping(lmh_data->domain, 0);
+
+ /*
+ * Disable interrupt and call the cpufreq driver to handle the interrupt
+ * cpufreq will enable the interrupt once finished processing.
+ */
+ disable_irq_nosync(lmh_data->irq);
+ if (irq)
+ generic_handle_irq(irq);
+
+ return 0;
+}
+
+static void lmh_enable_interrupt(struct irq_data *d)
+{
+ struct lmh_hw_data *lmh_data = irq_data_get_irq_chip_data(d);
+
+ /* Clear the existing interrupt */
+ writel_relaxed(0xFF, lmh_data->base + LMH_REG_DCVS_INTR_CLR);
+ enable_irq(lmh_data->irq);
+}
+
+static struct irq_chip lmh_irq_chip = {
+ .name = "lmh",
+ .irq_enable = lmh_enable_interrupt,
+};
+
+static int lmh_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
+{
+ struct lmh_hw_data *lmh_data = d->host_data;
+
+ irq_set_chip_and_handler(irq, &lmh_irq_chip, handle_simple_irq);
+ irq_set_chip_data(irq, lmh_data);
+
+ return 0;
+}
+
+static const struct irq_domain_ops lmh_irq_ops = {
+ .map = lmh_irq_map,
+ .xlate = irq_domain_xlate_onecell,
+};
+
+static int lmh_probe(struct platform_device *pdev)
+{
+ struct device *dev;
+ struct device_node *np;
+ struct lmh_hw_data *lmh_data;
+ u32 node_id;
+ int ret;
+
+ dev = &pdev->dev;
+ np = dev->of_node;
+ if (!np)
+ return -EINVAL;
+
+ lmh_data = devm_kzalloc(dev, sizeof(*lmh_data), GFP_KERNEL);
+ if (!lmh_data)
+ return -ENOMEM;
+
+ lmh_data->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(lmh_data->base))
+ return PTR_ERR(lmh_data->base);
+
+ ret = of_property_read_u32(np, "qcom,lmh-cpu-id", &lmh_data->cpu_id);
+ if (ret)
+ return -ENODEV;
+
+ /*
+ * Only sdm845 has lmh hardware currently enabled from hlos. If this is needed
+ * for other platforms, revisit this to check if the <cpu-id, node-id> should be part
+ * of a dt match table.
+ */
+ if (lmh_data->cpu_id == 0) {
+ node_id = LMH_CLUSTER0_NODE_ID;
+ } else if (lmh_data->cpu_id == 4) {
+ node_id = LMH_CLUSTER1_NODE_ID;
+ } else {
+ dev_err(dev, "Wrong cpu id associated with lmh node\n");
+ return -EINVAL;
+ }
+
+ /* Payload size is five bytes for now */
+ lmh_data->payload_size = 5 * sizeof(u32);
+
+ platform_set_drvdata(pdev, lmh_data);
+
+ if (!qcom_scm_lmh_dcvsh_available())
+ return -EINVAL;
+
+ /* Enable Thermal Algorithm */
+ update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_ALGO_MODE_ENABLE, 1);
+ ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
+ LMH_NODE_DCVS, node_id, 0);
+ if (ret) {
+ dev_err(dev, "Error %d enabling thermal subfunction\n", ret);
+ return ret;
+ }
+
+ /* Enable Current Sensing Algorithm */
+ update_payload(lmh_data, LMH_SUB_FN_CRNT, LMH_ALGO_MODE_ENABLE, 1);
+ ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
+ LMH_NODE_DCVS, node_id, 0);
+ if (ret) {
+ dev_err(dev, "Error %d enabling current subfunction\n", ret);
+ return ret;
+ }
+
+ /* Enable Reliability Algorithm */
+ update_payload(lmh_data, LMH_SUB_FN_REL, LMH_ALGO_MODE_ENABLE, 1);
+ ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
+ LMH_NODE_DCVS, node_id, 0);
+ if (ret) {
+ dev_err(dev, "Error %d enabling reliability subfunction\n", ret);
+ return ret;
+ }
+
+ /* Enable BCL Algorithm */
+ update_payload(lmh_data, LMH_SUB_FN_BCL, LMH_ALGO_MODE_ENABLE, 1);
+ ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
+ LMH_NODE_DCVS, node_id, 0);
+ if (ret) {
+ dev_err(dev, "Error %d enabling BCL subfunction\n", ret);
+ return ret;
+ }
+
+ ret = qcom_scm_lmh_profile_change(0x1);
+ if (ret) {
+ dev_err(dev, "Error %d changing profile\n", ret);
+ return ret;
+ }
+
+ /* Set default thermal trips */
+ update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_TH_ARM_THRESHOLD, LMH_TH_ARM_TEMP);
+ ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
+ LMH_NODE_DCVS, node_id, 0);
+ if (ret) {
+ dev_err(dev, "Error setting thermal ARM thershold%d\n", ret);
+ return ret;
+ }
+
+ update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_TH_HI_THRESHOLD, LMH_TH_HI_TEMP);
+ ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
+ LMH_NODE_DCVS, node_id, 0);
+ if (ret) {
+ dev_err(dev, "Error setting thermal HI thershold%d\n", ret);
+ return ret;
+ }
+ update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_TH_LOW_THRESHOLD, LMH_TH_LOW_TEMP);
+ ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
+ LMH_NODE_DCVS, node_id, 0);
+ if (ret) {
+ dev_err(dev, "Error setting thermal ARM thershold%d\n", ret);
+ return ret;
+ }
+
+ lmh_data->irq = platform_get_irq(pdev, 0);
+ lmh_data->domain = irq_domain_add_linear(np, 1, &lmh_irq_ops, lmh_data);
+ if (!lmh_data->domain) {
+ dev_err(dev, "Error adding irq_domain\n");
+ return -EINVAL;
+ }
+
+ ret = devm_request_irq(dev, lmh_data->irq, lmh_handle_irq,
+ IRQF_TRIGGER_HIGH | IRQF_ONESHOT | IRQF_NO_SUSPEND,
+ "lmh-irq", lmh_data);
+ if (ret) {
+ dev_err(dev, "Error %d registering irq %x\n", ret, lmh_data->irq);
+ irq_domain_remove(lmh_data->domain);
+ return ret;
+ }
+ return 0;
+}
+
+static const struct of_device_id lmh_table[] = {
+ { .compatible = "qcom,msm-hw-limits", },
+ {},
+};
+
+static struct platform_driver lmh_driver = {
+ .probe = lmh_probe,
+ .driver = {
+ .name = "qcom-lmh",
+ .of_match_table = lmh_table,
+ },
+};
+module_platform_driver(lmh_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("QCOM LMH driver");
--
2.25.1

2021-06-09 16:52:28

by Thara Gopinath

[permalink] [raw]

Subject: [PATCH 5/5] arm64: boot: dts: qcom: sdm845: Remove passive trip points for thermal zones 0-7

Now that Limits h/w is enabled to monitor thermal events around cpus and
throttle the cpu frequencies, remove the s/w montoring of the same which
was happening via tsens. We keep critical trip points for these zones so
that system shutdown can be initiated if the temperature exceeds critical
trip.

Signed-off-by: Thara Gopinath <[email protected]>
---
arch/arm64/boot/dts/qcom/sdm845.dtsi | 225 ---------------------------
1 file changed, 225 deletions(-)

diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index fdd8d816f728..9a494a1b7a09 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -4971,18 +4971,6 @@ cpu0-thermal {
thermal-sensors = <&tsens0 1>;

trips {
- cpu0_alert0: trip-point0 {
- temperature = <90000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
- cpu0_alert1: trip-point1 {
- temperature = <95000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
cpu0_crit: cpu_crit {
temperature = <110000>;
hysteresis = <1000>;
@@ -4990,22 +4978,6 @@ cpu0_crit: cpu_crit {
};
};

- cooling-maps {
- map0 {
- trip = <&cpu0_alert0>;
- cooling-device = <&CPU0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
- };
- map1 {
- trip = <&cpu0_alert1>;
- cooling-device = <&CPU0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
- };
- };
};

cpu1-thermal {
@@ -5015,18 +4987,6 @@ cpu1-thermal {
thermal-sensors = <&tsens0 2>;

trips {
- cpu1_alert0: trip-point0 {
- temperature = <90000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
- cpu1_alert1: trip-point1 {
- temperature = <95000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
cpu1_crit: cpu_crit {
temperature = <110000>;
hysteresis = <1000>;
@@ -5034,22 +4994,6 @@ cpu1_crit: cpu_crit {
};
};

- cooling-maps {
- map0 {
- trip = <&cpu1_alert0>;
- cooling-device = <&CPU0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
- };
- map1 {
- trip = <&cpu1_alert1>;
- cooling-device = <&CPU0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
- };
- };
};

cpu2-thermal {
@@ -5059,18 +5003,6 @@ cpu2-thermal {
thermal-sensors = <&tsens0 3>;

trips {
- cpu2_alert0: trip-point0 {
- temperature = <90000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
- cpu2_alert1: trip-point1 {
- temperature = <95000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
cpu2_crit: cpu_crit {
temperature = <110000>;
hysteresis = <1000>;
@@ -5078,22 +5010,6 @@ cpu2_crit: cpu_crit {
};
};

- cooling-maps {
- map0 {
- trip = <&cpu2_alert0>;
- cooling-device = <&CPU0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
- };
- map1 {
- trip = <&cpu2_alert1>;
- cooling-device = <&CPU0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
- };
- };
};

cpu3-thermal {
@@ -5103,41 +5019,12 @@ cpu3-thermal {
thermal-sensors = <&tsens0 4>;

trips {
- cpu3_alert0: trip-point0 {
- temperature = <90000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
- cpu3_alert1: trip-point1 {
- temperature = <95000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
cpu3_crit: cpu_crit {
temperature = <110000>;
hysteresis = <1000>;
type = "critical";
};
};
-
- cooling-maps {
- map0 {
- trip = <&cpu3_alert0>;
- cooling-device = <&CPU0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
- };
- map1 {
- trip = <&cpu3_alert1>;
- cooling-device = <&CPU0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
- };
- };
};

cpu4-thermal {
@@ -5147,18 +5034,6 @@ cpu4-thermal {
thermal-sensors = <&tsens0 7>;

trips {
- cpu4_alert0: trip-point0 {
- temperature = <90000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
- cpu4_alert1: trip-point1 {
- temperature = <95000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
cpu4_crit: cpu_crit {
temperature = <110000>;
hysteresis = <1000>;
@@ -5166,22 +5041,6 @@ cpu4_crit: cpu_crit {
};
};

- cooling-maps {
- map0 {
- trip = <&cpu4_alert0>;
- cooling-device = <&CPU4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
- };
- map1 {
- trip = <&cpu4_alert1>;
- cooling-device = <&CPU4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
- };
- };
};

cpu5-thermal {
@@ -5191,18 +5050,6 @@ cpu5-thermal {
thermal-sensors = <&tsens0 8>;

trips {
- cpu5_alert0: trip-point0 {
- temperature = <90000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
- cpu5_alert1: trip-point1 {
- temperature = <95000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
cpu5_crit: cpu_crit {
temperature = <110000>;
hysteresis = <1000>;
@@ -5210,22 +5057,6 @@ cpu5_crit: cpu_crit {
};
};

- cooling-maps {
- map0 {
- trip = <&cpu5_alert0>;
- cooling-device = <&CPU4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
- };
- map1 {
- trip = <&cpu5_alert1>;
- cooling-device = <&CPU4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
- };
- };
};

cpu6-thermal {
@@ -5235,18 +5066,6 @@ cpu6-thermal {
thermal-sensors = <&tsens0 9>;

trips {
- cpu6_alert0: trip-point0 {
- temperature = <90000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
- cpu6_alert1: trip-point1 {
- temperature = <95000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
cpu6_crit: cpu_crit {
temperature = <110000>;
hysteresis = <1000>;
@@ -5254,22 +5073,6 @@ cpu6_crit: cpu_crit {
};
};

- cooling-maps {
- map0 {
- trip = <&cpu6_alert0>;
- cooling-device = <&CPU4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
- };
- map1 {
- trip = <&cpu6_alert1>;
- cooling-device = <&CPU4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
- };
- };
};

cpu7-thermal {
@@ -5279,18 +5082,6 @@ cpu7-thermal {
thermal-sensors = <&tsens0 10>;

trips {
- cpu7_alert0: trip-point0 {
- temperature = <90000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
- cpu7_alert1: trip-point1 {
- temperature = <95000>;
- hysteresis = <2000>;
- type = "passive";
- };
-
cpu7_crit: cpu_crit {
temperature = <110000>;
hysteresis = <1000>;
@@ -5298,22 +5089,6 @@ cpu7_crit: cpu_crit {
};
};

- cooling-maps {
- map0 {
- trip = <&cpu7_alert0>;
- cooling-device = <&CPU4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
- };
- map1 {
- trip = <&cpu7_alert1>;
- cooling-device = <&CPU4 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU5 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU6 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
- <&CPU7 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
- };
- };
};

aoss0-thermal {
--
2.25.1

2021-06-09 16:53:11

by Thara Gopinath

[permalink] [raw]

Subject: [PATCH 1/5] firmware: qcom_scm: Introduce SCM calls to access LMh

Introduce SCM calls to access/configure limits management hardware(LMh).

Signed-off-by: Thara Gopinath <[email protected]>
---
drivers/firmware/qcom_scm.c | 47 +++++++++++++++++++++++++++++++++++++
drivers/firmware/qcom_scm.h | 4 ++++
include/linux/qcom_scm.h | 13 ++++++++++
3 files changed, 64 insertions(+)

diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c
index ee9cb545e73b..0259e9ffb8a1 100644
--- a/drivers/firmware/qcom_scm.c
+++ b/drivers/firmware/qcom_scm.c
@@ -1147,6 +1147,53 @@ int qcom_scm_qsmmu500_wait_safe_toggle(bool en)
}
EXPORT_SYMBOL(qcom_scm_qsmmu500_wait_safe_toggle);

+bool qcom_scm_lmh_dcvsh_available(void)
+{
+ return __qcom_scm_is_call_available(__scm->dev, QCOM_SCM_SVC_LMH, QCOM_SCM_LMH_LIMIT_DCVSH);
+}
+EXPORT_SYMBOL(qcom_scm_lmh_dcvsh_available);
+
+int qcom_scm_lmh_profile_change(u32 profile_id)
+{
+ struct qcom_scm_desc desc = {
+ .svc = QCOM_SCM_SVC_LMH,
+ .cmd = QCOM_SCM_LMH_LIMIT_PROFILE_CHANGE,
+ .arginfo = QCOM_SCM_ARGS(1, QCOM_SCM_VAL),
+ .args[0] = profile_id,
+ .owner = ARM_SMCCC_OWNER_SIP,
+ };
+
+ return qcom_scm_call(__scm->dev, &desc, NULL);
+}
+EXPORT_SYMBOL(qcom_scm_lmh_profile_change);
+
+int qcom_scm_lmh_dcvsh(u32 *payload, u32 payload_size, u64 limit_node, u32 node_id, u64 version)
+{
+ dma_addr_t payload_phys;
+ void *payload_buf;
+
+ struct qcom_scm_desc desc = {
+ .svc = QCOM_SCM_SVC_LMH,
+ .cmd = QCOM_SCM_LMH_LIMIT_DCVSH,
+ .arginfo = QCOM_SCM_ARGS(5, QCOM_SCM_RO, QCOM_SCM_VAL, QCOM_SCM_VAL,
+ QCOM_SCM_VAL, QCOM_SCM_VAL),
+ .args[1] = payload_size,
+ .args[2] = limit_node,
+ .args[3] = node_id,
+ .args[4] = version,
+ .owner = ARM_SMCCC_OWNER_SIP,
+ };
+
+ payload_buf = dma_alloc_coherent(__scm->dev, payload_size, &payload_phys, GFP_KERNEL);
+ if (!payload_buf)
+ return -ENOMEM;
+ memcpy(payload_buf, payload, payload_size);
+
+ desc.args[0] = payload_phys;
+ return qcom_scm_call(__scm->dev, &desc, NULL);
+}
+EXPORT_SYMBOL(qcom_scm_lmh_dcvsh);
+
static int qcom_scm_find_dload_address(struct device *dev, u64 *addr)
{
struct device_node *tcsr;
diff --git a/drivers/firmware/qcom_scm.h b/drivers/firmware/qcom_scm.h
index 632fe3142462..d92156ceb3ac 100644
--- a/drivers/firmware/qcom_scm.h
+++ b/drivers/firmware/qcom_scm.h
@@ -114,6 +114,10 @@ extern int scm_legacy_call(struct device *dev, const struct qcom_scm_desc *desc,
#define QCOM_SCM_SVC_HDCP 0x11
#define QCOM_SCM_HDCP_INVOKE 0x01

+#define QCOM_SCM_SVC_LMH 0x13
+#define QCOM_SCM_LMH_LIMIT_PROFILE_CHANGE 0x01
+#define QCOM_SCM_LMH_LIMIT_DCVSH 0x10
+
#define QCOM_SCM_SVC_SMMU_PROGRAM 0x15
#define QCOM_SCM_SMMU_CONFIG_ERRATA1 0x03
#define QCOM_SCM_SMMU_CONFIG_ERRATA1_CLIENT_ALL 0x02
diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index 0165824c5128..0c92197769e7 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -109,6 +109,12 @@ extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt,
u32 *resp);

extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en);
+
+extern int qcom_scm_lmh_dcvsh(u32 *payload, u32 payload_size, u64 limit_node,
+ u32 node_id, u64 version);
+extern int qcom_scm_lmh_profile_change(u32 profile_id);
+extern bool qcom_scm_lmh_dcvsh_available(void);
+
#else

#include <linux/errno.h>
@@ -170,5 +176,12 @@ static inline int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt,

static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en)
{ return -ENODEV; }
+
+int qcom_scm_lmh_dcvsh(u32 *payload, u32 payload_size, u64 limit_node,
+ u32 node_id, u64 version)
+ { return -ENODEV; }
+int qcom_scm_lmh_profile_change(u32 profile_id) { return -ENODEV; }
+
+bool qcom_scm_lmh_dcvsh_available(void) { return -ENODEV; }
#endif
#endif
--
2.25.1

2021-06-09 17:05:20

by Randy Dunlap

[permalink] [raw]

Subject: Re: [PATCH 2/5] thermal: qcom: Add support for LMh driver

On 6/8/21 3:29 PM, Thara Gopinath wrote:
> Driver enabling various pieces of Limits Management Hardware(LMh) for cpu
> cluster0 and cpu cluster1 namely kick starting monitoring of temperature,
> current, battery current violations, enabling reliability algorithm and
> setting up various temperature limits.
>
> The following has been explained in the cover letter. I am including this
> here so that this remains in the commit message as well.
>
> LMh is a hardware infrastructure on some Qualcomm SoCs that can enforce
> temperature and current limits as programmed by software for certain IPs
> like CPU. On many newer SoCs LMh is configured by firmware/TZ and no
> programming is needed from the kernel side. But on certain SoCs like sdm845
> the firmware does not do a complete programming of the h/w. On such SoCs
> kernel software has to explicitly set up the temperature limits and turn on
> various monitoring and enforcing algorithms on the hardware.
>
> Signed-off-by: Thara Gopinath <[email protected]>
> ---
> drivers/thermal/qcom/Kconfig | 10 ++
> drivers/thermal/qcom/Makefile | 1 +
> drivers/thermal/qcom/lmh.c | 244 ++++++++++++++++++++++++++++++++++
> 3 files changed, 255 insertions(+)
> create mode 100644 drivers/thermal/qcom/lmh.c
>
> diff --git a/drivers/thermal/qcom/Kconfig b/drivers/thermal/qcom/Kconfig
> index 8d5ac2df26dc..c95b95e254d7 100644
> --- a/drivers/thermal/qcom/Kconfig
> +++ b/drivers/thermal/qcom/Kconfig
> @@ -31,3 +31,13 @@ config QCOM_SPMI_TEMP_ALARM
> trip points. The temperature reported by the thermal sensor reflects the
> real time die temperature if an ADC is present or an estimate of the
> temperature based upon the over temperature stage value.
> +
> +config QCOM_LMH
> + tristate "Qualcomm Limits Management Hardware"
> + depends on ARCH_QCOM
> + help
> + This enables initialization of Qualcomm limits management
> + hardware(LMh). LMh allows for h/w enforced mitigation for cpus based on

hardware-enforced CPUs

> + input from temperature and current sensors. On many newer Qualcomm SoCs
> + LMH is configure in the firmware and this feature need not be enabled.

LMh

> + However, on certain SoCs like sdm845 LMH has to be configured from HLOS.

LMh

What is HLOS?

> diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c
> new file mode 100644
> index 000000000000..8741a36cb674
> --- /dev/null
> +++ b/drivers/thermal/qcom/lmh.c
> @@ -0,0 +1,244 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +/*
> + * Copyright (C) 2021, Linaro Limited. All rights reserved.
> + */

[snip]

> +static int lmh_probe(struct platform_device *pdev)
> +{
> + struct device *dev;
> + struct device_node *np;
> + struct lmh_hw_data *lmh_data;
> + u32 node_id;
> + int ret;
> +
> + dev = &pdev->dev;
> + np = dev->of_node;
> + if (!np)
> + return -EINVAL;
> +
> + lmh_data = devm_kzalloc(dev, sizeof(*lmh_data), GFP_KERNEL);
> + if (!lmh_data)
> + return -ENOMEM;
> +
> + lmh_data->base = devm_platform_ioremap_resource(pdev, 0);
> + if (IS_ERR(lmh_data->base))
> + return PTR_ERR(lmh_data->base);
> +
> + ret = of_property_read_u32(np, "qcom,lmh-cpu-id", &lmh_data->cpu_id);
> + if (ret)
> + return -ENODEV;
> +
> + /*
> + * Only sdm845 has lmh hardware currently enabled from hlos. If this is needed
> + * for other platforms, revisit this to check if the <cpu-id, node-id> should be part
> + * of a dt match table.
> + */
> + if (lmh_data->cpu_id == 0) {
> + node_id = LMH_CLUSTER0_NODE_ID;
> + } else if (lmh_data->cpu_id == 4) {
> + node_id = LMH_CLUSTER1_NODE_ID;
> + } else {
> + dev_err(dev, "Wrong cpu id associated with lmh node\n");

CPU LMh

> + return -EINVAL;
> + }
> +
> + /* Payload size is five bytes for now */
> + lmh_data->payload_size = 5 * sizeof(u32);
> +
> + platform_set_drvdata(pdev, lmh_data);
> +
> + if (!qcom_scm_lmh_dcvsh_available())
> + return -EINVAL;
> +
> + /* Enable Thermal Algorithm */
> + update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_ALGO_MODE_ENABLE, 1);
> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
> + LMH_NODE_DCVS, node_id, 0);
> + if (ret) {
> + dev_err(dev, "Error %d enabling thermal subfunction\n", ret);
> + return ret;
> + }
> +
> + /* Enable Current Sensing Algorithm */
> + update_payload(lmh_data, LMH_SUB_FN_CRNT, LMH_ALGO_MODE_ENABLE, 1);
> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
> + LMH_NODE_DCVS, node_id, 0);
> + if (ret) {
> + dev_err(dev, "Error %d enabling current subfunction\n", ret);
> + return ret;
> + }
> +
> + /* Enable Reliability Algorithm */
> + update_payload(lmh_data, LMH_SUB_FN_REL, LMH_ALGO_MODE_ENABLE, 1);
> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
> + LMH_NODE_DCVS, node_id, 0);
> + if (ret) {
> + dev_err(dev, "Error %d enabling reliability subfunction\n", ret);
> + return ret;
> + }
> +
> + /* Enable BCL Algorithm */
> + update_payload(lmh_data, LMH_SUB_FN_BCL, LMH_ALGO_MODE_ENABLE, 1);
> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
> + LMH_NODE_DCVS, node_id, 0);
> + if (ret) {
> + dev_err(dev, "Error %d enabling BCL subfunction\n", ret);

What is BCL?

> + return ret;
> + }
> +
> + ret = qcom_scm_lmh_profile_change(0x1);
> + if (ret) {
> + dev_err(dev, "Error %d changing profile\n", ret);
> + return ret;
> + }
> +
> + /* Set default thermal trips */
> + update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_TH_ARM_THRESHOLD, LMH_TH_ARM_TEMP);
> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
> + LMH_NODE_DCVS, node_id, 0);
> + if (ret) {
> + dev_err(dev, "Error setting thermal ARM thershold%d\n", ret);

threshold

> + return ret;
> + }
> +
> + update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_TH_HI_THRESHOLD, LMH_TH_HI_TEMP);
> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
> + LMH_NODE_DCVS, node_id, 0);
> + if (ret) {
> + dev_err(dev, "Error setting thermal HI thershold%d\n", ret);

threshold

> + return ret;
> + }
> + update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_TH_LOW_THRESHOLD, LMH_TH_LOW_TEMP);
> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
> + LMH_NODE_DCVS, node_id, 0);
> + if (ret) {
> + dev_err(dev, "Error setting thermal ARM thershold%d\n", ret);

threshold

> + return ret;
> + }
> +
> + lmh_data->irq = platform_get_irq(pdev, 0);
> + lmh_data->domain = irq_domain_add_linear(np, 1, &lmh_irq_ops, lmh_data);
> + if (!lmh_data->domain) {
> + dev_err(dev, "Error adding irq_domain\n");
> + return -EINVAL;
> + }
> +
> + ret = devm_request_irq(dev, lmh_data->irq, lmh_handle_irq,
> + IRQF_TRIGGER_HIGH | IRQF_ONESHOT | IRQF_NO_SUSPEND,
> + "lmh-irq", lmh_data);
> + if (ret) {
> + dev_err(dev, "Error %d registering irq %x\n", ret, lmh_data->irq);
> + irq_domain_remove(lmh_data->domain);
> + return ret;
> + }
> + return 0;
> +}
> +
> +static const struct of_device_id lmh_table[] = {
> + { .compatible = "qcom,msm-hw-limits", },
> + {},
> +};
> +
> +static struct platform_driver lmh_driver = {
> + .probe = lmh_probe,
> + .driver = {
> + .name = "qcom-lmh",
> + .of_match_table = lmh_table,
> + },
> +};
> +module_platform_driver(lmh_driver);
> +
> +MODULE_LICENSE("GPL v2");
> +MODULE_DESCRIPTION("QCOM LMH driver");

LMh

thanks.
--
~Randy

2021-06-14 10:36:51

by Viresh Kumar

[permalink] [raw]

Subject: Re: [PATCH 3/5] cpufreq: qcom-cpufreq-hw: Add dcvs interrupt support

On 08-06-21, 18:29, Thara Gopinath wrote:
> Add interrupt support to notify the kernel of h/w initiated frequency
> throttling by LMh. Convey this to scheduler via thermal presssure
> interface.
>
> Signed-off-by: Thara Gopinath <[email protected]>
> ---
> drivers/cpufreq/qcom-cpufreq-hw.c | 100 ++++++++++++++++++++++++++++++
> 1 file changed, 100 insertions(+)
>
> diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
> index f86859bf76f1..95e17330aa9d 100644
> --- a/drivers/cpufreq/qcom-cpufreq-hw.c
> +++ b/drivers/cpufreq/qcom-cpufreq-hw.c
> @@ -13,6 +13,7 @@
> #include <linux/of_platform.h>
> #include <linux/pm_opp.h>
> #include <linux/slab.h>
> +#include <linux/interrupt.h>
>
> #define LUT_MAX_ENTRIES 40U
> #define LUT_SRC GENMASK(31, 30)
> @@ -22,10 +23,13 @@
> #define CLK_HW_DIV 2
> #define LUT_TURBO_IND 1
>
> +#define HZ_PER_KHZ 1000
> +
> struct qcom_cpufreq_soc_data {
> u32 reg_enable;
> u32 reg_freq_lut;
> u32 reg_volt_lut;
> + u32 reg_current_vote;
> u32 reg_perf_state;
> u8 lut_row_size;
> };
> @@ -33,7 +37,11 @@ struct qcom_cpufreq_soc_data {
> struct qcom_cpufreq_data {
> void __iomem *base;
> struct resource *res;
> + struct delayed_work lmh_dcvs_poll_work;
> const struct qcom_cpufreq_soc_data *soc_data;
> + cpumask_var_t cpus;
> + unsigned long throttled_freq;
> + int lmh_dcvs_irq;
> };
>
> static unsigned long cpu_hw_rate, xo_rate;
> @@ -251,10 +259,79 @@ static void qcom_get_related_cpus(int index, struct cpumask *m)
> }
> }
>
> +static inline unsigned long qcom_lmh_vote_to_freq(u32 val)
> +{
> + return (val & 0x3FF) * 19200;
> +}
> +
> +static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
> +{
> + struct cpufreq_policy policy;
> + struct dev_pm_opp *opp;
> + struct device *dev;
> + unsigned long max_capacity, capacity, freq_hz;
> + unsigned int val, freq;
> +
> + val = readl_relaxed(data->base + data->soc_data->reg_current_vote);
> + freq = qcom_lmh_vote_to_freq(val);
> + freq_hz = freq * HZ_PER_KHZ;
> +
> + /* Do I need to calculate ceil and floor ? */

You don't know ?

> + dev = get_cpu_device(cpumask_first(data->cpus));
> + opp = dev_pm_opp_find_freq_floor(dev, &freq_hz);
> + if (IS_ERR(opp) && PTR_ERR(opp) == -ERANGE)
> + opp = dev_pm_opp_find_freq_ceil(dev, &freq_hz);
> +
> + data->throttled_freq = freq_hz / HZ_PER_KHZ;
> +

What exactly are we trying to do here ? A comment would be good as
well.

> + cpufreq_get_policy(&policy, cpumask_first(data->cpus));
> +
> + /* Update thermal pressure */
> + max_capacity = arch_scale_cpu_capacity(cpumask_first(data->cpus));

Set capacity of a single CPU from a policy ?

> + capacity = data->throttled_freq * max_capacity;
> + capacity /= policy.cpuinfo.max_freq;
> + /* Don't pass boost capacity to scheduler */
> + if (capacity > max_capacity)
> + capacity = max_capacity;
> + arch_set_thermal_pressure(data->cpus, max_capacity - capacity);

You should really be using policy->cpus instead of allocating
data->cpus..

> +}
> +
> +static void qcom_lmh_dcvs_poll(struct work_struct *work)
> +{
> + struct qcom_cpufreq_data *data;
> +
> + data = container_of(work, struct qcom_cpufreq_data, lmh_dcvs_poll_work.work);
> +
> + qcom_lmh_dcvs_notify(data);

You should really move the below stuff the disable_irq_nosync(), it
will make your life easier.

> + /**
> + * If h/w throttled frequency is higher than what cpufreq has requested for, stop
> + * polling and switch back to interrupt mechanism
> + */
> + if (data->throttled_freq >= qcom_cpufreq_hw_get(cpumask_first(data->cpus)))
> + /* Clear the existing interrupts and enable it back */
> + enable_irq(data->lmh_dcvs_irq);
> + else
> + mod_delayed_work(system_highpri_wq, &data->lmh_dcvs_poll_work,
> + msecs_to_jiffies(10));
> +}
> +
> +static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data)
> +{
> + struct qcom_cpufreq_data *c_data = data;
> +
> + /* Disable interrupt and enable polling */
> + disable_irq_nosync(c_data->lmh_dcvs_irq);
> + qcom_lmh_dcvs_notify(c_data);
> + mod_delayed_work(system_highpri_wq, &c_data->lmh_dcvs_poll_work, msecs_to_jiffies(10));
> +
> + return 0;
> +}
> +
> static const struct qcom_cpufreq_soc_data qcom_soc_data = {
> .reg_enable = 0x0,
> .reg_freq_lut = 0x110,
> .reg_volt_lut = 0x114,
> + .reg_current_vote = 0x704,

Should this be a different patch ?

> .reg_perf_state = 0x920,
> .lut_row_size = 32,
> };
> @@ -285,6 +362,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
> void __iomem *base;
> struct qcom_cpufreq_data *data;
> int ret, index;
> + bool lmh_mitigation_enabled = false;

You just overwrite it below, no need to initialize it.

>
> cpu_dev = get_cpu_device(policy->cpu);
> if (!cpu_dev) {
> @@ -305,6 +383,8 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
>
> index = args.args[0];
>
> + lmh_mitigation_enabled = of_property_read_bool(pdev->dev.of_node, "qcom,support-lmh");
> +
> res = platform_get_resource(pdev, IORESOURCE_MEM, index);
> if (!res) {
> dev_err(dev, "failed to get mem resource %d\n", index);
> @@ -329,6 +409,11 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
> goto unmap_base;
> }
>
> + if (!alloc_cpumask_var(&data->cpus, GFP_KERNEL)) {
> + ret = -ENOMEM;
> + goto unmap_base;
> + }
> +
> data->soc_data = of_device_get_match_data(&pdev->dev);
> data->base = base;
> data->res = res;
> @@ -347,6 +432,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
> goto error;
> }
>
> + cpumask_copy(data->cpus, policy->cpus);
> policy->driver_data = data;
>
> ret = qcom_cpufreq_hw_read_lut(cpu_dev, policy);
> @@ -370,6 +456,20 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
> dev_warn(cpu_dev, "failed to enable boost: %d\n", ret);
> }
>
> + if (lmh_mitigation_enabled) {

Shouldn't you move the allocation and setting of data->cpus here ? I
suggest creating a separate routine for all initialization around this
stuff.

> + data->lmh_dcvs_irq = platform_get_irq(pdev, index);
> + if (data->lmh_dcvs_irq < 0) {
> + ret = data->lmh_dcvs_irq;
> + goto error;
> + }
> + ret = devm_request_irq(dev, data->lmh_dcvs_irq, qcom_lmh_dcvs_handle_irq,
> + 0, "dcvsh-irq", data);

I would rather pass policy as data here.

> + if (ret) {
> + dev_err(dev, "Error %d registering irq %x\n", ret, data->lmh_dcvs_irq);
> + goto error;
> + }
> + INIT_DEFERRABLE_WORK(&data->lmh_dcvs_poll_work, qcom_lmh_dcvs_poll);
> + }
> return 0;
> error:
> kfree(data);

--
viresh

2021-06-14 20:55:39

by Bjorn Andersson

[permalink] [raw]

Subject: Re: [PATCH 2/5] thermal: qcom: Add support for LMh driver

On Tue 08 Jun 17:29 CDT 2021, Thara Gopinath wrote:

> Driver enabling various pieces of Limits Management Hardware(LMh) for cpu
> cluster0 and cpu cluster1 namely kick starting monitoring of temperature,
> current, battery current violations, enabling reliability algorithm and
> setting up various temperature limits.
>
> The following has been explained in the cover letter. I am including this
> here so that this remains in the commit message as well.
>
> LMh is a hardware infrastructure on some Qualcomm SoCs that can enforce
> temperature and current limits as programmed by software for certain IPs
> like CPU. On many newer SoCs LMh is configured by firmware/TZ and no
> programming is needed from the kernel side. But on certain SoCs like sdm845
> the firmware does not do a complete programming of the h/w. On such SoCs
> kernel software has to explicitly set up the temperature limits and turn on
> various monitoring and enforcing algorithms on the hardware.
>
> Signed-off-by: Thara Gopinath <[email protected]>
> ---
> drivers/thermal/qcom/Kconfig | 10 ++
> drivers/thermal/qcom/Makefile | 1 +
> drivers/thermal/qcom/lmh.c | 244 ++++++++++++++++++++++++++++++++++
> 3 files changed, 255 insertions(+)
> create mode 100644 drivers/thermal/qcom/lmh.c
>
> diff --git a/drivers/thermal/qcom/Kconfig b/drivers/thermal/qcom/Kconfig
> index 8d5ac2df26dc..c95b95e254d7 100644
> --- a/drivers/thermal/qcom/Kconfig
> +++ b/drivers/thermal/qcom/Kconfig
> @@ -31,3 +31,13 @@ config QCOM_SPMI_TEMP_ALARM
> trip points. The temperature reported by the thermal sensor reflects the
> real time die temperature if an ADC is present or an estimate of the
> temperature based upon the over temperature stage value.
> +
> +config QCOM_LMH
> + tristate "Qualcomm Limits Management Hardware"
> + depends on ARCH_QCOM
> + help
> + This enables initialization of Qualcomm limits management
> + hardware(LMh). LMh allows for h/w enforced mitigation for cpus based on
> + input from temperature and current sensors. On many newer Qualcomm SoCs
> + LMH is configure in the firmware and this feature need not be enabled.
> + However, on certain SoCs like sdm845 LMH has to be configured from HLOS.
> diff --git a/drivers/thermal/qcom/Makefile b/drivers/thermal/qcom/Makefile
> index 252ea7d9da0b..0fa2512042e7 100644
> --- a/drivers/thermal/qcom/Makefile
> +++ b/drivers/thermal/qcom/Makefile
> @@ -5,3 +5,4 @@ qcom_tsens-y += tsens.o tsens-v2.o tsens-v1.o tsens-v0_1.o \
> tsens-8960.o
> obj-$(CONFIG_QCOM_SPMI_ADC_TM5) += qcom-spmi-adc-tm5.o
> obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM) += qcom-spmi-temp-alarm.o
> +obj-$(CONFIG_QCOM_LMH) += lmh.o
> diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c
> new file mode 100644
> index 000000000000..8741a36cb674
> --- /dev/null
> +++ b/drivers/thermal/qcom/lmh.c
> @@ -0,0 +1,244 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +/*
> + * Copyright (C) 2021, Linaro Limited. All rights reserved.
> + */
> +#include <linux/module.h>
> +#include <linux/interrupt.h>
> +#include <linux/irqdomain.h>
> +#include <linux/err.h>
> +#include <linux/platform_device.h>
> +#include <linux/of_platform.h>
> +#include <linux/slab.h>
> +#include <linux/qcom_scm.h>
> +
> +#define LMH_NODE_DCVS 0x44435653
> +#define LMH_CLUSTER0_NODE_ID 0x6370302D
> +#define LMH_CLUSTER1_NODE_ID 0x6370312D
> +
> +#define LMH_SUB_FN_THERMAL 0x54484D4C
> +#define LMH_SUB_FN_CRNT 0x43524E54
> +#define LMH_SUB_FN_REL 0x52454C00
> +#define LMH_SUB_FN_BCL 0x42434C00
> +
> +#define LMH_ALGO_MODE_ENABLE 0x454E424C
> +#define LMH_TH_HI_THRESHOLD 0x48494748
> +#define LMH_TH_LOW_THRESHOLD 0x4C4F5700
> +#define LMH_TH_ARM_THRESHOLD 0x41524D00
> +
> +#define LMH_TH_HI_TEMP 95000
> +#define LMH_TH_LOW_TEMP 94500
> +#define LMH_TH_ARM_TEMP 65000
> +
> +#define LMH_REG_DCVS_INTR_CLR 0x8
> +
> +struct lmh_hw_data {
> + void __iomem *base;
> + struct irq_domain *domain;
> + int irq;
> + u32 payload[5];
> + u32 payload_size;
> + u32 cpu_id;
> +};
> +
> +static void update_payload(struct lmh_hw_data *lmh_data, u32 fn, u32 reg, u32 val)

Please pass fn, reg and val in the scm function call instead and stuff
the payload array in the scm driver instead.

> +{
> + lmh_data->payload[0] = fn;
> + lmh_data->payload[1] = 0;
> + lmh_data->payload[2] = reg;
> + lmh_data->payload[3] = 1;
> + lmh_data->payload[4] = val;
> +}
> +
> +static irqreturn_t lmh_handle_irq(int hw_irq, void *data)
> +{
> + struct lmh_hw_data *lmh_data = data;
> + int irq = irq_find_mapping(lmh_data->domain, 0);
> +
> + /*
> + * Disable interrupt and call the cpufreq driver to handle the interrupt
> + * cpufreq will enable the interrupt once finished processing.
> + */
> + disable_irq_nosync(lmh_data->irq);

The contract between this driver's disabling of the IRQ and the
cpufreq-hw driver's enabling it when we're done polling does worry me.

In the case of EPSS, don't we disable the interrupt during the polling
there as well? If that's the case wouldn't it be better to implement
irq_chip->irq_disable and have the cpufreq-hw driver do the disable in
both cases?

> + if (irq)
> + generic_handle_irq(irq);
> +
> + return 0;
> +}
> +
> +static void lmh_enable_interrupt(struct irq_data *d)
> +{
> + struct lmh_hw_data *lmh_data = irq_data_get_irq_chip_data(d);
> +
> + /* Clear the existing interrupt */
> + writel_relaxed(0xFF, lmh_data->base + LMH_REG_DCVS_INTR_CLR);

Please avoid using _relaxed versions of writel, unless there's a strong
reason and please lowercase the hex digits.

> + enable_irq(lmh_data->irq);
> +}
> +
> +static struct irq_chip lmh_irq_chip = {
> + .name = "lmh",
> + .irq_enable = lmh_enable_interrupt,
> +};
> +
> +static int lmh_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
> +{
> + struct lmh_hw_data *lmh_data = d->host_data;
> +
> + irq_set_chip_and_handler(irq, &lmh_irq_chip, handle_simple_irq);
> + irq_set_chip_data(irq, lmh_data);
> +
> + return 0;
> +}
> +
> +static const struct irq_domain_ops lmh_irq_ops = {
> + .map = lmh_irq_map,
> + .xlate = irq_domain_xlate_onecell,
> +};
> +
> +static int lmh_probe(struct platform_device *pdev)
> +{
> + struct device *dev;
> + struct device_node *np;
> + struct lmh_hw_data *lmh_data;
> + u32 node_id;
> + int ret;
> +
> + dev = &pdev->dev;
> + np = dev->of_node;
> + if (!np)
> + return -EINVAL;
> +
> + lmh_data = devm_kzalloc(dev, sizeof(*lmh_data), GFP_KERNEL);
> + if (!lmh_data)
> + return -ENOMEM;
> +
> + lmh_data->base = devm_platform_ioremap_resource(pdev, 0);
> + if (IS_ERR(lmh_data->base))
> + return PTR_ERR(lmh_data->base);
> +
> + ret = of_property_read_u32(np, "qcom,lmh-cpu-id", &lmh_data->cpu_id);
> + if (ret)
> + return -ENODEV;
> +
> + /*
> + * Only sdm845 has lmh hardware currently enabled from hlos. If this is needed
> + * for other platforms, revisit this to check if the <cpu-id, node-id> should be part
> + * of a dt match table.
> + */
> + if (lmh_data->cpu_id == 0) {
> + node_id = LMH_CLUSTER0_NODE_ID;
> + } else if (lmh_data->cpu_id == 4) {
> + node_id = LMH_CLUSTER1_NODE_ID;
> + } else {
> + dev_err(dev, "Wrong cpu id associated with lmh node\n");
> + return -EINVAL;
> + }
> +
> + /* Payload size is five bytes for now */
> + lmh_data->payload_size = 5 * sizeof(u32);
> +
> + platform_set_drvdata(pdev, lmh_data);
> +
> + if (!qcom_scm_lmh_dcvsh_available())
> + return -EINVAL;
> +
> + /* Enable Thermal Algorithm */
> + update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_ALGO_MODE_ENABLE, 1);
> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
> + LMH_NODE_DCVS, node_id, 0);
> + if (ret) {
> + dev_err(dev, "Error %d enabling thermal subfunction\n", ret);
> + return ret;
> + }
> +
> + /* Enable Current Sensing Algorithm */
> + update_payload(lmh_data, LMH_SUB_FN_CRNT, LMH_ALGO_MODE_ENABLE, 1);
> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
> + LMH_NODE_DCVS, node_id, 0);
> + if (ret) {
> + dev_err(dev, "Error %d enabling current subfunction\n", ret);
> + return ret;
> + }
> +
> + /* Enable Reliability Algorithm */
> + update_payload(lmh_data, LMH_SUB_FN_REL, LMH_ALGO_MODE_ENABLE, 1);
> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
> + LMH_NODE_DCVS, node_id, 0);
> + if (ret) {
> + dev_err(dev, "Error %d enabling reliability subfunction\n", ret);
> + return ret;
> + }
> +
> + /* Enable BCL Algorithm */
> + update_payload(lmh_data, LMH_SUB_FN_BCL, LMH_ALGO_MODE_ENABLE, 1);
> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
> + LMH_NODE_DCVS, node_id, 0);
> + if (ret) {
> + dev_err(dev, "Error %d enabling BCL subfunction\n", ret);
> + return ret;
> + }
> +
> + ret = qcom_scm_lmh_profile_change(0x1);
> + if (ret) {
> + dev_err(dev, "Error %d changing profile\n", ret);
> + return ret;
> + }
> +
> + /* Set default thermal trips */
> + update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_TH_ARM_THRESHOLD, LMH_TH_ARM_TEMP);
> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
> + LMH_NODE_DCVS, node_id, 0);
> + if (ret) {
> + dev_err(dev, "Error setting thermal ARM thershold%d\n", ret);
> + return ret;
> + }
> +
> + update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_TH_HI_THRESHOLD, LMH_TH_HI_TEMP);
> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
> + LMH_NODE_DCVS, node_id, 0);
> + if (ret) {
> + dev_err(dev, "Error setting thermal HI thershold%d\n", ret);
> + return ret;
> + }
> + update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_TH_LOW_THRESHOLD, LMH_TH_LOW_TEMP);
> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
> + LMH_NODE_DCVS, node_id, 0);
> + if (ret) {
> + dev_err(dev, "Error setting thermal ARM thershold%d\n", ret);
> + return ret;
> + }
> +
> + lmh_data->irq = platform_get_irq(pdev, 0);
> + lmh_data->domain = irq_domain_add_linear(np, 1, &lmh_irq_ops, lmh_data);
> + if (!lmh_data->domain) {
> + dev_err(dev, "Error adding irq_domain\n");
> + return -EINVAL;
> + }
> +
> + ret = devm_request_irq(dev, lmh_data->irq, lmh_handle_irq,
> + IRQF_TRIGGER_HIGH | IRQF_ONESHOT | IRQF_NO_SUSPEND,
> + "lmh-irq", lmh_data);
> + if (ret) {
> + dev_err(dev, "Error %d registering irq %x\n", ret, lmh_data->irq);
> + irq_domain_remove(lmh_data->domain);
> + return ret;
> + }
> + return 0;
> +}
> +
> +static const struct of_device_id lmh_table[] = {
> + { .compatible = "qcom,msm-hw-limits", },

Don't we need platform specific compatibles? Perhaps
"qcom,<platform>-lmh"?

If we're going with a generic compatible I think that should be done in
addition to a platform-specific one (and qcom,lmh should be sufficient?)

> + {},

Please omit the comma here.

> +};

Driver is tristate, so you need a MODULE_DEVICE_TABLE(of, lmh_table);
here, to make sure the module is loaded automatically based on the
compatible strings in DT.

Regards,
Bjorn

> +
> +static struct platform_driver lmh_driver = {
> + .probe = lmh_probe,
> + .driver = {
> + .name = "qcom-lmh",
> + .of_match_table = lmh_table,
> + },
> +};
> +module_platform_driver(lmh_driver);
> +
> +MODULE_LICENSE("GPL v2");
> +MODULE_DESCRIPTION("QCOM LMH driver");
> --
> 2.25.1
>

2021-06-15 02:29:23

by Thara Gopinath

[permalink] [raw]

Subject: Re: [PATCH 3/5] cpufreq: qcom-cpufreq-hw: Add dcvs interrupt support

On 6/14/21 6:31 AM, Viresh Kumar wrote:
> On 08-06-21, 18:29, Thara Gopinath wrote:
>> Add interrupt support to notify the kernel of h/w initiated frequency
>> throttling by LMh. Convey this to scheduler via thermal presssure
>> interface.
>>
>> Signed-off-by: Thara Gopinath <[email protected]>
>> ---
>> drivers/cpufreq/qcom-cpufreq-hw.c | 100 ++++++++++++++++++++++++++++++
>> 1 file changed, 100 insertions(+)
>>
>> diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
>> index f86859bf76f1..95e17330aa9d 100644
>> --- a/drivers/cpufreq/qcom-cpufreq-hw.c
>> +++ b/drivers/cpufreq/qcom-cpufreq-hw.c
>> @@ -13,6 +13,7 @@
>> #include <linux/of_platform.h>
>> #include <linux/pm_opp.h>
>> #include <linux/slab.h>
>> +#include <linux/interrupt.h>
>>
>> #define LUT_MAX_ENTRIES 40U
>> #define LUT_SRC GENMASK(31, 30)
>> @@ -22,10 +23,13 @@
>> #define CLK_HW_DIV 2
>> #define LUT_TURBO_IND 1
>>
>> +#define HZ_PER_KHZ 1000
>> +
>> struct qcom_cpufreq_soc_data {
>> u32 reg_enable;
>> u32 reg_freq_lut;
>> u32 reg_volt_lut;
>> + u32 reg_current_vote;
>> u32 reg_perf_state;
>> u8 lut_row_size;
>> };
>> @@ -33,7 +37,11 @@ struct qcom_cpufreq_soc_data {
>> struct qcom_cpufreq_data {
>> void __iomem *base;
>> struct resource *res;
>> + struct delayed_work lmh_dcvs_poll_work;
>> const struct qcom_cpufreq_soc_data *soc_data;
>> + cpumask_var_t cpus;
>> + unsigned long throttled_freq;
>> + int lmh_dcvs_irq;
>> };
>>
>> static unsigned long cpu_hw_rate, xo_rate;
>> @@ -251,10 +259,79 @@ static void qcom_get_related_cpus(int index, struct cpumask *m)
>> }
>> }
>>
>> +static inline unsigned long qcom_lmh_vote_to_freq(u32 val)
>> +{
>> + return (val & 0x3FF) * 19200;
>> +}
>> +
>> +static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
>> +{
>> + struct cpufreq_policy policy;
>> + struct dev_pm_opp *opp;
>> + struct device *dev;
>> + unsigned long max_capacity, capacity, freq_hz;
>> + unsigned int val, freq;
>> +
>> + val = readl_relaxed(data->base + data->soc_data->reg_current_vote);
>> + freq = qcom_lmh_vote_to_freq(val);
>> + freq_hz = freq * HZ_PER_KHZ;
>> +
>> + /* Do I need to calculate ceil and floor ? */
>
> You don't know ?

stray comment! Will remove it.

>
>> + dev = get_cpu_device(cpumask_first(data->cpus));
>> + opp = dev_pm_opp_find_freq_floor(dev, &freq_hz);
>> + if (IS_ERR(opp) && PTR_ERR(opp) == -ERANGE)
>> + opp = dev_pm_opp_find_freq_ceil(dev, &freq_hz);
>> +
>> + data->throttled_freq = freq_hz / HZ_PER_KHZ;
>> +
>
> What exactly are we trying to do here ? A comment would be good as
> well.

You want me to put a comment saying converting frequency in hz to khz ?

>
>> + cpufreq_get_policy(&policy, cpumask_first(data->cpus));
>> +
>> + /* Update thermal pressure */
>> + max_capacity = arch_scale_cpu_capacity(cpumask_first(data->cpus));
>
> Set capacity of a single CPU from a policy ?

Get maximum capacity of a cpu.

>
>> + capacity = data->throttled_freq * max_capacity;
>> + capacity /= policy.cpuinfo.max_freq;
>> + /* Don't pass boost capacity to scheduler */
>> + if (capacity > max_capacity)
>> + capacity = max_capacity;
>> + arch_set_thermal_pressure(data->cpus, max_capacity - capacity);
>
> You should really be using policy->cpus instead of allocating
> data->cpus..

Yes I should be. But I still need data->cpus to get the policy.

>
>> +}
>> +
>> +static void qcom_lmh_dcvs_poll(struct work_struct *work)
>> +{
>> + struct qcom_cpufreq_data *data;
>> +
>> + data = container_of(work, struct qcom_cpufreq_data, lmh_dcvs_poll_work.work);
>> +
>> + qcom_lmh_dcvs_notify(data);
>
> You should really move the below stuff the disable_irq_nosync(), it
> will make your life easier.

I don't understand your comment here. I want to disable irq. call
notify. Start polling. And in polling I want to call notify and if the
thermal event has passed stop polling else continue polling.
>
>> + /**
>> + * If h/w throttled frequency is higher than what cpufreq has requested for, stop
>> + * polling and switch back to interrupt mechanism
>> + */
>> + if (data->throttled_freq >= qcom_cpufreq_hw_get(cpumask_first(data->cpus)))
>> + /* Clear the existing interrupts and enable it back */
>> + enable_irq(data->lmh_dcvs_irq);
>> + else
>> + mod_delayed_work(system_highpri_wq, &data->lmh_dcvs_poll_work,
>> + msecs_to_jiffies(10));
>> +}
>> +
>> +static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data)
>> +{
>> + struct qcom_cpufreq_data *c_data = data;
>> +
>> + /* Disable interrupt and enable polling */
>> + disable_irq_nosync(c_data->lmh_dcvs_irq);
>> + qcom_lmh_dcvs_notify(c_data);
>> + mod_delayed_work(system_highpri_wq, &c_data->lmh_dcvs_poll_work, msecs_to_jiffies(10));
>> +
>> + return 0;
>> +}
>> +
>> static const struct qcom_cpufreq_soc_data qcom_soc_data = {
>> .reg_enable = 0x0,
>> .reg_freq_lut = 0x110,
>> .reg_volt_lut = 0x114,
>> + .reg_current_vote = 0x704,
>
> Should this be a different patch ?

Why ? This is the register to read the throttled frequency.

>
>> .reg_perf_state = 0x920,
>> .lut_row_size = 32,
>> };
>> @@ -285,6 +362,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
>> void __iomem *base;
>> struct qcom_cpufreq_data *data;
>> int ret, index;
>> + bool lmh_mitigation_enabled = false;
>
> You just overwrite it below, no need to initialize it.

Sure.

>
>>
>> cpu_dev = get_cpu_device(policy->cpu);
>> if (!cpu_dev) {
>> @@ -305,6 +383,8 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
>>
>> index = args.args[0];
>>
>> + lmh_mitigation_enabled = of_property_read_bool(pdev->dev.of_node, "qcom,support-lmh");
>> +
>> res = platform_get_resource(pdev, IORESOURCE_MEM, index);
>> if (!res) {
>> dev_err(dev, "failed to get mem resource %d\n", index);
>> @@ -329,6 +409,11 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
>> goto unmap_base;
>> }
>>
>> + if (!alloc_cpumask_var(&data->cpus, GFP_KERNEL)) {
>> + ret = -ENOMEM;
>> + goto unmap_base;
>> + }
>> +
>> data->soc_data = of_device_get_match_data(&pdev->dev);
>> data->base = base;
>> data->res = res;
>> @@ -347,6 +432,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
>> goto error;
>> }
>>
>> + cpumask_copy(data->cpus, policy->cpus);
>> policy->driver_data = data;
>>
>> ret = qcom_cpufreq_hw_read_lut(cpu_dev, policy);
>> @@ -370,6 +456,20 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
>> dev_warn(cpu_dev, "failed to enable boost: %d\n", ret);
>> }
>>
>> + if (lmh_mitigation_enabled) {
>
> Shouldn't you move the allocation and setting of data->cpus here ? I
> suggest creating a separate routine for all initialization around this
> stuff.

I should considering nothing else is using data->cpus. Yes I will create
a separate init function.

>
>> + data->lmh_dcvs_irq = platform_get_irq(pdev, index);
>> + if (data->lmh_dcvs_irq < 0) {
>> + ret = data->lmh_dcvs_irq;
>> + goto error;
>> + }
>> + ret = devm_request_irq(dev, data->lmh_dcvs_irq, qcom_lmh_dcvs_handle_irq,
>> + 0, "dcvsh-irq", data);
>
> I would rather pass policy as data here.

So policy for a cpu can change runtime, right ?

>
>> + if (ret) {
>> + dev_err(dev, "Error %d registering irq %x\n", ret, data->lmh_dcvs_irq);
>> + goto error;
>> + }
>> + INIT_DEFERRABLE_WORK(&data->lmh_dcvs_poll_work, qcom_lmh_dcvs_poll);
>> + }
>> return 0;
>> error:
>> kfree(data);
>

--
Warm Regards
Thara (She/Her/Hers)

2021-06-15 02:44:16

by Thara Gopinath

[permalink] [raw]

Subject: Re: [PATCH 2/5] thermal: qcom: Add support for LMh driver

Hi Bjorn,

Thanks for the review

On 6/14/21 4:53 PM, Bjorn Andersson wrote:
> On Tue 08 Jun 17:29 CDT 2021, Thara Gopinath wrote:
>
>> Driver enabling various pieces of Limits Management Hardware(LMh) for cpu
>> cluster0 and cpu cluster1 namely kick starting monitoring of temperature,
>> current, battery current violations, enabling reliability algorithm and
>> setting up various temperature limits.
>>
>> The following has been explained in the cover letter. I am including this
>> here so that this remains in the commit message as well.
>>
>> LMh is a hardware infrastructure on some Qualcomm SoCs that can enforce
>> temperature and current limits as programmed by software for certain IPs
>> like CPU. On many newer SoCs LMh is configured by firmware/TZ and no
>> programming is needed from the kernel side. But on certain SoCs like sdm845
>> the firmware does not do a complete programming of the h/w. On such SoCs
>> kernel software has to explicitly set up the temperature limits and turn on
>> various monitoring and enforcing algorithms on the hardware.
>>
>> Signed-off-by: Thara Gopinath <[email protected]>
>> ---
>> drivers/thermal/qcom/Kconfig | 10 ++
>> drivers/thermal/qcom/Makefile | 1 +
>> drivers/thermal/qcom/lmh.c | 244 ++++++++++++++++++++++++++++++++++
>> 3 files changed, 255 insertions(+)
>> create mode 100644 drivers/thermal/qcom/lmh.c
>>
>> diff --git a/drivers/thermal/qcom/Kconfig b/drivers/thermal/qcom/Kconfig
>> index 8d5ac2df26dc..c95b95e254d7 100644
>> --- a/drivers/thermal/qcom/Kconfig
>> +++ b/drivers/thermal/qcom/Kconfig
>> @@ -31,3 +31,13 @@ config QCOM_SPMI_TEMP_ALARM
>> trip points. The temperature reported by the thermal sensor reflects the
>> real time die temperature if an ADC is present or an estimate of the
>> temperature based upon the over temperature stage value.
>> +
>> +config QCOM_LMH
>> + tristate "Qualcomm Limits Management Hardware"
>> + depends on ARCH_QCOM
>> + help
>> + This enables initialization of Qualcomm limits management
>> + hardware(LMh). LMh allows for h/w enforced mitigation for cpus based on
>> + input from temperature and current sensors. On many newer Qualcomm SoCs
>> + LMH is configure in the firmware and this feature need not be enabled.
>> + However, on certain SoCs like sdm845 LMH has to be configured from HLOS.
>> diff --git a/drivers/thermal/qcom/Makefile b/drivers/thermal/qcom/Makefile
>> index 252ea7d9da0b..0fa2512042e7 100644
>> --- a/drivers/thermal/qcom/Makefile
>> +++ b/drivers/thermal/qcom/Makefile
>> @@ -5,3 +5,4 @@ qcom_tsens-y += tsens.o tsens-v2.o tsens-v1.o tsens-v0_1.o \
>> tsens-8960.o
>> obj-$(CONFIG_QCOM_SPMI_ADC_TM5) += qcom-spmi-adc-tm5.o
>> obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM) += qcom-spmi-temp-alarm.o
>> +obj-$(CONFIG_QCOM_LMH) += lmh.o
>> diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c
>> new file mode 100644
>> index 000000000000..8741a36cb674
>> --- /dev/null
>> +++ b/drivers/thermal/qcom/lmh.c
>> @@ -0,0 +1,244 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +
>> +/*
>> + * Copyright (C) 2021, Linaro Limited. All rights reserved.
>> + */
>> +#include <linux/module.h>
>> +#include <linux/interrupt.h>
>> +#include <linux/irqdomain.h>
>> +#include <linux/err.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/of_platform.h>
>> +#include <linux/slab.h>
>> +#include <linux/qcom_scm.h>
>> +
>> +#define LMH_NODE_DCVS 0x44435653
>> +#define LMH_CLUSTER0_NODE_ID 0x6370302D
>> +#define LMH_CLUSTER1_NODE_ID 0x6370312D
>> +
>> +#define LMH_SUB_FN_THERMAL 0x54484D4C
>> +#define LMH_SUB_FN_CRNT 0x43524E54
>> +#define LMH_SUB_FN_REL 0x52454C00
>> +#define LMH_SUB_FN_BCL 0x42434C00
>> +
>> +#define LMH_ALGO_MODE_ENABLE 0x454E424C
>> +#define LMH_TH_HI_THRESHOLD 0x48494748
>> +#define LMH_TH_LOW_THRESHOLD 0x4C4F5700
>> +#define LMH_TH_ARM_THRESHOLD 0x41524D00
>> +
>> +#define LMH_TH_HI_TEMP 95000
>> +#define LMH_TH_LOW_TEMP 94500
>> +#define LMH_TH_ARM_TEMP 65000
>> +
>> +#define LMH_REG_DCVS_INTR_CLR 0x8
>> +
>> +struct lmh_hw_data {
>> + void __iomem *base;
>> + struct irq_domain *domain;
>> + int irq;
>> + u32 payload[5];
>> + u32 payload_size;
>> + u32 cpu_id;
>> +};
>> +
>> +static void update_payload(struct lmh_hw_data *lmh_data, u32 fn, u32 reg, u32 val)
>
> Please pass fn, reg and val in the scm function call instead and stuff
> the payload array in the scm driver instead.

Sure . I will redo this part.

>
>> +{
>> + lmh_data->payload[0] = fn;
>> + lmh_data->payload[1] = 0;
>> + lmh_data->payload[2] = reg;
>> + lmh_data->payload[3] = 1;
>> + lmh_data->payload[4] = val;
>> +}
>> +
>> +static irqreturn_t lmh_handle_irq(int hw_irq, void *data)
>> +{
>> + struct lmh_hw_data *lmh_data = data;
>> + int irq = irq_find_mapping(lmh_data->domain, 0);
>> +
>> + /*
>> + * Disable interrupt and call the cpufreq driver to handle the interrupt
>> + * cpufreq will enable the interrupt once finished processing.
>> + */
>> + disable_irq_nosync(lmh_data->irq);
>
> The contract between this driver's disabling of the IRQ and the
> cpufreq-hw driver's enabling it when we're done polling does worry me.
>
> In the case of EPSS, don't we disable the interrupt during the polling
> there as well? If that's the case wouldn't it be better to implement
> irq_chip->irq_disable and have the cpufreq-hw driver do the disable in
> both cases?

Yes. You are right. In case of EPSS, the cpufreq-hw will have to disable
the interrupt. I did think of the approach you suggested here. My only
issue is that we will dispatch the interrupt to cpufreq-hw without it
disabling it and hence the interrupt could fire again, right ?

>
>> + if (irq)
>> + generic_handle_irq(irq);
>> +
>> + return 0;
>> +}
>> +
>> +static void lmh_enable_interrupt(struct irq_data *d)
>> +{
>> + struct lmh_hw_data *lmh_data = irq_data_get_irq_chip_data(d);
>> +
>> + /* Clear the existing interrupt */
>> + writel_relaxed(0xFF, lmh_data->base + LMH_REG_DCVS_INTR_CLR);
>
> Please avoid using _relaxed versions of writel, unless there's a strong
> reason and please lowercase the hex digits.

Sure.

>
>> + enable_irq(lmh_data->irq);
>> +}
>> +
>> +static struct irq_chip lmh_irq_chip = {
>> + .name = "lmh",
>> + .irq_enable = lmh_enable_interrupt,
>> +};
>> +
>> +static int lmh_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
>> +{
>> + struct lmh_hw_data *lmh_data = d->host_data;
>> +
>> + irq_set_chip_and_handler(irq, &lmh_irq_chip, handle_simple_irq);
>> + irq_set_chip_data(irq, lmh_data);
>> +
>> + return 0;
>> +}
>> +
>> +static const struct irq_domain_ops lmh_irq_ops = {
>> + .map = lmh_irq_map,
>> + .xlate = irq_domain_xlate_onecell,
>> +};
>> +
>> +static int lmh_probe(struct platform_device *pdev)
>> +{
>> + struct device *dev;
>> + struct device_node *np;
>> + struct lmh_hw_data *lmh_data;
>> + u32 node_id;
>> + int ret;
>> +
>> + dev = &pdev->dev;
>> + np = dev->of_node;
>> + if (!np)
>> + return -EINVAL;
>> +
>> + lmh_data = devm_kzalloc(dev, sizeof(*lmh_data), GFP_KERNEL);
>> + if (!lmh_data)
>> + return -ENOMEM;
>> +
>> + lmh_data->base = devm_platform_ioremap_resource(pdev, 0);
>> + if (IS_ERR(lmh_data->base))
>> + return PTR_ERR(lmh_data->base);
>> +
>> + ret = of_property_read_u32(np, "qcom,lmh-cpu-id", &lmh_data->cpu_id);
>> + if (ret)
>> + return -ENODEV;
>> +
>> + /*
>> + * Only sdm845 has lmh hardware currently enabled from hlos. If this is needed
>> + * for other platforms, revisit this to check if the <cpu-id, node-id> should be part
>> + * of a dt match table.
>> + */
>> + if (lmh_data->cpu_id == 0) {
>> + node_id = LMH_CLUSTER0_NODE_ID;
>> + } else if (lmh_data->cpu_id == 4) {
>> + node_id = LMH_CLUSTER1_NODE_ID;
>> + } else {
>> + dev_err(dev, "Wrong cpu id associated with lmh node\n");
>> + return -EINVAL;
>> + }
>> +
>> + /* Payload size is five bytes for now */
>> + lmh_data->payload_size = 5 * sizeof(u32);
>> +
>> + platform_set_drvdata(pdev, lmh_data);
>> +
>> + if (!qcom_scm_lmh_dcvsh_available())
>> + return -EINVAL;
>> +
>> + /* Enable Thermal Algorithm */
>> + update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_ALGO_MODE_ENABLE, 1);
>> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
>> + LMH_NODE_DCVS, node_id, 0);
>> + if (ret) {
>> + dev_err(dev, "Error %d enabling thermal subfunction\n", ret);
>> + return ret;
>> + }
>> +
>> + /* Enable Current Sensing Algorithm */
>> + update_payload(lmh_data, LMH_SUB_FN_CRNT, LMH_ALGO_MODE_ENABLE, 1);
>> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
>> + LMH_NODE_DCVS, node_id, 0);
>> + if (ret) {
>> + dev_err(dev, "Error %d enabling current subfunction\n", ret);
>> + return ret;
>> + }
>> +
>> + /* Enable Reliability Algorithm */
>> + update_payload(lmh_data, LMH_SUB_FN_REL, LMH_ALGO_MODE_ENABLE, 1);
>> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
>> + LMH_NODE_DCVS, node_id, 0);
>> + if (ret) {
>> + dev_err(dev, "Error %d enabling reliability subfunction\n", ret);
>> + return ret;
>> + }
>> +
>> + /* Enable BCL Algorithm */
>> + update_payload(lmh_data, LMH_SUB_FN_BCL, LMH_ALGO_MODE_ENABLE, 1);
>> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
>> + LMH_NODE_DCVS, node_id, 0);
>> + if (ret) {
>> + dev_err(dev, "Error %d enabling BCL subfunction\n", ret);
>> + return ret;
>> + }
>> +
>> + ret = qcom_scm_lmh_profile_change(0x1);
>> + if (ret) {
>> + dev_err(dev, "Error %d changing profile\n", ret);
>> + return ret;
>> + }
>> +
>> + /* Set default thermal trips */
>> + update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_TH_ARM_THRESHOLD, LMH_TH_ARM_TEMP);
>> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
>> + LMH_NODE_DCVS, node_id, 0);
>> + if (ret) {
>> + dev_err(dev, "Error setting thermal ARM thershold%d\n", ret);
>> + return ret;
>> + }
>> +
>> + update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_TH_HI_THRESHOLD, LMH_TH_HI_TEMP);
>> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
>> + LMH_NODE_DCVS, node_id, 0);
>> + if (ret) {
>> + dev_err(dev, "Error setting thermal HI thershold%d\n", ret);
>> + return ret;
>> + }
>> + update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_TH_LOW_THRESHOLD, LMH_TH_LOW_TEMP);
>> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
>> + LMH_NODE_DCVS, node_id, 0);
>> + if (ret) {
>> + dev_err(dev, "Error setting thermal ARM thershold%d\n", ret);
>> + return ret;
>> + }
>> +
>> + lmh_data->irq = platform_get_irq(pdev, 0);
>> + lmh_data->domain = irq_domain_add_linear(np, 1, &lmh_irq_ops, lmh_data);
>> + if (!lmh_data->domain) {
>> + dev_err(dev, "Error adding irq_domain\n");
>> + return -EINVAL;
>> + }
>> +
>> + ret = devm_request_irq(dev, lmh_data->irq, lmh_handle_irq,
>> + IRQF_TRIGGER_HIGH | IRQF_ONESHOT | IRQF_NO_SUSPEND,
>> + "lmh-irq", lmh_data);
>> + if (ret) {
>> + dev_err(dev, "Error %d registering irq %x\n", ret, lmh_data->irq);
>> + irq_domain_remove(lmh_data->domain);
>> + return ret;
>> + }
>> + return 0;
>> +}
>> +
>> +static const struct of_device_id lmh_table[] = {
>> + { .compatible = "qcom,msm-hw-limits", },
>
> Don't we need platform specific compatibles? Perhaps
> "qcom,<platform>-lmh"?

Yes considering that is the norm, I will follow it.

>
> If we're going with a generic compatible I think that should be done in
> addition to a platform-specific one (and qcom,lmh should be sufficien >
>> + {},
>
> Please omit the comma here.

Ok.

>
>> +};
>
> Driver is tristate, so you need a MODULE_DEVICE_TABLE(of, lmh_table);
> here, to make sure the module is loaded automatically based on the
> compatible strings in DT.

Right. I will fix it .
>
> Regards,
> Bjorn
>
>> +
>> +static struct platform_driver lmh_driver = {
>> + .probe = lmh_probe,
>> + .driver = {
>> + .name = "qcom-lmh",
>> + .of_match_table = lmh_table,
>> + },
>> +};
>> +module_platform_driver(lmh_driver);
>> +
>> +MODULE_LICENSE("GPL v2");
>> +MODULE_DESCRIPTION("QCOM LMH driver");
>> --
>> 2.25.1
>>

--
Warm Regards
Thara (She/Her/Hers)

2021-06-15 02:44:30

by Thara Gopinath

[permalink] [raw]

Subject: Re: [PATCH 2/5] thermal: qcom: Add support for LMh driver

Hi Randy,

Thanks for the review. I somehow did not see your review earlier. I
noticed it today morning when Bjorn replied to this patch. Apologies for
the delay

On 6/8/21 10:25 PM, Randy Dunlap wrote:
> On 6/8/21 3:29 PM, Thara Gopinath wrote:
>> Driver enabling various pieces of Limits Management Hardware(LMh) for cpu
>> cluster0 and cpu cluster1 namely kick starting monitoring of temperature,
>> current, battery current violations, enabling reliability algorithm and
>> setting up various temperature limits.
>>
>> The following has been explained in the cover letter. I am including this
>> here so that this remains in the commit message as well.
>>
>> LMh is a hardware infrastructure on some Qualcomm SoCs that can enforce
>> temperature and current limits as programmed by software for certain IPs
>> like CPU. On many newer SoCs LMh is configured by firmware/TZ and no
>> programming is needed from the kernel side. But on certain SoCs like sdm845
>> the firmware does not do a complete programming of the h/w. On such SoCs
>> kernel software has to explicitly set up the temperature limits and turn on
>> various monitoring and enforcing algorithms on the hardware.
>>
>> Signed-off-by: Thara Gopinath <[email protected]>
>> ---
>> drivers/thermal/qcom/Kconfig | 10 ++
>> drivers/thermal/qcom/Makefile | 1 +
>> drivers/thermal/qcom/lmh.c | 244 ++++++++++++++++++++++++++++++++++
>> 3 files changed, 255 insertions(+)
>> create mode 100644 drivers/thermal/qcom/lmh.c
>>
>> diff --git a/drivers/thermal/qcom/Kconfig b/drivers/thermal/qcom/Kconfig
>> index 8d5ac2df26dc..c95b95e254d7 100644
>> --- a/drivers/thermal/qcom/Kconfig
>> +++ b/drivers/thermal/qcom/Kconfig
>> @@ -31,3 +31,13 @@ config QCOM_SPMI_TEMP_ALARM
>> trip points. The temperature reported by the thermal sensor reflects the
>> real time die temperature if an ADC is present or an estimate of the
>> temperature based upon the over temperature stage value.
>> +
>> +config QCOM_LMH
>> + tristate "Qualcomm Limits Management Hardware"
>> + depends on ARCH_QCOM
>> + help
>> + This enables initialization of Qualcomm limits management
>> + hardware(LMh). LMh allows for h/w enforced mitigation for cpus based on
>
> hardware-enforced CPUs
>
>> + input from temperature and current sensors. On many newer Qualcomm SoCs
>> + LMH is configure in the firmware and this feature need not be enabled.
>
> LMh
>
>> + However, on certain SoCs like sdm845 LMH has to be configured from HLOS.
>
> LMh
>
> What is HLOS?

High Level Operating System. But I will change it to Linux kernel.
>
>
>> diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c
>> new file mode 100644
>> index 000000000000..8741a36cb674
>> --- /dev/null
>> +++ b/drivers/thermal/qcom/lmh.c
>> @@ -0,0 +1,244 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +
>> +/*
>> + * Copyright (C) 2021, Linaro Limited. All rights reserved.
>> + */
>
> [snip]
>
>> +static int lmh_probe(struct platform_device *pdev)
>> +{
>> + struct device *dev;
>> + struct device_node *np;
>> + struct lmh_hw_data *lmh_data;
>> + u32 node_id;
>> + int ret;
>> +
>> + dev = &pdev->dev;
>> + np = dev->of_node;
>> + if (!np)
>> + return -EINVAL;
>> +
>> + lmh_data = devm_kzalloc(dev, sizeof(*lmh_data), GFP_KERNEL);
>> + if (!lmh_data)
>> + return -ENOMEM;
>> +
>> + lmh_data->base = devm_platform_ioremap_resource(pdev, 0);
>> + if (IS_ERR(lmh_data->base))
>> + return PTR_ERR(lmh_data->base);
>> +
>> + ret = of_property_read_u32(np, "qcom,lmh-cpu-id", &lmh_data->cpu_id);
>> + if (ret)
>> + return -ENODEV;
>> +
>> + /*
>> + * Only sdm845 has lmh hardware currently enabled from hlos. If this is needed
>> + * for other platforms, revisit this to check if the <cpu-id, node-id> should be part
>> + * of a dt match table.
>> + */
>> + if (lmh_data->cpu_id == 0) {
>> + node_id = LMH_CLUSTER0_NODE_ID;
>> + } else if (lmh_data->cpu_id == 4) {
>> + node_id = LMH_CLUSTER1_NODE_ID;
>> + } else {
>> + dev_err(dev, "Wrong cpu id associated with lmh node\n");
>
> CPU LMh

will fix it. Also will fix all the typos you have caught below.

>
>> + return -EINVAL;
>> + }
>> +
>> + /* Payload size is five bytes for now */
>> + lmh_data->payload_size = 5 * sizeof(u32);
>> +
>> + platform_set_drvdata(pdev, lmh_data);
>> +
>> + if (!qcom_scm_lmh_dcvsh_available())
>> + return -EINVAL;
>> +
>> + /* Enable Thermal Algorithm */
>> + update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_ALGO_MODE_ENABLE, 1);
>> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
>> + LMH_NODE_DCVS, node_id, 0);
>> + if (ret) {
>> + dev_err(dev, "Error %d enabling thermal subfunction\n", ret);
>> + return ret;
>> + }
>> +
>> + /* Enable Current Sensing Algorithm */
>> + update_payload(lmh_data, LMH_SUB_FN_CRNT, LMH_ALGO_MODE_ENABLE, 1);
>> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
>> + LMH_NODE_DCVS, node_id, 0);
>> + if (ret) {
>> + dev_err(dev, "Error %d enabling current subfunction\n", ret);
>> + return ret;
>> + }
>> +
>> + /* Enable Reliability Algorithm */
>> + update_payload(lmh_data, LMH_SUB_FN_REL, LMH_ALGO_MODE_ENABLE, 1);
>> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
>> + LMH_NODE_DCVS, node_id, 0);
>> + if (ret) {
>> + dev_err(dev, "Error %d enabling reliability subfunction\n", ret);
>> + return ret;
>> + }
>> +
>> + /* Enable BCL Algorithm */
>> + update_payload(lmh_data, LMH_SUB_FN_BCL, LMH_ALGO_MODE_ENABLE, 1);
>> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
>> + LMH_NODE_DCVS, node_id, 0);
>> + if (ret) {
>> + dev_err(dev, "Error %d enabling BCL subfunction\n", ret);
>
> What is BCL?

Battery Current Limits

--
Warm Regards
Thara
>
>> + return ret;
>> + }
>> +
>> + ret = qcom_scm_lmh_profile_change(0x1);
>> + if (ret) {
>> + dev_err(dev, "Error %d changing profile\n", ret);
>> + return ret;
>> + }
>> +
>> + /* Set default thermal trips */
>> + update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_TH_ARM_THRESHOLD, LMH_TH_ARM_TEMP);
>> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
>> + LMH_NODE_DCVS, node_id, 0);
>> + if (ret) {
>> + dev_err(dev, "Error setting thermal ARM thershold%d\n", ret);
>
> threshold
>
>> + return ret;
>> + }
>> +
>> + update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_TH_HI_THRESHOLD, LMH_TH_HI_TEMP);
>> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
>> + LMH_NODE_DCVS, node_id, 0);
>> + if (ret) {
>> + dev_err(dev, "Error setting thermal HI thershold%d\n", ret);
>
> threshold
>
>> + return ret;
>> + }
>> + update_payload(lmh_data, LMH_SUB_FN_THERMAL, LMH_TH_LOW_THRESHOLD, LMH_TH_LOW_TEMP);
>> + ret = qcom_scm_lmh_dcvsh(lmh_data->payload, lmh_data->payload_size,
>> + LMH_NODE_DCVS, node_id, 0);
>> + if (ret) {
>> + dev_err(dev, "Error setting thermal ARM thershold%d\n", ret);
>
> threshold
>
>> + return ret;
>> + }
>> +
>> + lmh_data->irq = platform_get_irq(pdev, 0);
>> + lmh_data->domain = irq_domain_add_linear(np, 1, &lmh_irq_ops, lmh_data);
>> + if (!lmh_data->domain) {
>> + dev_err(dev, "Error adding irq_domain\n");
>> + return -EINVAL;
>> + }
>> +
>> + ret = devm_request_irq(dev, lmh_data->irq, lmh_handle_irq,
>> + IRQF_TRIGGER_HIGH | IRQF_ONESHOT | IRQF_NO_SUSPEND,
>> + "lmh-irq", lmh_data);
>> + if (ret) {
>> + dev_err(dev, "Error %d registering irq %x\n", ret, lmh_data->irq);
>> + irq_domain_remove(lmh_data->domain);
>> + return ret;
>> + }
>> + return 0;
>> +}
>> +
>> +static const struct of_device_id lmh_table[] = {
>> + { .compatible = "qcom,msm-hw-limits", },
>> + {},
>> +};
>> +
>> +static struct platform_driver lmh_driver = {
>> + .probe = lmh_probe,
>> + .driver = {
>> + .name = "qcom-lmh",
>> + .of_match_table = lmh_table,
>> + },
>> +};
>> +module_platform_driver(lmh_driver);
>> +
>> +MODULE_LICENSE("GPL v2");
>> +MODULE_DESCRIPTION("QCOM LMH driver");
>
> LMh
>
>
> thanks.
>

2021-06-15 05:20:50

by Viresh Kumar

[permalink] [raw]

Subject: Re: [PATCH 3/5] cpufreq: qcom-cpufreq-hw: Add dcvs interrupt support

On 14-06-21, 21:58, Thara Gopinath wrote:
> On 6/14/21 6:31 AM, Viresh Kumar wrote:
> > On 08-06-21, 18:29, Thara Gopinath wrote:
> > > +static void qcom_lmh_dcvs_notify(struct qcom_cpufreq_data *data)
> > > +{
> > > + struct cpufreq_policy policy;
> > > + struct dev_pm_opp *opp;
> > > + struct device *dev;
> > > + unsigned long max_capacity, capacity, freq_hz;
> > > + unsigned int val, freq;
> > > +
> > > + val = readl_relaxed(data->base + data->soc_data->reg_current_vote);
> > > + freq = qcom_lmh_vote_to_freq(val);
> > > + freq_hz = freq * HZ_PER_KHZ;
> > > +
> > > + /* Do I need to calculate ceil and floor ? */
> >
> > You don't know ?
>
> stray comment! Will remove it.
>
> >
> > > + dev = get_cpu_device(cpumask_first(data->cpus));
> > > + opp = dev_pm_opp_find_freq_floor(dev, &freq_hz);
> > > + if (IS_ERR(opp) && PTR_ERR(opp) == -ERANGE)
> > > + opp = dev_pm_opp_find_freq_ceil(dev, &freq_hz);
> > > +
> > > + data->throttled_freq = freq_hz / HZ_PER_KHZ;
> > > +
> >
> > What exactly are we trying to do here ? A comment would be good as
> > well.
>
> You want me to put a comment saying converting frequency in hz to khz ?

Not that, but for the entire routine. What exactly are we looking to
do here and why?

> >
> > > + cpufreq_get_policy(&policy, cpumask_first(data->cpus));
> > > +
> > > + /* Update thermal pressure */
> > > + max_capacity = arch_scale_cpu_capacity(cpumask_first(data->cpus));
> >
> > Set capacity of a single CPU from a policy ?
>
> Get maximum capacity of a cpu.

Ahh, I thought you are setting it :(

> >
> > > + capacity = data->throttled_freq * max_capacity;
> > > + capacity /= policy.cpuinfo.max_freq;
> > > + /* Don't pass boost capacity to scheduler */
> > > + if (capacity > max_capacity)
> > > + capacity = max_capacity;
> > > + arch_set_thermal_pressure(data->cpus, max_capacity - capacity);
> >
> > You should really be using policy->cpus instead of allocating
> > data->cpus..
>
> Yes I should be. But I still need data->cpus to get the policy.

From the comment which comes later on, you shall get the policy here
anyway.

> > > +}
> > > +
> > > +static void qcom_lmh_dcvs_poll(struct work_struct *work)
> > > +{
> > > + struct qcom_cpufreq_data *data;
> > > +
> > > + data = container_of(work, struct qcom_cpufreq_data, lmh_dcvs_poll_work.work);
> > > +
> > > + qcom_lmh_dcvs_notify(data);
> >
> > You should really move the below stuff the disable_irq_nosync(), it
> > will make your life easier.

Damn, s/disable_irq_nosync/qcom_lmh_dcvs_notify/

> I don't understand your comment here. I want to disable irq. call notify.
> Start polling. And in polling I want to call notify and if the thermal event
> has passed stop polling else continue polling.

Yeah, I messed up in the comment. I was asking to move the enable-irq
and mod_delayed_work to qcom_lmh_dcvs_notify() itself.

> > > + /**
> > > + * If h/w throttled frequency is higher than what cpufreq has requested for, stop
> > > + * polling and switch back to interrupt mechanism
> > > + */
> > > + if (data->throttled_freq >= qcom_cpufreq_hw_get(cpumask_first(data->cpus)))
> > > + /* Clear the existing interrupts and enable it back */
> > > + enable_irq(data->lmh_dcvs_irq);
> > > + else
> > > + mod_delayed_work(system_highpri_wq, &data->lmh_dcvs_poll_work,
> > > + msecs_to_jiffies(10));
> > > +}
> > > +
> > > +static irqreturn_t qcom_lmh_dcvs_handle_irq(int irq, void *data)
> > > +{
> > > + struct qcom_cpufreq_data *c_data = data;
> > > +
> > > + /* Disable interrupt and enable polling */
> > > + disable_irq_nosync(c_data->lmh_dcvs_irq);
> > > + qcom_lmh_dcvs_notify(c_data);
> > > + mod_delayed_work(system_highpri_wq, &c_data->lmh_dcvs_poll_work, msecs_to_jiffies(10));
> > > +
> > > + return 0;
> > > +}
> > > +
> > > static const struct qcom_cpufreq_soc_data qcom_soc_data = {
> > > .reg_enable = 0x0,
> > > .reg_freq_lut = 0x110,
> > > .reg_volt_lut = 0x114,
> > > + .reg_current_vote = 0x704,
> >
> > Should this be a different patch ?
>
> Why ? This is the register to read the throttled frequency.

Okay, it looked this is separate. Leave it.

> > > + ret = devm_request_irq(dev, data->lmh_dcvs_irq, qcom_lmh_dcvs_handle_irq,
> > > + 0, "dcvsh-irq", data);
> >
> > I would rather pass policy as data here.
>
> So policy for a cpu can change runtime, right ?

No, it is allocated just once.

--
viresh

2021-06-18 19:36:30

by Bjorn Andersson

[permalink] [raw]

Subject: Re: [PATCH 3/5] cpufreq: qcom-cpufreq-hw: Add dcvs interrupt support

On Tue 08 Jun 17:29 CDT 2021, Thara Gopinath wrote:
> diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
[..]
> @@ -305,6 +383,8 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
>
> index = args.args[0];
>
> + lmh_mitigation_enabled = of_property_read_bool(pdev->dev.of_node, "qcom,support-lmh");

Rather than adding a new interrupt _and_ a flag to tell the driver that
this new interrupt should be used, wouldn't it be sufficient to just see
if the interrupt is specified?

> +
> res = platform_get_resource(pdev, IORESOURCE_MEM, index);
> if (!res) {
> dev_err(dev, "failed to get mem resource %d\n", index);
> @@ -329,6 +409,11 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
> goto unmap_base;
> }
>
> + if (!alloc_cpumask_var(&data->cpus, GFP_KERNEL)) {
> + ret = -ENOMEM;
> + goto unmap_base;
> + }
> +
> data->soc_data = of_device_get_match_data(&pdev->dev);
> data->base = base;
> data->res = res;
> @@ -347,6 +432,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
> goto error;
> }
>
> + cpumask_copy(data->cpus, policy->cpus);
> policy->driver_data = data;
>
> ret = qcom_cpufreq_hw_read_lut(cpu_dev, policy);
> @@ -370,6 +456,20 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
> dev_warn(cpu_dev, "failed to enable boost: %d\n", ret);
> }
>
> + if (lmh_mitigation_enabled) {
> + data->lmh_dcvs_irq = platform_get_irq(pdev, index);
> + if (data->lmh_dcvs_irq < 0) {

This will be -ENXIO if the interrupt isn't specified and <0 for other
errors, so you should be able to distinguish the two failure cases.

Regards,
Bjorn

> + ret = data->lmh_dcvs_irq;
> + goto error;
> + }
> + ret = devm_request_irq(dev, data->lmh_dcvs_irq, qcom_lmh_dcvs_handle_irq,
> + 0, "dcvsh-irq", data);
> + if (ret) {
> + dev_err(dev, "Error %d registering irq %x\n", ret, data->lmh_dcvs_irq);
> + goto error;
> + }
> + INIT_DEFERRABLE_WORK(&data->lmh_dcvs_poll_work, qcom_lmh_dcvs_poll);
> + }
> return 0;
> error:
> kfree(data);
> --
> 2.25.1
>

2021-06-18 23:00:29

by Bjorn Andersson

[permalink] [raw]

Subject: Re: [PATCH 2/5] thermal: qcom: Add support for LMh driver

On Mon 14 Jun 20:38 CDT 2021, Thara Gopinath wrote:
> On 6/14/21 4:53 PM, Bjorn Andersson wrote:
> > On Tue 08 Jun 17:29 CDT 2021, Thara Gopinath wrote:
> > > diff --git a/drivers/thermal/qcom/Makefile b/drivers/thermal/qcom/Makefile
[..]
> > > +static irqreturn_t lmh_handle_irq(int hw_irq, void *data)
> > > +{
> > > + struct lmh_hw_data *lmh_data = data;
> > > + int irq = irq_find_mapping(lmh_data->domain, 0);
> > > +
> > > + /*
> > > + * Disable interrupt and call the cpufreq driver to handle the interrupt
> > > + * cpufreq will enable the interrupt once finished processing.
> > > + */
> > > + disable_irq_nosync(lmh_data->irq);
> >
> > The contract between this driver's disabling of the IRQ and the
> > cpufreq-hw driver's enabling it when we're done polling does worry me.
> >
> > In the case of EPSS, don't we disable the interrupt during the polling
> > there as well? If that's the case wouldn't it be better to implement
> > irq_chip->irq_disable and have the cpufreq-hw driver do the disable in
> > both cases?
>
> Yes. You are right. In case of EPSS, the cpufreq-hw will have to disable the
> interrupt. I did think of the approach you suggested here. My only issue is
> that we will dispatch the interrupt to cpufreq-hw without it disabling it
> and hence the interrupt could fire again, right ?
>

Does it fire again before you INTR_CLK it?

Regards,
Bjorn

2021-06-19 07:05:12

by Thara Gopinath

[permalink] [raw]

Subject: Re: [PATCH 2/5] thermal: qcom: Add support for LMh driver

On 6/18/21 1:54 PM, Bjorn Andersson wrote:
> On Mon 14 Jun 20:38 CDT 2021, Thara Gopinath wrote:
>> On 6/14/21 4:53 PM, Bjorn Andersson wrote:
>>> On Tue 08 Jun 17:29 CDT 2021, Thara Gopinath wrote:
>>>> diff --git a/drivers/thermal/qcom/Makefile b/drivers/thermal/qcom/Makefile
> [..]
>>>> +static irqreturn_t lmh_handle_irq(int hw_irq, void *data)
>>>> +{
>>>> + struct lmh_hw_data *lmh_data = data;
>>>> + int irq = irq_find_mapping(lmh_data->domain, 0);
>>>> +
>>>> + /*
>>>> + * Disable interrupt and call the cpufreq driver to handle the interrupt
>>>> + * cpufreq will enable the interrupt once finished processing.
>>>> + */
>>>> + disable_irq_nosync(lmh_data->irq);
>>>
>>> The contract between this driver's disabling of the IRQ and the
>>> cpufreq-hw driver's enabling it when we're done polling does worry me.
>>>
>>> In the case of EPSS, don't we disable the interrupt during the polling
>>> there as well? If that's the case wouldn't it be better to implement
>>> irq_chip->irq_disable and have the cpufreq-hw driver do the disable in
>>> both cases?
>>
>> Yes. You are right. In case of EPSS, the cpufreq-hw will have to disable the
>> interrupt. I did think of the approach you suggested here. My only issue is
>> that we will dispatch the interrupt to cpufreq-hw without it disabling it
>> and hence the interrupt could fire again, right ?
>>
>
> Does it fire again before you INTR_CLK it?

You mean clear it ? I couldn't reproduce it either way. I did not try
the irq_chip->irq_disable either. So I will give it a try and if my
tests pass , I will post it.

>
> Regards,
> Bjorn
>

--
Warm Regards
Thara (She/Her/Hers)

2021-06-19 07:05:15

by Thara Gopinath

[permalink] [raw]

Subject: Re: [PATCH 3/5] cpufreq: qcom-cpufreq-hw: Add dcvs interrupt support

On 6/18/21 2:16 PM, Bjorn Andersson wrote:
> On Tue 08 Jun 17:29 CDT 2021, Thara Gopinath wrote:
>> diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
> [..]
>> @@ -305,6 +383,8 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
>>
>> index = args.args[0];
>>
>> + lmh_mitigation_enabled = of_property_read_bool(pdev->dev.of_node, "qcom,support-lmh");
>
> Rather than adding a new interrupt _and_ a flag to tell the driver that
> this new interrupt should be used, wouldn't it be sufficient to just see
> if the interrupt is specified?

Yes. you are right. It should be. Though when I wrote it there was some
reason which I forget now. I will remove it.

--
Warm Regards
Thara (She/Her/Hers)

>
>> +
>> res = platform_get_resource(pdev, IORESOURCE_MEM, index);
>> if (!res) {
>> dev_err(dev, "failed to get mem resource %d\n", index);
>> @@ -329,6 +409,11 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
>> goto unmap_base;
>> }
>>
>> + if (!alloc_cpumask_var(&data->cpus, GFP_KERNEL)) {
>> + ret = -ENOMEM;
>> + goto unmap_base;
>> + }
>> +
>> data->soc_data = of_device_get_match_data(&pdev->dev);
>> data->base = base;
>> data->res = res;
>> @@ -347,6 +432,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
>> goto error;
>> }
>>
>> + cpumask_copy(data->cpus, policy->cpus);
>> policy->driver_data = data;
>>
>> ret = qcom_cpufreq_hw_read_lut(cpu_dev, policy);
>> @@ -370,6 +456,20 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
>> dev_warn(cpu_dev, "failed to enable boost: %d\n", ret);
>> }
>>
>> + if (lmh_mitigation_enabled) {
>> + data->lmh_dcvs_irq = platform_get_irq(pdev, index);
>> + if (data->lmh_dcvs_irq < 0) {
>
> This will be -ENXIO if the interrupt isn't specified and <0 for other
> errors, so you should be able to distinguish the two failure cases.
>
> Regards,
> Bjorn
>
>> + ret = data->lmh_dcvs_irq;
>> + goto error;
>> + }
>> + ret = devm_request_irq(dev, data->lmh_dcvs_irq, qcom_lmh_dcvs_handle_irq,
>> + 0, "dcvsh-irq", data);
>> + if (ret) {
>> + dev_err(dev, "Error %d registering irq %x\n", ret, data->lmh_dcvs_irq);
>> + goto error;
>> + }
>> + INIT_DEFERRABLE_WORK(&data->lmh_dcvs_poll_work, qcom_lmh_dcvs_poll);
>> + }
>> return 0;
>> error:
>> kfree(data);
>> --
>> 2.25.1
>>