2020-08-25 08:23:08

by Adrian Hunter

[permalink] [raw]
Subject: [PATCH] scsi: ufs-pci: Add LTR support for Intel controllers

Intel host controllers support the setting of latency tolerance.
Accordingly, implement the PM QoS ->set_latency_tolerance() callback. The
raw register values are also exposed via debugfs.

Signed-off-by: Adrian Hunter <[email protected]>
---
drivers/scsi/ufs/ufshcd-pci.c | 122 +++++++++++++++++++++++++++++++++-
1 file changed, 120 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c
index 5a95a7bfbab0..e10f05013ae6 100644
--- a/drivers/scsi/ufs/ufshcd-pci.c
+++ b/drivers/scsi/ufs/ufshcd-pci.c
@@ -13,6 +13,14 @@
#include "ufshcd.h"
#include <linux/pci.h>
#include <linux/pm_runtime.h>
+#include <linux/pm_qos.h>
+#include <linux/debugfs.h>
+
+struct intel_host {
+ u32 active_ltr;
+ u32 idle_ltr;
+ struct dentry *debugfs_root;
+};

static int ufs_intel_disable_lcc(struct ufs_hba *hba)
{
@@ -44,20 +52,129 @@ static int ufs_intel_link_startup_notify(struct ufs_hba *hba,
return err;
}

+#define INTEL_ACTIVELTR 0x804
+#define INTEL_IDLELTR 0x808
+
+#define INTEL_LTR_REQ BIT(15)
+#define INTEL_LTR_SCALE_MASK GENMASK(11, 10)
+#define INTEL_LTR_SCALE_1US (2 << 10)
+#define INTEL_LTR_SCALE_32US (3 << 10)
+#define INTEL_LTR_VALUE_MASK GENMASK(9, 0)
+
+static void intel_cache_ltr(struct ufs_hba *hba)
+{
+ struct intel_host *host = ufshcd_get_variant(hba);
+
+ host->active_ltr = readl(hba->mmio_base + INTEL_ACTIVELTR);
+ host->idle_ltr = readl(hba->mmio_base + INTEL_IDLELTR);
+}
+
+static void intel_ltr_set(struct device *dev, s32 val)
+{
+ struct ufs_hba *hba = dev_get_drvdata(dev);
+ struct intel_host *host = ufshcd_get_variant(hba);
+ u32 ltr;
+
+ pm_runtime_get_sync(dev);
+
+ /*
+ * Program latency tolerance (LTR) accordingly what has been asked
+ * by the PM QoS layer or disable it in case we were passed
+ * negative value or PM_QOS_LATENCY_ANY.
+ */
+ ltr = readl(hba->mmio_base + INTEL_ACTIVELTR);
+
+ if (val == PM_QOS_LATENCY_ANY || val < 0) {
+ ltr &= ~INTEL_LTR_REQ;
+ } else {
+ ltr |= INTEL_LTR_REQ;
+ ltr &= ~INTEL_LTR_SCALE_MASK;
+ ltr &= ~INTEL_LTR_VALUE_MASK;
+
+ if (val > INTEL_LTR_VALUE_MASK) {
+ val >>= 5;
+ if (val > INTEL_LTR_VALUE_MASK)
+ val = INTEL_LTR_VALUE_MASK;
+ ltr |= INTEL_LTR_SCALE_32US | val;
+ } else {
+ ltr |= INTEL_LTR_SCALE_1US | val;
+ }
+ }
+
+ if (ltr == host->active_ltr)
+ goto out;
+
+ writel(ltr, hba->mmio_base + INTEL_ACTIVELTR);
+ writel(ltr, hba->mmio_base + INTEL_IDLELTR);
+
+ /* Cache the values into intel_host structure */
+ intel_cache_ltr(hba);
+out:
+ pm_runtime_put(dev);
+}
+
+static void ufs_intel_ltr_expose(struct ufs_hba *hba)
+{
+ struct intel_host *host = ufshcd_get_variant(hba);
+ struct dentry *dir = host->debugfs_root;
+ struct device *dev = hba->dev;
+
+ dev->power.set_latency_tolerance = intel_ltr_set;
+ dev_pm_qos_expose_latency_tolerance(dev);
+
+ intel_cache_ltr(hba);
+
+ debugfs_create_x32("active_ltr", 0444, dir, &host->active_ltr);
+ debugfs_create_x32("idle_ltr", 0444, dir, &host->idle_ltr);
+}
+
+static void ufs_intel_ltr_hide(struct ufs_hba *hba)
+{
+ struct device *dev = hba->dev;
+
+ dev_pm_qos_hide_latency_tolerance(dev);
+ dev->power.set_latency_tolerance = NULL;
+}
+
+static int ufs_intel_common_init(struct ufs_hba *hba)
+{
+ struct device *dev = hba->dev;
+ struct intel_host *host;
+
+ host = devm_kzalloc(dev, sizeof(*host), GFP_KERNEL);
+ if (!host)
+ return -ENOMEM;
+ ufshcd_set_variant(hba, host);
+ host->debugfs_root = debugfs_create_dir(dev_name(dev), NULL);
+ ufs_intel_ltr_expose(hba);
+ return 0;
+}
+
+static void ufs_intel_common_exit(struct ufs_hba *hba)
+{
+ struct intel_host *host = ufshcd_get_variant(hba);
+
+ debugfs_remove_recursive(host->debugfs_root);
+ ufs_intel_ltr_hide(hba);
+}
+
static int ufs_intel_ehl_init(struct ufs_hba *hba)
{
hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8;
- return 0;
+ return ufs_intel_common_init(hba);
}

static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = {
.name = "intel-pci",
+ .init = ufs_intel_common_init,
+ .exit = ufs_intel_common_exit,
.link_startup_notify = ufs_intel_link_startup_notify,
};

static struct ufs_hba_variant_ops ufs_intel_ehl_hba_vops = {
.name = "intel-pci",
.init = ufs_intel_ehl_init,
+ .exit = ufs_intel_common_exit,
.link_startup_notify = ufs_intel_link_startup_notify,
};

@@ -162,6 +279,8 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return err;
}

+ pci_set_drvdata(pdev, hba);
+
hba->vops = (struct ufs_hba_variant_ops *)id->driver_data;

err = ufshcd_init(hba, mmio_base, pdev->irq);
@@ -171,7 +290,6 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return err;
}

- pci_set_drvdata(pdev, hba);
pm_runtime_put_noidle(&pdev->dev);
pm_runtime_allow(&pdev->dev);

--
2.17.1


2020-08-26 06:15:09

by Avri Altman

[permalink] [raw]
Subject: RE: [PATCH] scsi: ufs-pci: Add LTR support for Intel controllers


>
> Intel host controllers support the setting of latency tolerance.
> Accordingly, implement the PM QoS ->set_latency_tolerance() callback. The
> raw register values are also exposed via debugfs.
>
> Signed-off-by: Adrian Hunter <[email protected]>
Reviewed-by: Avri Altman <[email protected]>

Some nits below.

Thanks,
Avri

> ---
> drivers/scsi/ufs/ufshcd-pci.c | 122 +++++++++++++++++++++++++++++++++-
> 1 file changed, 120 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c
> index 5a95a7bfbab0..e10f05013ae6 100644
> --- a/drivers/scsi/ufs/ufshcd-pci.c
> +++ b/drivers/scsi/ufs/ufshcd-pci.c
> @@ -13,6 +13,14 @@
> #include "ufshcd.h"
> #include <linux/pci.h>
> #include <linux/pm_runtime.h>
> +#include <linux/pm_qos.h>
> +#include <linux/debugfs.h>
> +
> +struct intel_host {
> + u32 active_ltr;
> + u32 idle_ltr;
> + struct dentry *debugfs_root;
> +};
>
> static int ufs_intel_disable_lcc(struct ufs_hba *hba)
> {
> @@ -44,20 +52,129 @@ static int ufs_intel_link_startup_notify(struct ufs_hba
> *hba,
> return err;
> }
>
> +#define INTEL_ACTIVELTR 0x804
> +#define INTEL_IDLELTR 0x808
> +
> +#define INTEL_LTR_REQ BIT(15)
> +#define INTEL_LTR_SCALE_MASK GENMASK(11, 10)
> +#define INTEL_LTR_SCALE_1US (2 << 10)
> +#define INTEL_LTR_SCALE_32US (3 << 10)
> +#define INTEL_LTR_VALUE_MASK GENMASK(9, 0)
> +
> +static void intel_cache_ltr(struct ufs_hba *hba)
> +{
> + struct intel_host *host = ufshcd_get_variant(hba);
> +
> + host->active_ltr = readl(hba->mmio_base + INTEL_ACTIVELTR);
> + host->idle_ltr = readl(hba->mmio_base + INTEL_IDLELTR);
You might want to use the standard ufshcd_readl

> +}
> +
> +static void intel_ltr_set(struct device *dev, s32 val)
> +{
> + struct ufs_hba *hba = dev_get_drvdata(dev);
> + struct intel_host *host = ufshcd_get_variant(hba);
> + u32 ltr;
> +
> + pm_runtime_get_sync(dev);
> +
> + /*
> + * Program latency tolerance (LTR) accordingly what has been asked
> + * by the PM QoS layer or disable it in case we were passed
> + * negative value or PM_QOS_LATENCY_ANY.
> + */
> + ltr = readl(hba->mmio_base + INTEL_ACTIVELTR);
> +
> + if (val == PM_QOS_LATENCY_ANY || val < 0) {
> + ltr &= ~INTEL_LTR_REQ;
> + } else {
> + ltr |= INTEL_LTR_REQ;
> + ltr &= ~INTEL_LTR_SCALE_MASK;
> + ltr &= ~INTEL_LTR_VALUE_MASK;
> +
> + if (val > INTEL_LTR_VALUE_MASK) {
> + val >>= 5;
> + if (val > INTEL_LTR_VALUE_MASK)
> + val = INTEL_LTR_VALUE_MASK;
> + ltr |= INTEL_LTR_SCALE_32US | val;
> + } else {
> + ltr |= INTEL_LTR_SCALE_1US | val;
> + }
> + }
> +
> + if (ltr == host->active_ltr)
> + goto out;
> +
> + writel(ltr, hba->mmio_base + INTEL_ACTIVELTR);
> + writel(ltr, hba->mmio_base + INTEL_IDLELTR);
> +
> + /* Cache the values into intel_host structure */
> + intel_cache_ltr(hba);
> +out:
> + pm_runtime_put(dev);
> +}
> +
> +static void ufs_intel_ltr_expose(struct ufs_hba *hba)
> +{
> + struct intel_host *host = ufshcd_get_variant(hba);
> + struct dentry *dir = host->debugfs_root;
> + struct device *dev = hba->dev;
> +
> + dev->power.set_latency_tolerance = intel_ltr_set;
> + dev_pm_qos_expose_latency_tolerance(dev);
> +
> + intel_cache_ltr(hba);
> +
> + debugfs_create_x32("active_ltr", 0444, dir, &host->active_ltr);
> + debugfs_create_x32("idle_ltr", 0444, dir, &host->idle_ltr);
You might as well allow those values to be traced, e.g. use dev_pm_qos_update_user_latency_tolerance

> +}
> +
> +static void ufs_intel_ltr_hide(struct ufs_hba *hba)
> +{
> + struct device *dev = hba->dev;
> +
> + dev_pm_qos_hide_latency_tolerance(dev);
> + dev->power.set_latency_tolerance = NULL;
> +}
> +
> +static int ufs_intel_common_init(struct ufs_hba *hba)
> +{
> + struct device *dev = hba->dev;
> + struct intel_host *host;
> +
> + host = devm_kzalloc(dev, sizeof(*host), GFP_KERNEL);
> + if (!host)
> + return -ENOMEM;
> + ufshcd_set_variant(hba, host);
> + host->debugfs_root = debugfs_create_dir(dev_name(dev), NULL);
Maybe pack the debugfs code together, i.e. move this just above debugfs_create_x32 ....

> + ufs_intel_ltr_expose(hba);
> + return 0;
> +}
> +
> +static void ufs_intel_common_exit(struct ufs_hba *hba)
> +{
> + struct intel_host *host = ufshcd_get_variant(hba);
> +
> + debugfs_remove_recursive(host->debugfs_root);
> + ufs_intel_ltr_hide(hba);
> +}
> +
> static int ufs_intel_ehl_init(struct ufs_hba *hba)
> {
> hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8;
> - return 0;
> + return ufs_intel_common_init(hba);
> }
>
> static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = {
> .name = "intel-pci",
> + .init = ufs_intel_common_init,
> + .exit = ufs_intel_common_exit,
> .link_startup_notify = ufs_intel_link_startup_notify,
> };
>
> static struct ufs_hba_variant_ops ufs_intel_ehl_hba_vops = {
> .name = "intel-pci",
> .init = ufs_intel_ehl_init,
> + .exit = ufs_intel_common_exit,
> .link_startup_notify = ufs_intel_link_startup_notify,
> };
>
> @@ -162,6 +279,8 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct
> pci_device_id *id)
> return err;
> }
>
> + pci_set_drvdata(pdev, hba);
> +
> hba->vops = (struct ufs_hba_variant_ops *)id->driver_data;
>
> err = ufshcd_init(hba, mmio_base, pdev->irq);
> @@ -171,7 +290,6 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct
> pci_device_id *id)
> return err;
> }
>
> - pci_set_drvdata(pdev, hba);
> pm_runtime_put_noidle(&pdev->dev);
> pm_runtime_allow(&pdev->dev);
>
> --
> 2.17.1

2020-08-26 09:15:37

by Adrian Hunter

[permalink] [raw]
Subject: Re: [PATCH] scsi: ufs-pci: Add LTR support for Intel controllers

On 26/08/20 9:13 am, Avri Altman wrote:
>
>>
>> Intel host controllers support the setting of latency tolerance.
>> Accordingly, implement the PM QoS ->set_latency_tolerance() callback. The
>> raw register values are also exposed via debugfs.
>>
>> Signed-off-by: Adrian Hunter <[email protected]>
> Reviewed-by: Avri Altman <[email protected]>

Thanks for the quick review!

I will send a V2; see my replies inline below.

>
> Some nits below.
>
> Thanks,
> Avri
>
>> ---
>> drivers/scsi/ufs/ufshcd-pci.c | 122 +++++++++++++++++++++++++++++++++-
>> 1 file changed, 120 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c
>> index 5a95a7bfbab0..e10f05013ae6 100644
>> --- a/drivers/scsi/ufs/ufshcd-pci.c
>> +++ b/drivers/scsi/ufs/ufshcd-pci.c
>> @@ -13,6 +13,14 @@
>> #include "ufshcd.h"
>> #include <linux/pci.h>
>> #include <linux/pm_runtime.h>
>> +#include <linux/pm_qos.h>
>> +#include <linux/debugfs.h>
>> +
>> +struct intel_host {
>> + u32 active_ltr;
>> + u32 idle_ltr;
>> + struct dentry *debugfs_root;
>> +};
>>
>> static int ufs_intel_disable_lcc(struct ufs_hba *hba)
>> {
>> @@ -44,20 +52,129 @@ static int ufs_intel_link_startup_notify(struct ufs_hba
>> *hba,
>> return err;
>> }
>>
>> +#define INTEL_ACTIVELTR 0x804
>> +#define INTEL_IDLELTR 0x808
>> +
>> +#define INTEL_LTR_REQ BIT(15)
>> +#define INTEL_LTR_SCALE_MASK GENMASK(11, 10)
>> +#define INTEL_LTR_SCALE_1US (2 << 10)
>> +#define INTEL_LTR_SCALE_32US (3 << 10)
>> +#define INTEL_LTR_VALUE_MASK GENMASK(9, 0)
>> +
>> +static void intel_cache_ltr(struct ufs_hba *hba)
>> +{
>> + struct intel_host *host = ufshcd_get_variant(hba);
>> +
>> + host->active_ltr = readl(hba->mmio_base + INTEL_ACTIVELTR);
>> + host->idle_ltr = readl(hba->mmio_base + INTEL_IDLELTR);
> You might want to use the standard ufshcd_readl

LTR is more of a PCI concept, not directly related to UFS, so I am reluctant
to use ufshcd_readl, which implies a closer relationship to UFS.

>
>> +}
>> +
>> +static void intel_ltr_set(struct device *dev, s32 val)
>> +{
>> + struct ufs_hba *hba = dev_get_drvdata(dev);
>> + struct intel_host *host = ufshcd_get_variant(hba);
>> + u32 ltr;
>> +
>> + pm_runtime_get_sync(dev);
>> +
>> + /*
>> + * Program latency tolerance (LTR) accordingly what has been asked
>> + * by the PM QoS layer or disable it in case we were passed
>> + * negative value or PM_QOS_LATENCY_ANY.
>> + */
>> + ltr = readl(hba->mmio_base + INTEL_ACTIVELTR);
>> +
>> + if (val == PM_QOS_LATENCY_ANY || val < 0) {
>> + ltr &= ~INTEL_LTR_REQ;
>> + } else {
>> + ltr |= INTEL_LTR_REQ;
>> + ltr &= ~INTEL_LTR_SCALE_MASK;
>> + ltr &= ~INTEL_LTR_VALUE_MASK;
>> +
>> + if (val > INTEL_LTR_VALUE_MASK) {
>> + val >>= 5;
>> + if (val > INTEL_LTR_VALUE_MASK)
>> + val = INTEL_LTR_VALUE_MASK;
>> + ltr |= INTEL_LTR_SCALE_32US | val;
>> + } else {
>> + ltr |= INTEL_LTR_SCALE_1US | val;
>> + }
>> + }
>> +
>> + if (ltr == host->active_ltr)
>> + goto out;
>> +
>> + writel(ltr, hba->mmio_base + INTEL_ACTIVELTR);
>> + writel(ltr, hba->mmio_base + INTEL_IDLELTR);
>> +
>> + /* Cache the values into intel_host structure */
>> + intel_cache_ltr(hba);
>> +out:
>> + pm_runtime_put(dev);
>> +}
>> +
>> +static void ufs_intel_ltr_expose(struct ufs_hba *hba)
>> +{
>> + struct intel_host *host = ufshcd_get_variant(hba);
>> + struct dentry *dir = host->debugfs_root;
>> + struct device *dev = hba->dev;
>> +
>> + dev->power.set_latency_tolerance = intel_ltr_set;
>> + dev_pm_qos_expose_latency_tolerance(dev);
>> +
>> + intel_cache_ltr(hba);
>> +
>> + debugfs_create_x32("active_ltr", 0444, dir, &host->active_ltr);
>> + debugfs_create_x32("idle_ltr", 0444, dir, &host->idle_ltr);
> You might as well allow those values to be traced, e.g. use dev_pm_qos_update_user_latency_tolerance

The registers default to "no requirement" and there is no use-case for
reading the initial values, other than debugging, for which there is debugfs.
Add to that the fact that there are 2 registers, which should be the same but
might not be, plus the need to decode the register bits, and it doesn't seem
worth it.

>
>> +}
>> +
>> +static void ufs_intel_ltr_hide(struct ufs_hba *hba)
>> +{
>> + struct device *dev = hba->dev;
>> +
>> + dev_pm_qos_hide_latency_tolerance(dev);
>> + dev->power.set_latency_tolerance = NULL;
>> +}
>> +
>> +static int ufs_intel_common_init(struct ufs_hba *hba)
>> +{
>> + struct device *dev = hba->dev;
>> + struct intel_host *host;
>> +
>> + host = devm_kzalloc(dev, sizeof(*host), GFP_KERNEL);
>> + if (!host)
>> + return -ENOMEM;
>> + ufshcd_set_variant(hba, host);
>> + host->debugfs_root = debugfs_create_dir(dev_name(dev), NULL);
> Maybe pack the debugfs code together, i.e. move this just above debugfs_create_x32 ....

Yes, I can move all the debugfs code together.

>
>> + ufs_intel_ltr_expose(hba);
>> + return 0;
>> +}
>> +
>> +static void ufs_intel_common_exit(struct ufs_hba *hba)
>> +{
>> + struct intel_host *host = ufshcd_get_variant(hba);
>> +
>> + debugfs_remove_recursive(host->debugfs_root);
>> + ufs_intel_ltr_hide(hba);
>> +}
>> +
>> static int ufs_intel_ehl_init(struct ufs_hba *hba)
>> {
>> hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8;
>> - return 0;
>> + return ufs_intel_common_init(hba);
>> }
>>
>> static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = {
>> .name = "intel-pci",
>> + .init = ufs_intel_common_init,
>> + .exit = ufs_intel_common_exit,
>> .link_startup_notify = ufs_intel_link_startup_notify,
>> };
>>
>> static struct ufs_hba_variant_ops ufs_intel_ehl_hba_vops = {
>> .name = "intel-pci",
>> .init = ufs_intel_ehl_init,
>> + .exit = ufs_intel_common_exit,
>> .link_startup_notify = ufs_intel_link_startup_notify,
>> };
>>
>> @@ -162,6 +279,8 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct
>> pci_device_id *id)
>> return err;
>> }
>>
>> + pci_set_drvdata(pdev, hba);
>> +
>> hba->vops = (struct ufs_hba_variant_ops *)id->driver_data;
>>
>> err = ufshcd_init(hba, mmio_base, pdev->irq);
>> @@ -171,7 +290,6 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct
>> pci_device_id *id)
>> return err;
>> }
>>
>> - pci_set_drvdata(pdev, hba);
>> pm_runtime_put_noidle(&pdev->dev);
>> pm_runtime_allow(&pdev->dev);
>>
>> --
>> 2.17.1
>