2015-05-08 09:17:01

by Michal Kazior

[permalink] [raw]
Subject: [PATCH 0/3] ath10k: improve power efficiency

Some measurements for QCA61X4:

Loaded Started Associated
Unpatched 31mA 88mA 43mA
Patched 14mA 15mA 23mA

Loaded = module loaded, driver bound, no interface
is running

Started = Loaded, but one client interface is
running, not associated

Associated = Started, but associated to an AP as
client

Note: Unpatched was consuming more power when
Started than when Associated. This is no mistake.

I didn't have a way to measure QCA988X.


Janusz Dziedzic (2):
ath10k: enable ASPM
ath10k: fix idle power consumption

Michal Kazior (1):
ath10k: enable pci soc powersaving

drivers/net/wireless/ath/ath10k/debug.h | 1 +
drivers/net/wireless/ath/ath10k/htt_rx.c | 1 +
drivers/net/wireless/ath/ath10k/mac.c | 9 +-
drivers/net/wireless/ath/ath10k/pci.c | 317 ++++++++++++++++++++++---------
drivers/net/wireless/ath/ath10k/pci.h | 97 +++++-----
5 files changed, 285 insertions(+), 140 deletions(-)

--
2.1.4



2015-05-18 09:38:35

by Michal Kazior

[permalink] [raw]
Subject: [PATCH v2 2/3] ath10k: fix idle power consumption

From: Janusz Dziedzic <[email protected]>

mac80211 can update vif powersave state while
disconnected. Firmware doesn't behave nicely and
consumes more power than necessary if PS is
disabled on a non-started vdev. Hence
force-enable PS for non-running vdevs.

This reduces power drain on QCA61X4 from 88mA to
36mA when interface is up and not associated.
QCA988X wasn't measured.

Signed-off-by: Janusz Dziedzic <[email protected]>
Signed-off-by: Michal Kazior <[email protected]>
---
drivers/net/wireless/ath/ath10k/mac.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 425dbe271495..9f576ca99f86 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -1735,7 +1735,14 @@ static int ath10k_mac_vif_setup_ps(struct ath10k_vif *arvif)
enable_ps = false;
}

- if (enable_ps) {
+ if (!arvif->is_started) {
+ /* mac80211 can update vif powersave state while disconnected.
+ * Firmware doesn't behave nicely and consumes more power than
+ * necessary if PS is disabled on a non-started vdev. Hence
+ * force-enable PS for non-running vdevs.
+ */
+ psmode = WMI_STA_PS_MODE_ENABLED;
+ } else if (enable_ps) {
psmode = WMI_STA_PS_MODE_ENABLED;
param = WMI_STA_PS_PARAM_INACTIVITY_TIME;

--
2.1.4


2015-05-08 09:17:03

by Michal Kazior

[permalink] [raw]
Subject: [PATCH 1/3] ath10k: enable ASPM

From: Janusz Dziedzic <[email protected]>

It is actually safe to enable ASPM after the
device is booted up.

This reduces power drain of QCA61X4 when driver is
simply loaded (no interface is up) from 31mA to
14mA. QCA988X wasn't measured but doesn't seem to
regress in any other way.

Signed-off-by: Janusz Dziedzic <[email protected]>
Signed-off-by: Michal Kazior <[email protected]>
---
drivers/net/wireless/ath/ath10k/pci.c | 15 ++++++++++-----
drivers/net/wireless/ath/ath10k/pci.h | 6 ++++++
2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 969a1231800e..8be07c653b2d 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -1227,11 +1227,15 @@ static void ath10k_pci_irq_enable(struct ath10k *ar)

static int ath10k_pci_hif_start(struct ath10k *ar)
{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot hif start\n");

ath10k_pci_irq_enable(ar);
ath10k_pci_rx_post(ar);

+ pcie_capability_write_word(ar_pci->pdev, PCI_EXP_LNKCTL,
+ ar_pci->link_ctl);
+
return 0;
}

@@ -1981,6 +1985,7 @@ static int ath10k_pci_chip_reset(struct ath10k *ar)

static int ath10k_pci_hif_power_up(struct ath10k *ar)
{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
int ret;

ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot hif power up\n");
@@ -1991,6 +1996,11 @@ static int ath10k_pci_hif_power_up(struct ath10k *ar)
return ret;
}

+ pcie_capability_read_word(ar_pci->pdev, PCI_EXP_LNKCTL,
+ &ar_pci->link_ctl);
+ pcie_capability_write_word(ar_pci->pdev, PCI_EXP_LNKCTL,
+ ar_pci->link_ctl & ~PCI_EXP_LNKCTL_ASPMC);
+
/*
* Bring the target up cleanly.
*
@@ -2502,7 +2512,6 @@ static int ath10k_pci_claim(struct ath10k *ar)
{
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
struct pci_dev *pdev = ar_pci->pdev;
- u32 lcr_val;
int ret;

pci_set_drvdata(pdev, ar);
@@ -2536,10 +2545,6 @@ static int ath10k_pci_claim(struct ath10k *ar)

pci_set_master(pdev);

- /* Workaround: Disable ASPM */
- pci_read_config_dword(pdev, 0x80, &lcr_val);
- pci_write_config_dword(pdev, 0x80, (lcr_val & 0xffffff00));
-
/* Arrange for access to Target SoC registers. */
ar_pci->mem = pci_iomap(pdev, BAR_NUM, 0);
if (!ar_pci->mem) {
diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h
index bddf54320160..ee2173d61257 100644
--- a/drivers/net/wireless/ath/ath10k/pci.h
+++ b/drivers/net/wireless/ath/ath10k/pci.h
@@ -185,6 +185,12 @@ struct ath10k_pci {
/* Map CE id to ce_state */
struct ath10k_ce_pipe ce_states[CE_COUNT_MAX];
struct timer_list rx_post_retry;
+
+ /* Due to HW quirks it is recommended to disable ASPM during device
+ * bootup. To do that the original PCI-E Link Control is stored before
+ * device bootup is executed and re-programmed later.
+ */
+ u16 link_ctl;
};

static inline struct ath10k_pci *ath10k_pci_priv(struct ath10k *ar)
--
2.1.4


2015-05-11 05:01:26

by Michal Kazior

[permalink] [raw]
Subject: Re: [PATCH 3/3] ath10k: enable pci soc powersaving

On 8 May 2015 at 19:53, Peter Oh <[email protected]> wrote:
> Hi,
>
> On 05/08/2015 02:13 AM, Michal Kazior wrote:
[...]
>> diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c
>> b/drivers/net/wireless/ath/ath10k/htt_rx.c
>> index b26e32f42656..889262b07d19 100644
>> --- a/drivers/net/wireless/ath/ath10k/htt_rx.c
>> +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
>> @@ -22,6 +22,7 @@
>> #include "debug.h"
>> #include "trace.h"
>> #include "mac.h"
>> +#include "hif.h"
>>
>
> Is it necessary change?

Good catch, thanks! This is a leftover from early prototyping of the
patch. I'll remove it in v2.


MichaƂ

2015-05-18 09:38:33

by Michal Kazior

[permalink] [raw]
Subject: [PATCH v2 0/3] ath10k: improve power efficiency

Some measurements for QCA61X4:

Loaded Started Associated
Unpatched 31mA 88mA 43mA
Patched 14mA 15mA 23mA

Loaded = module loaded, driver bound, no interface
is running

Started = Loaded, but one client interface is
running, not associated

Associated = Started, but associated to an AP as
client

Note: Unpatched was consuming more power when
Started than when Associated. This is no mistake.

I didn't have a way to measure QCA988X.


Janusz Dziedzic (2):
ath10k: enable ASPM
ath10k: fix idle power consumption

Michal Kazior (1):
ath10k: enable pci soc powersaving

drivers/net/wireless/ath/ath10k/debug.h | 1 +
drivers/net/wireless/ath/ath10k/mac.c | 9 +-
drivers/net/wireless/ath/ath10k/pci.c | 317 +++++++++++++++++++++++---------
drivers/net/wireless/ath/ath10k/pci.h | 97 +++++-----
4 files changed, 284 insertions(+), 140 deletions(-)

--
2.1.4


2015-05-22 10:42:12

by Kalle Valo

[permalink] [raw]
Subject: Re: [PATCH v2 0/3] ath10k: improve power efficiency

Michal Kazior <[email protected]> writes:

> Some measurements for QCA61X4:
>
> Loaded Started Associated
> Unpatched 31mA 88mA 43mA
> Patched 14mA 15mA 23mA
>
> Loaded = module loaded, driver bound, no interface
> is running
>
> Started = Loaded, but one client interface is
> running, not associated
>
> Associated = Started, but associated to an AP as
> client
>
> Note: Unpatched was consuming more power when
> Started than when Associated. This is no mistake.
>
> I didn't have a way to measure QCA988X.
>
>
> Janusz Dziedzic (2):
> ath10k: enable ASPM
> ath10k: fix idle power consumption
>
> Michal Kazior (1):
> ath10k: enable pci soc powersaving

Thanks, all three applied.

--
Kalle Valo

2015-05-08 09:17:03

by Michal Kazior

[permalink] [raw]
Subject: [PATCH 2/3] ath10k: fix idle power consumption

From: Janusz Dziedzic <[email protected]>

mac80211 can update vif powersave state while
disconnected. Firmware doesn't behave nicely and
consumes more power than necessary if PS is
disabled on a non-started vdev. Hence
force-enable PS for non-running vdevs.

This reduces power drain on QCA61X4 from 88mA to
36mA when interface is up and not associated.
QCA988X wasn't measured.

Signed-off-by: Janusz Dziedzic <[email protected]>
Signed-off-by: Michal Kazior <[email protected]>
---
drivers/net/wireless/ath/ath10k/mac.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 425dbe271495..9f576ca99f86 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -1735,7 +1735,14 @@ static int ath10k_mac_vif_setup_ps(struct ath10k_vif *arvif)
enable_ps = false;
}

- if (enable_ps) {
+ if (!arvif->is_started) {
+ /* mac80211 can update vif powersave state while disconnected.
+ * Firmware doesn't behave nicely and consumes more power than
+ * necessary if PS is disabled on a non-started vdev. Hence
+ * force-enable PS for non-running vdevs.
+ */
+ psmode = WMI_STA_PS_MODE_ENABLED;
+ } else if (enable_ps) {
psmode = WMI_STA_PS_MODE_ENABLED;
param = WMI_STA_PS_PARAM_INACTIVITY_TIME;

--
2.1.4


2015-05-18 09:38:33

by Michal Kazior

[permalink] [raw]
Subject: [PATCH v2 1/3] ath10k: enable ASPM

From: Janusz Dziedzic <[email protected]>

It is actually safe to enable ASPM after the
device is booted up.

This reduces power drain of QCA61X4 when driver is
simply loaded (no interface is up) from 31mA to
14mA. QCA988X wasn't measured but doesn't seem to
regress in any other way.

Signed-off-by: Janusz Dziedzic <[email protected]>
Signed-off-by: Michal Kazior <[email protected]>
---
drivers/net/wireless/ath/ath10k/pci.c | 15 ++++++++++-----
drivers/net/wireless/ath/ath10k/pci.h | 6 ++++++
2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 969a1231800e..8be07c653b2d 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -1227,11 +1227,15 @@ static void ath10k_pci_irq_enable(struct ath10k *ar)

static int ath10k_pci_hif_start(struct ath10k *ar)
{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot hif start\n");

ath10k_pci_irq_enable(ar);
ath10k_pci_rx_post(ar);

+ pcie_capability_write_word(ar_pci->pdev, PCI_EXP_LNKCTL,
+ ar_pci->link_ctl);
+
return 0;
}

@@ -1981,6 +1985,7 @@ static int ath10k_pci_chip_reset(struct ath10k *ar)

static int ath10k_pci_hif_power_up(struct ath10k *ar)
{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
int ret;

ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot hif power up\n");
@@ -1991,6 +1996,11 @@ static int ath10k_pci_hif_power_up(struct ath10k *ar)
return ret;
}

+ pcie_capability_read_word(ar_pci->pdev, PCI_EXP_LNKCTL,
+ &ar_pci->link_ctl);
+ pcie_capability_write_word(ar_pci->pdev, PCI_EXP_LNKCTL,
+ ar_pci->link_ctl & ~PCI_EXP_LNKCTL_ASPMC);
+
/*
* Bring the target up cleanly.
*
@@ -2502,7 +2512,6 @@ static int ath10k_pci_claim(struct ath10k *ar)
{
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
struct pci_dev *pdev = ar_pci->pdev;
- u32 lcr_val;
int ret;

pci_set_drvdata(pdev, ar);
@@ -2536,10 +2545,6 @@ static int ath10k_pci_claim(struct ath10k *ar)

pci_set_master(pdev);

- /* Workaround: Disable ASPM */
- pci_read_config_dword(pdev, 0x80, &lcr_val);
- pci_write_config_dword(pdev, 0x80, (lcr_val & 0xffffff00));
-
/* Arrange for access to Target SoC registers. */
ar_pci->mem = pci_iomap(pdev, BAR_NUM, 0);
if (!ar_pci->mem) {
diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h
index bddf54320160..ee2173d61257 100644
--- a/drivers/net/wireless/ath/ath10k/pci.h
+++ b/drivers/net/wireless/ath/ath10k/pci.h
@@ -185,6 +185,12 @@ struct ath10k_pci {
/* Map CE id to ce_state */
struct ath10k_ce_pipe ce_states[CE_COUNT_MAX];
struct timer_list rx_post_retry;
+
+ /* Due to HW quirks it is recommended to disable ASPM during device
+ * bootup. To do that the original PCI-E Link Control is stored before
+ * device bootup is executed and re-programmed later.
+ */
+ u16 link_ctl;
};

static inline struct ath10k_pci *ath10k_pci_priv(struct ath10k *ar)
--
2.1.4


2015-05-08 17:57:53

by Peter Oh

[permalink] [raw]
Subject: Re: [PATCH 3/3] ath10k: enable pci soc powersaving

Hi,
On 05/08/2015 02:13 AM, Michal Kazior wrote:
> By using SOC_WAKE register it is possible to bring
> down power consumption of QCA61X4 from 36mA to
> 16mA when associated and idle.
>
> Currently the sleep threshold/grace period is at a
> very conservative value of 60ms.
>
> Contrary to QCA61X4 the QCA988X firmware doesn't
> have Rx/beacon filtering available for client mode
> and SWBA events are used for beaconing in AP/IBSS
> so the SoC needs to be woken up at least every
> ~100ms in most cases. This means that QCA988X
> is at a disadvantage and the power consumption
> won't drop as much as for QCA61X4.
>
> Due to putting irq-safe spinlocks on every MMIO
> read/write it is expected this can cause a little
> performance regression on some systems. I haven't
> done any thorough measurements but some of my
> tests don't show any extreme degradation.
>
> The patch removes some explicit pci_wake calls
> that were added in 320e14b8db51aa ("ath10k: fix
> some pci wake/sleep issues"). This is safe because
> all MMIO accesses are now wrapped and the device
> is woken up automatically if necessary.
>
> Signed-off-by: Michal Kazior <[email protected]>
> ---
> drivers/net/wireless/ath/ath10k/debug.h | 1 +
> drivers/net/wireless/ath/ath10k/htt_rx.c | 1 +
> drivers/net/wireless/ath/ath10k/pci.c | 304
> ++++++++++++++++++++++---------
> drivers/net/wireless/ath/ath10k/pci.h | 91 +++++----
> 4 files changed, 262 insertions(+), 135 deletions(-)
>
> diff --git a/drivers/net/wireless/ath/ath10k/debug.h
> b/drivers/net/wireless/ath/ath10k/debug.h
> index a12b8323f9f1..53bd6a19eab6 100644
> --- a/drivers/net/wireless/ath/ath10k/debug.h
> +++ b/drivers/net/wireless/ath/ath10k/debug.h
> @@ -36,6 +36,7 @@ enum ath10k_debug_mask {
> ATH10K_DBG_REGULATORY = 0x00000800,
> ATH10K_DBG_TESTMODE = 0x00001000,
> ATH10K_DBG_WMI_PRINT = 0x00002000,
> + ATH10K_DBG_PCI_PS = 0x00004000,
> ATH10K_DBG_ANY = 0xffffffff,
> };
>
> diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c
> b/drivers/net/wireless/ath/ath10k/htt_rx.c
> index b26e32f42656..889262b07d19 100644
> --- a/drivers/net/wireless/ath/ath10k/htt_rx.c
> +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
> @@ -22,6 +22,7 @@
> #include "debug.h"
> #include "trace.h"
> #include "mac.h"
> +#include "hif.h"
>
Is it necessary change?
> #include <linux/log2.h>
>
> diff --git a/drivers/net/wireless/ath/ath10k/pci.c
> b/drivers/net/wireless/ath/ath10k/pci.c
> index 8be07c653b2d..17a060e8efa2 100644
> --- a/drivers/net/wireless/ath/ath10k/pci.c
> +++ b/drivers/net/wireless/ath/ath10k/pci.c
> @@ -330,6 +330,205 @@ static const struct service_to_pipe
> target_service_to_ce_map_wlan[] = {
> },
> };
>
> +static bool ath10k_pci_is_awake(struct ath10k *ar)
> +{
> + struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
> + u32 val = ioread32(ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS +
> + RTC_STATE_ADDRESS);
> +
> + return RTC_STATE_V_GET(val) == RTC_STATE_V_ON;
> +}
> +
> +static void __ath10k_pci_wake(struct ath10k *ar)
> +{
> + struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
> +
> + lockdep_assert_held(&ar_pci->ps_lock);
> +
> + ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps wake reg refcount %lu
> awake %d\n",
> + ar_pci->ps_wake_refcount, ar_pci->ps_awake);
> +
> + iowrite32(PCIE_SOC_WAKE_V_MASK,
> + ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS +
> + PCIE_SOC_WAKE_ADDRESS);
> +}
> +
> +static void __ath10k_pci_sleep(struct ath10k *ar)
> +{
> + struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
> +
> + lockdep_assert_held(&ar_pci->ps_lock);
> +
> + ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps sleep reg refcount %lu
> awake %d\n",
> + ar_pci->ps_wake_refcount, ar_pci->ps_awake);
> +
> + iowrite32(PCIE_SOC_WAKE_RESET,
> + ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS +
> + PCIE_SOC_WAKE_ADDRESS);
> + ar_pci->ps_awake = false;
> +}
> +
> +static int ath10k_pci_wake_wait(struct ath10k *ar)
> +{
> + int tot_delay = 0;
> + int curr_delay = 5;
> +
> + while (tot_delay < PCIE_WAKE_TIMEOUT) {
> + if (ath10k_pci_is_awake(ar))
> + return 0;
> +
> + udelay(curr_delay);
> + tot_delay += curr_delay;
> +
> + if (curr_delay < 50)
> + curr_delay += 5;
> + }
> +
> + return -ETIMEDOUT;
> +}
> +
> +static int ath10k_pci_wake(struct ath10k *ar)
> +{
> + struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
> + unsigned long flags;
> + int ret = 0;
> +
> + spin_lock_irqsave(&ar_pci->ps_lock, flags);
> +
> + ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps wake refcount %lu awake
> %d\n",
> + ar_pci->ps_wake_refcount, ar_pci->ps_awake);
> +
> + /* This function can be called very frequently. To avoid excessive
> + * CPU stalls for MMIO reads use a cache var to hold the device
> state.
> + */
> + if (!ar_pci->ps_awake) {
> + __ath10k_pci_wake(ar);
> +
> + ret = ath10k_pci_wake_wait(ar);
> + if (ret == 0)
> + ar_pci->ps_awake = true;
> + }
> +
> + if (ret == 0) {
> + ar_pci->ps_wake_refcount++;
> + WARN_ON(ar_pci->ps_wake_refcount == 0);
> + }
> +
> + spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
> +
> + return ret;
> +}
> +
> +static void ath10k_pci_sleep(struct ath10k *ar)
> +{
> + struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
> + unsigned long flags;
> +
> + spin_lock_irqsave(&ar_pci->ps_lock, flags);
> +
> + ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps sleep refcount %lu awake
> %d\n",
> + ar_pci->ps_wake_refcount, ar_pci->ps_awake);
> +
> + if (WARN_ON(ar_pci->ps_wake_refcount == 0))
> + goto skip;
> +
> + ar_pci->ps_wake_refcount--;
> +
> + mod_timer(&ar_pci->ps_timer, jiffies +
> + msecs_to_jiffies(ATH10K_PCI_SLEEP_GRACE_PERIOD_MSEC));
> +
> +skip:
> + spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
> +}
> +
> +static void ath10k_pci_ps_timer(unsigned long ptr)
> +{
> + struct ath10k *ar = (void *)ptr;
> + struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
> + unsigned long flags;
> +
> + spin_lock_irqsave(&ar_pci->ps_lock, flags);
> +
> + ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps timer refcount %lu awake
> %d\n",
> + ar_pci->ps_wake_refcount, ar_pci->ps_awake);
> +
> + if (ar_pci->ps_wake_refcount > 0)
> + goto skip;
> +
> + __ath10k_pci_sleep(ar);
> +
> +skip:
> + spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
> +}
> +
> +static void ath10k_pci_sleep_sync(struct ath10k *ar)
> +{
> + struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
> + unsigned long flags;
> +
> + del_timer_sync(&ar_pci->ps_timer);
> +
> + spin_lock_irqsave(&ar_pci->ps_lock, flags);
> + WARN_ON(ar_pci->ps_wake_refcount > 0);
> + __ath10k_pci_sleep(ar);
> + spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
> +}
> +
> +void ath10k_pci_write32(struct ath10k *ar, u32 offset, u32 value)
> +{
> + struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
> + int ret;
> +
> + ret = ath10k_pci_wake(ar);
> + if (ret) {
> + ath10k_warn(ar, "failed to wake target for write32 of
> 0x%08x at 0x%08x: %d\n",
> + value, offset, ret);
> + return;
> + }
> +
> + iowrite32(value, ar_pci->mem + offset);
> + ath10k_pci_sleep(ar);
> +}
> +
> +u32 ath10k_pci_read32(struct ath10k *ar, u32 offset)
> +{
> + struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
> + u32 val;
> + int ret;
> +
> + ret = ath10k_pci_wake(ar);
> + if (ret) {
> + ath10k_warn(ar, "failed to wake target for read32 at
> 0x%08x: %d\n",
> + offset, ret);
> + return 0xffffffff;
> + }
> +
> + val = ioread32(ar_pci->mem + offset);
> + ath10k_pci_sleep(ar);
> +
> + return val;
> +}
> +
> +u32 ath10k_pci_soc_read32(struct ath10k *ar, u32 addr)
> +{
> + return ath10k_pci_read32(ar, RTC_SOC_BASE_ADDRESS + addr);
> +}
> +
> +void ath10k_pci_soc_write32(struct ath10k *ar, u32 addr, u32 val)
> +{
> + ath10k_pci_write32(ar, RTC_SOC_BASE_ADDRESS + addr, val);
> +}
> +
> +u32 ath10k_pci_reg_read32(struct ath10k *ar, u32 addr)
> +{
> + return ath10k_pci_read32(ar, PCIE_LOCAL_BASE_ADDRESS + addr);
> +}
> +
> +void ath10k_pci_reg_write32(struct ath10k *ar, u32 addr, u32 val)
> +{
> + ath10k_pci_write32(ar, PCIE_LOCAL_BASE_ADDRESS + addr, val);
> +}
> +
> static bool ath10k_pci_irq_pending(struct ath10k *ar)
> {
> u32 cause;
> @@ -793,60 +992,6 @@ static int ath10k_pci_diag_write32(struct ath10k *ar,
> u32 address, u32 value)
> return ath10k_pci_diag_write_mem(ar, address, &val, sizeof(val));
> }
>
> -static bool ath10k_pci_is_awake(struct ath10k *ar)
> -{
> - u32 val = ath10k_pci_reg_read32(ar, RTC_STATE_ADDRESS);
> -
> - return RTC_STATE_V_GET(val) == RTC_STATE_V_ON;
> -}
> -
> -static int ath10k_pci_wake_wait(struct ath10k *ar)
> -{
> - int tot_delay = 0;
> - int curr_delay = 5;
> -
> - while (tot_delay < PCIE_WAKE_TIMEOUT) {
> - if (ath10k_pci_is_awake(ar))
> - return 0;
> -
> - udelay(curr_delay);
> - tot_delay += curr_delay;
> -
> - if (curr_delay < 50)
> - curr_delay += 5;
> - }
> -
> - return -ETIMEDOUT;
> -}
> -
> -/* The rule is host is forbidden from accessing device registers while
> it's
> - * asleep. Currently ath10k_pci_wake() and ath10k_pci_sleep() calls
> aren't
> - * balanced and the device is kept awake all the time. This is intended
> for a
> - * simpler solution for the following problems:
> - *
> - * * device can enter sleep during s2ram without the host knowing,
> - *
> - * * irq handlers access registers which is a problem if other device
> asserts
> - * a shared irq line when ath10k is between hif_power_down() and
> - * hif_power_up().
> - *
> - * FIXME: If power consumption is a concern (and there are *real* gains)
> then a
> - * refcounted wake/sleep needs to be implemented.
> - */
> -
> -static int ath10k_pci_wake(struct ath10k *ar)
> -{
> - ath10k_pci_reg_write32(ar, PCIE_SOC_WAKE_ADDRESS,
> - PCIE_SOC_WAKE_V_MASK);
> - return ath10k_pci_wake_wait(ar);
> -}
> -
> -static void ath10k_pci_sleep(struct ath10k *ar)
> -{
> - ath10k_pci_reg_write32(ar, PCIE_SOC_WAKE_ADDRESS,
> - PCIE_SOC_WAKE_RESET);
> -}
> -
> /* Called by lower (CE) layer when a send to Target completes. */
> static void ath10k_pci_ce_send_done(struct ath10k_ce_pipe *ce_state)
> {
> @@ -1348,6 +1493,9 @@ static void ath10k_pci_flush(struct ath10k *ar)
>
> static void ath10k_pci_hif_stop(struct ath10k *ar)
> {
> + struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
> + unsigned long flags;
> +
> ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot hif stop\n");
>
> /* Most likely the device has HTT Rx ring configured. The only way
> to
> @@ -1366,6 +1514,10 @@ static void ath10k_pci_hif_stop(struct ath10k *ar)
> ath10k_pci_irq_disable(ar);
> ath10k_pci_irq_sync(ar);
> ath10k_pci_flush(ar);
> +
> + spin_lock_irqsave(&ar_pci->ps_lock, flags);
> + WARN_ON(ar_pci->ps_wake_refcount > 0);
> + spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
> }
>
> static int ath10k_pci_hif_exchange_bmi_msg(struct ath10k *ar,
> @@ -1990,12 +2142,6 @@ static int ath10k_pci_hif_power_up(struct ath10k
> *ar)
>
> ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot hif power up\n");
>
> - ret = ath10k_pci_wake(ar);
> - if (ret) {
> - ath10k_err(ar, "failed to wake up target: %d\n", ret);
> - return ret;
> - }
> -
> pcie_capability_read_word(ar_pci->pdev, PCI_EXP_LNKCTL,
> &ar_pci->link_ctl);
> pcie_capability_write_word(ar_pci->pdev, PCI_EXP_LNKCTL,
> @@ -2047,7 +2193,6 @@ err_ce:
> ath10k_pci_ce_deinit(ar);
>
> err_sleep:
> - ath10k_pci_sleep(ar);
> return ret;
> }
>
> @@ -2064,7 +2209,12 @@ static void ath10k_pci_hif_power_down(struct ath10k
> *ar)
>
> static int ath10k_pci_hif_suspend(struct ath10k *ar)
> {
> - ath10k_pci_sleep(ar);
> + /* The grace timer can still be counting down and ar->ps_awake be
> true.
> + * It is known that the device may be asleep after resuming
> regardless
> + * of the SoC powersave state before suspending. Hence make sure
> the
> + * device is asleep before proceeding.
> + */
> + ath10k_pci_sleep_sync(ar);
>
> return 0;
> }
> @@ -2074,13 +2224,6 @@ static int ath10k_pci_hif_resume(struct ath10k *ar)
> struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
> struct pci_dev *pdev = ar_pci->pdev;
> u32 val;
> - int ret;
> -
> - ret = ath10k_pci_wake(ar);
> - if (ret) {
> - ath10k_err(ar, "failed to wake device up on resume: %d\n",
> ret);
> - return ret;
> - }
>
> /* Suspend/Resume resets the PCI configuration space, so we have
> to
> * re-disable the RETRY_TIMEOUT register (0x41) to keep PCI Tx
> retries
> @@ -2091,7 +2234,7 @@ static int ath10k_pci_hif_resume(struct ath10k *ar)
> if ((val & 0x0000ff00) != 0)
> pci_write_config_dword(pdev, 0x40, val & 0xffff00ff);
>
> - return ret;
> + return 0;
> }
> #endif
>
> @@ -2185,13 +2328,6 @@ static irqreturn_t ath10k_pci_interrupt_handler(int
> irq, void *arg)
> {
> struct ath10k *ar = arg;
> struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
> - int ret;
> -
> - ret = ath10k_pci_wake(ar);
> - if (ret) {
> - ath10k_warn(ar, "failed to wake device up on irq: %d\n",
> ret);
> - return IRQ_NONE;
> - }
>
> if (ar_pci->num_msi_intrs == 0) {
> if (!ath10k_pci_irq_pending(ar))
> @@ -2638,8 +2774,12 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
> pdev->subsystem_vendor, pdev->subsystem_device);
>
> spin_lock_init(&ar_pci->ce_lock);
> + spin_lock_init(&ar_pci->ps_lock);
> +
> setup_timer(&ar_pci->rx_post_retry, ath10k_pci_rx_replenish_retry,
> (unsigned long)ar);
> + setup_timer(&ar_pci->ps_timer, ath10k_pci_ps_timer,
> + (unsigned long)ar);
>
> ret = ath10k_pci_claim(ar);
> if (ret) {
> @@ -2647,12 +2787,6 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
> goto err_core_destroy;
> }
>
> - ret = ath10k_pci_wake(ar);
> - if (ret) {
> - ath10k_err(ar, "failed to wake up: %d\n", ret);
> - goto err_release;
> - }
> -
> ret = ath10k_pci_alloc_pipes(ar);
> if (ret) {
> ath10k_err(ar, "failed to allocate copy engine pipes:
> %d\n",
> @@ -2716,9 +2850,6 @@ err_free_pipes:
> ath10k_pci_free_pipes(ar);
>
> err_sleep:
> - ath10k_pci_sleep(ar);
> -
> -err_release:
> ath10k_pci_release(ar);
>
> err_core_destroy:
> @@ -2748,6 +2879,7 @@ static void ath10k_pci_remove(struct pci_dev *pdev)
> ath10k_pci_deinit_irq(ar);
> ath10k_pci_ce_deinit(ar);
> ath10k_pci_free_pipes(ar);
> + ath10k_pci_sleep_sync(ar);
> ath10k_pci_release(ar);
> ath10k_core_destroy(ar);
> }
> diff --git a/drivers/net/wireless/ath/ath10k/pci.h
> b/drivers/net/wireless/ath/ath10k/pci.h
> index ee2173d61257..d7696ddc03c4 100644
> --- a/drivers/net/wireless/ath/ath10k/pci.h
> +++ b/drivers/net/wireless/ath/ath10k/pci.h
> @@ -191,6 +191,35 @@ struct ath10k_pci {
> * device bootup is executed and re-programmed later.
> */
> u16 link_ctl;
> +
> + /* Protects ps_awake and ps_wake_refcount */
> + spinlock_t ps_lock;
> +
> + /* The device has a special powersave-oriented register. When
> device is
> + * considered asleep it drains less power and driver is forbidden
> from
> + * accessing most MMIO registers. If host were to access them
> without
> + * waking up the device might scribble over host memory or return
> + * 0xdeadbeef readouts.
> + */
> + unsigned long ps_wake_refcount;
> +
> + /* Waking up takes some time (up to 2ms in some cases) so it can
> be bad
> + * for latency. To mitigate this the device isn't immediately
> allowed
> + * to sleep after all references are undone - instead there's a
> grace
> + * period after which the powersave register is updated unless
> some
> + * activity to/from device happened in the meantime.
> + *
> + * Also see comments on ATH10K_PCI_SLEEP_GRACE_PERIOD_MSEC.
> + */
> + struct timer_list ps_timer;
> +
> + /* MMIO registers are used to communicate with the device. With
> + * intensive traffic accessing powersave register would be a bit
> + * wasteful overhead and would needlessly stall CPU. It is far
> more
> + * efficient to rely on a variable in RAM and update it only upon
> + * powersave register state changes.
> + */
> + bool ps_awake;
> };
>
> static inline struct ath10k_pci *ath10k_pci_priv(struct ath10k *ar)
> @@ -215,61 +244,25 @@ static inline struct ath10k_pci
> *ath10k_pci_priv(struct ath10k *ar)
> * for this device; but that's not guaranteed.
> */
> #define TARG_CPU_SPACE_TO_CE_SPACE(ar, pci_addr, addr) \
> - (((ioread32((pci_addr)+(SOC_CORE_BASE_ADDRESS| \
> + (((ath10k_pci_read32(ar, (SOC_CORE_BASE_ADDRESS | \
> CORE_CTRL_ADDRESS)) & 0x7ff) << 21) | \
> 0x100000 | ((addr) & 0xfffff))
>
> /* Wait up to this many Ms for a Diagnostic Access CE operation to
> complete */
> #define DIAG_ACCESS_CE_TIMEOUT_MS 10
>
> -/* Target exposes its registers for direct access. However before host
> can
> - * access them it needs to make sure the target is awake
> (ath10k_pci_wake,
> - * ath10k_pci_wake_wait, ath10k_pci_is_awake). Once target is awake it
> won't go
> - * to sleep unless host tells it to (ath10k_pci_sleep).
> - *
> - * If host tries to access target registers without waking it up it can
> - * scribble over host memory.
> - *
> - * If target is asleep waking it up may take up to even 2ms.
> +void ath10k_pci_write32(struct ath10k *ar, u32 offset, u32 value);
> +void ath10k_pci_soc_write32(struct ath10k *ar, u32 addr, u32 val);
> +void ath10k_pci_reg_write32(struct ath10k *ar, u32 addr, u32 val);
> +
> +u32 ath10k_pci_read32(struct ath10k *ar, u32 offset);
> +u32 ath10k_pci_soc_read32(struct ath10k *ar, u32 addr);
> +u32 ath10k_pci_reg_read32(struct ath10k *ar, u32 addr);
> +
> +/* QCA6174 is known to have Tx/Rx issues when SOC_WAKE register is poked
> too
> + * frequently. To avoid this put SoC to sleep after a very conservative
> grace
> + * period. Adjust with great care.
> */
> -
> -static inline void ath10k_pci_write32(struct ath10k *ar, u32 offset,
> - u32 value)
> -{
> - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
> -
> - iowrite32(value, ar_pci->mem + offset);
> -}
> -
> -static inline u32 ath10k_pci_read32(struct ath10k *ar, u32 offset)
> -{
> - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
> -
> - return ioread32(ar_pci->mem + offset);
> -}
> -
> -static inline u32 ath10k_pci_soc_read32(struct ath10k *ar, u32 addr)
> -{
> - return ath10k_pci_read32(ar, RTC_SOC_BASE_ADDRESS + addr);
> -}
> -
> -static inline void ath10k_pci_soc_write32(struct ath10k *ar, u32 addr,
> u32 val)
> -{
> - ath10k_pci_write32(ar, RTC_SOC_BASE_ADDRESS + addr, val);
> -}
> -
> -static inline u32 ath10k_pci_reg_read32(struct ath10k *ar, u32 addr)
> -{
> - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
> -
> - return ioread32(ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS + addr);
> -}
> -
> -static inline void ath10k_pci_reg_write32(struct ath10k *ar, u32 addr,
> u32 val)
> -{
> - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
> -
> - iowrite32(val, ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS + addr);
> -}
> +#define ATH10K_PCI_SLEEP_GRACE_PERIOD_MSEC 60
>
> #endif /* _PCI_H_ */
Thanks,
Peter

2015-05-08 09:17:05

by Michal Kazior

[permalink] [raw]
Subject: [PATCH 3/3] ath10k: enable pci soc powersaving

By using SOC_WAKE register it is possible to bring
down power consumption of QCA61X4 from 36mA to
16mA when associated and idle.

Currently the sleep threshold/grace period is at a
very conservative value of 60ms.

Contrary to QCA61X4 the QCA988X firmware doesn't
have Rx/beacon filtering available for client mode
and SWBA events are used for beaconing in AP/IBSS
so the SoC needs to be woken up at least every
~100ms in most cases. This means that QCA988X
is at a disadvantage and the power consumption
won't drop as much as for QCA61X4.

Due to putting irq-safe spinlocks on every MMIO
read/write it is expected this can cause a little
performance regression on some systems. I haven't
done any thorough measurements but some of my
tests don't show any extreme degradation.

The patch removes some explicit pci_wake calls
that were added in 320e14b8db51aa ("ath10k: fix
some pci wake/sleep issues"). This is safe because
all MMIO accesses are now wrapped and the device
is woken up automatically if necessary.

Signed-off-by: Michal Kazior <[email protected]>
---
drivers/net/wireless/ath/ath10k/debug.h | 1 +
drivers/net/wireless/ath/ath10k/htt_rx.c | 1 +
drivers/net/wireless/ath/ath10k/pci.c | 304 ++++++++++++++++++++++---------
drivers/net/wireless/ath/ath10k/pci.h | 91 +++++----
4 files changed, 262 insertions(+), 135 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/debug.h b/drivers/net/wireless/ath/ath10k/debug.h
index a12b8323f9f1..53bd6a19eab6 100644
--- a/drivers/net/wireless/ath/ath10k/debug.h
+++ b/drivers/net/wireless/ath/ath10k/debug.h
@@ -36,6 +36,7 @@ enum ath10k_debug_mask {
ATH10K_DBG_REGULATORY = 0x00000800,
ATH10K_DBG_TESTMODE = 0x00001000,
ATH10K_DBG_WMI_PRINT = 0x00002000,
+ ATH10K_DBG_PCI_PS = 0x00004000,
ATH10K_DBG_ANY = 0xffffffff,
};

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index b26e32f42656..889262b07d19 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -22,6 +22,7 @@
#include "debug.h"
#include "trace.h"
#include "mac.h"
+#include "hif.h"

#include <linux/log2.h>

diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 8be07c653b2d..17a060e8efa2 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -330,6 +330,205 @@ static const struct service_to_pipe target_service_to_ce_map_wlan[] = {
},
};

+static bool ath10k_pci_is_awake(struct ath10k *ar)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ u32 val = ioread32(ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS +
+ RTC_STATE_ADDRESS);
+
+ return RTC_STATE_V_GET(val) == RTC_STATE_V_ON;
+}
+
+static void __ath10k_pci_wake(struct ath10k *ar)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+
+ lockdep_assert_held(&ar_pci->ps_lock);
+
+ ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps wake reg refcount %lu awake %d\n",
+ ar_pci->ps_wake_refcount, ar_pci->ps_awake);
+
+ iowrite32(PCIE_SOC_WAKE_V_MASK,
+ ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS +
+ PCIE_SOC_WAKE_ADDRESS);
+}
+
+static void __ath10k_pci_sleep(struct ath10k *ar)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+
+ lockdep_assert_held(&ar_pci->ps_lock);
+
+ ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps sleep reg refcount %lu awake %d\n",
+ ar_pci->ps_wake_refcount, ar_pci->ps_awake);
+
+ iowrite32(PCIE_SOC_WAKE_RESET,
+ ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS +
+ PCIE_SOC_WAKE_ADDRESS);
+ ar_pci->ps_awake = false;
+}
+
+static int ath10k_pci_wake_wait(struct ath10k *ar)
+{
+ int tot_delay = 0;
+ int curr_delay = 5;
+
+ while (tot_delay < PCIE_WAKE_TIMEOUT) {
+ if (ath10k_pci_is_awake(ar))
+ return 0;
+
+ udelay(curr_delay);
+ tot_delay += curr_delay;
+
+ if (curr_delay < 50)
+ curr_delay += 5;
+ }
+
+ return -ETIMEDOUT;
+}
+
+static int ath10k_pci_wake(struct ath10k *ar)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&ar_pci->ps_lock, flags);
+
+ ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps wake refcount %lu awake %d\n",
+ ar_pci->ps_wake_refcount, ar_pci->ps_awake);
+
+ /* This function can be called very frequently. To avoid excessive
+ * CPU stalls for MMIO reads use a cache var to hold the device state.
+ */
+ if (!ar_pci->ps_awake) {
+ __ath10k_pci_wake(ar);
+
+ ret = ath10k_pci_wake_wait(ar);
+ if (ret == 0)
+ ar_pci->ps_awake = true;
+ }
+
+ if (ret == 0) {
+ ar_pci->ps_wake_refcount++;
+ WARN_ON(ar_pci->ps_wake_refcount == 0);
+ }
+
+ spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
+
+ return ret;
+}
+
+static void ath10k_pci_sleep(struct ath10k *ar)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ar_pci->ps_lock, flags);
+
+ ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps sleep refcount %lu awake %d\n",
+ ar_pci->ps_wake_refcount, ar_pci->ps_awake);
+
+ if (WARN_ON(ar_pci->ps_wake_refcount == 0))
+ goto skip;
+
+ ar_pci->ps_wake_refcount--;
+
+ mod_timer(&ar_pci->ps_timer, jiffies +
+ msecs_to_jiffies(ATH10K_PCI_SLEEP_GRACE_PERIOD_MSEC));
+
+skip:
+ spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
+}
+
+static void ath10k_pci_ps_timer(unsigned long ptr)
+{
+ struct ath10k *ar = (void *)ptr;
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ar_pci->ps_lock, flags);
+
+ ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps timer refcount %lu awake %d\n",
+ ar_pci->ps_wake_refcount, ar_pci->ps_awake);
+
+ if (ar_pci->ps_wake_refcount > 0)
+ goto skip;
+
+ __ath10k_pci_sleep(ar);
+
+skip:
+ spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
+}
+
+static void ath10k_pci_sleep_sync(struct ath10k *ar)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ unsigned long flags;
+
+ del_timer_sync(&ar_pci->ps_timer);
+
+ spin_lock_irqsave(&ar_pci->ps_lock, flags);
+ WARN_ON(ar_pci->ps_wake_refcount > 0);
+ __ath10k_pci_sleep(ar);
+ spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
+}
+
+void ath10k_pci_write32(struct ath10k *ar, u32 offset, u32 value)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ int ret;
+
+ ret = ath10k_pci_wake(ar);
+ if (ret) {
+ ath10k_warn(ar, "failed to wake target for write32 of 0x%08x at 0x%08x: %d\n",
+ value, offset, ret);
+ return;
+ }
+
+ iowrite32(value, ar_pci->mem + offset);
+ ath10k_pci_sleep(ar);
+}
+
+u32 ath10k_pci_read32(struct ath10k *ar, u32 offset)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ u32 val;
+ int ret;
+
+ ret = ath10k_pci_wake(ar);
+ if (ret) {
+ ath10k_warn(ar, "failed to wake target for read32 at 0x%08x: %d\n",
+ offset, ret);
+ return 0xffffffff;
+ }
+
+ val = ioread32(ar_pci->mem + offset);
+ ath10k_pci_sleep(ar);
+
+ return val;
+}
+
+u32 ath10k_pci_soc_read32(struct ath10k *ar, u32 addr)
+{
+ return ath10k_pci_read32(ar, RTC_SOC_BASE_ADDRESS + addr);
+}
+
+void ath10k_pci_soc_write32(struct ath10k *ar, u32 addr, u32 val)
+{
+ ath10k_pci_write32(ar, RTC_SOC_BASE_ADDRESS + addr, val);
+}
+
+u32 ath10k_pci_reg_read32(struct ath10k *ar, u32 addr)
+{
+ return ath10k_pci_read32(ar, PCIE_LOCAL_BASE_ADDRESS + addr);
+}
+
+void ath10k_pci_reg_write32(struct ath10k *ar, u32 addr, u32 val)
+{
+ ath10k_pci_write32(ar, PCIE_LOCAL_BASE_ADDRESS + addr, val);
+}
+
static bool ath10k_pci_irq_pending(struct ath10k *ar)
{
u32 cause;
@@ -793,60 +992,6 @@ static int ath10k_pci_diag_write32(struct ath10k *ar, u32 address, u32 value)
return ath10k_pci_diag_write_mem(ar, address, &val, sizeof(val));
}

-static bool ath10k_pci_is_awake(struct ath10k *ar)
-{
- u32 val = ath10k_pci_reg_read32(ar, RTC_STATE_ADDRESS);
-
- return RTC_STATE_V_GET(val) == RTC_STATE_V_ON;
-}
-
-static int ath10k_pci_wake_wait(struct ath10k *ar)
-{
- int tot_delay = 0;
- int curr_delay = 5;
-
- while (tot_delay < PCIE_WAKE_TIMEOUT) {
- if (ath10k_pci_is_awake(ar))
- return 0;
-
- udelay(curr_delay);
- tot_delay += curr_delay;
-
- if (curr_delay < 50)
- curr_delay += 5;
- }
-
- return -ETIMEDOUT;
-}
-
-/* The rule is host is forbidden from accessing device registers while it's
- * asleep. Currently ath10k_pci_wake() and ath10k_pci_sleep() calls aren't
- * balanced and the device is kept awake all the time. This is intended for a
- * simpler solution for the following problems:
- *
- * * device can enter sleep during s2ram without the host knowing,
- *
- * * irq handlers access registers which is a problem if other device asserts
- * a shared irq line when ath10k is between hif_power_down() and
- * hif_power_up().
- *
- * FIXME: If power consumption is a concern (and there are *real* gains) then a
- * refcounted wake/sleep needs to be implemented.
- */
-
-static int ath10k_pci_wake(struct ath10k *ar)
-{
- ath10k_pci_reg_write32(ar, PCIE_SOC_WAKE_ADDRESS,
- PCIE_SOC_WAKE_V_MASK);
- return ath10k_pci_wake_wait(ar);
-}
-
-static void ath10k_pci_sleep(struct ath10k *ar)
-{
- ath10k_pci_reg_write32(ar, PCIE_SOC_WAKE_ADDRESS,
- PCIE_SOC_WAKE_RESET);
-}
-
/* Called by lower (CE) layer when a send to Target completes. */
static void ath10k_pci_ce_send_done(struct ath10k_ce_pipe *ce_state)
{
@@ -1348,6 +1493,9 @@ static void ath10k_pci_flush(struct ath10k *ar)

static void ath10k_pci_hif_stop(struct ath10k *ar)
{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ unsigned long flags;
+
ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot hif stop\n");

/* Most likely the device has HTT Rx ring configured. The only way to
@@ -1366,6 +1514,10 @@ static void ath10k_pci_hif_stop(struct ath10k *ar)
ath10k_pci_irq_disable(ar);
ath10k_pci_irq_sync(ar);
ath10k_pci_flush(ar);
+
+ spin_lock_irqsave(&ar_pci->ps_lock, flags);
+ WARN_ON(ar_pci->ps_wake_refcount > 0);
+ spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
}

static int ath10k_pci_hif_exchange_bmi_msg(struct ath10k *ar,
@@ -1990,12 +2142,6 @@ static int ath10k_pci_hif_power_up(struct ath10k *ar)

ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot hif power up\n");

- ret = ath10k_pci_wake(ar);
- if (ret) {
- ath10k_err(ar, "failed to wake up target: %d\n", ret);
- return ret;
- }
-
pcie_capability_read_word(ar_pci->pdev, PCI_EXP_LNKCTL,
&ar_pci->link_ctl);
pcie_capability_write_word(ar_pci->pdev, PCI_EXP_LNKCTL,
@@ -2047,7 +2193,6 @@ err_ce:
ath10k_pci_ce_deinit(ar);

err_sleep:
- ath10k_pci_sleep(ar);
return ret;
}

@@ -2064,7 +2209,12 @@ static void ath10k_pci_hif_power_down(struct ath10k *ar)

static int ath10k_pci_hif_suspend(struct ath10k *ar)
{
- ath10k_pci_sleep(ar);
+ /* The grace timer can still be counting down and ar->ps_awake be true.
+ * It is known that the device may be asleep after resuming regardless
+ * of the SoC powersave state before suspending. Hence make sure the
+ * device is asleep before proceeding.
+ */
+ ath10k_pci_sleep_sync(ar);

return 0;
}
@@ -2074,13 +2224,6 @@ static int ath10k_pci_hif_resume(struct ath10k *ar)
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
struct pci_dev *pdev = ar_pci->pdev;
u32 val;
- int ret;
-
- ret = ath10k_pci_wake(ar);
- if (ret) {
- ath10k_err(ar, "failed to wake device up on resume: %d\n", ret);
- return ret;
- }

/* Suspend/Resume resets the PCI configuration space, so we have to
* re-disable the RETRY_TIMEOUT register (0x41) to keep PCI Tx retries
@@ -2091,7 +2234,7 @@ static int ath10k_pci_hif_resume(struct ath10k *ar)
if ((val & 0x0000ff00) != 0)
pci_write_config_dword(pdev, 0x40, val & 0xffff00ff);

- return ret;
+ return 0;
}
#endif

@@ -2185,13 +2328,6 @@ static irqreturn_t ath10k_pci_interrupt_handler(int irq, void *arg)
{
struct ath10k *ar = arg;
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
- int ret;
-
- ret = ath10k_pci_wake(ar);
- if (ret) {
- ath10k_warn(ar, "failed to wake device up on irq: %d\n", ret);
- return IRQ_NONE;
- }

if (ar_pci->num_msi_intrs == 0) {
if (!ath10k_pci_irq_pending(ar))
@@ -2638,8 +2774,12 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
pdev->subsystem_vendor, pdev->subsystem_device);

spin_lock_init(&ar_pci->ce_lock);
+ spin_lock_init(&ar_pci->ps_lock);
+
setup_timer(&ar_pci->rx_post_retry, ath10k_pci_rx_replenish_retry,
(unsigned long)ar);
+ setup_timer(&ar_pci->ps_timer, ath10k_pci_ps_timer,
+ (unsigned long)ar);

ret = ath10k_pci_claim(ar);
if (ret) {
@@ -2647,12 +2787,6 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
goto err_core_destroy;
}

- ret = ath10k_pci_wake(ar);
- if (ret) {
- ath10k_err(ar, "failed to wake up: %d\n", ret);
- goto err_release;
- }
-
ret = ath10k_pci_alloc_pipes(ar);
if (ret) {
ath10k_err(ar, "failed to allocate copy engine pipes: %d\n",
@@ -2716,9 +2850,6 @@ err_free_pipes:
ath10k_pci_free_pipes(ar);

err_sleep:
- ath10k_pci_sleep(ar);
-
-err_release:
ath10k_pci_release(ar);

err_core_destroy:
@@ -2748,6 +2879,7 @@ static void ath10k_pci_remove(struct pci_dev *pdev)
ath10k_pci_deinit_irq(ar);
ath10k_pci_ce_deinit(ar);
ath10k_pci_free_pipes(ar);
+ ath10k_pci_sleep_sync(ar);
ath10k_pci_release(ar);
ath10k_core_destroy(ar);
}
diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h
index ee2173d61257..d7696ddc03c4 100644
--- a/drivers/net/wireless/ath/ath10k/pci.h
+++ b/drivers/net/wireless/ath/ath10k/pci.h
@@ -191,6 +191,35 @@ struct ath10k_pci {
* device bootup is executed and re-programmed later.
*/
u16 link_ctl;
+
+ /* Protects ps_awake and ps_wake_refcount */
+ spinlock_t ps_lock;
+
+ /* The device has a special powersave-oriented register. When device is
+ * considered asleep it drains less power and driver is forbidden from
+ * accessing most MMIO registers. If host were to access them without
+ * waking up the device might scribble over host memory or return
+ * 0xdeadbeef readouts.
+ */
+ unsigned long ps_wake_refcount;
+
+ /* Waking up takes some time (up to 2ms in some cases) so it can be bad
+ * for latency. To mitigate this the device isn't immediately allowed
+ * to sleep after all references are undone - instead there's a grace
+ * period after which the powersave register is updated unless some
+ * activity to/from device happened in the meantime.
+ *
+ * Also see comments on ATH10K_PCI_SLEEP_GRACE_PERIOD_MSEC.
+ */
+ struct timer_list ps_timer;
+
+ /* MMIO registers are used to communicate with the device. With
+ * intensive traffic accessing powersave register would be a bit
+ * wasteful overhead and would needlessly stall CPU. It is far more
+ * efficient to rely on a variable in RAM and update it only upon
+ * powersave register state changes.
+ */
+ bool ps_awake;
};

static inline struct ath10k_pci *ath10k_pci_priv(struct ath10k *ar)
@@ -215,61 +244,25 @@ static inline struct ath10k_pci *ath10k_pci_priv(struct ath10k *ar)
* for this device; but that's not guaranteed.
*/
#define TARG_CPU_SPACE_TO_CE_SPACE(ar, pci_addr, addr) \
- (((ioread32((pci_addr)+(SOC_CORE_BASE_ADDRESS| \
+ (((ath10k_pci_read32(ar, (SOC_CORE_BASE_ADDRESS | \
CORE_CTRL_ADDRESS)) & 0x7ff) << 21) | \
0x100000 | ((addr) & 0xfffff))

/* Wait up to this many Ms for a Diagnostic Access CE operation to complete */
#define DIAG_ACCESS_CE_TIMEOUT_MS 10

-/* Target exposes its registers for direct access. However before host can
- * access them it needs to make sure the target is awake (ath10k_pci_wake,
- * ath10k_pci_wake_wait, ath10k_pci_is_awake). Once target is awake it won't go
- * to sleep unless host tells it to (ath10k_pci_sleep).
- *
- * If host tries to access target registers without waking it up it can
- * scribble over host memory.
- *
- * If target is asleep waking it up may take up to even 2ms.
+void ath10k_pci_write32(struct ath10k *ar, u32 offset, u32 value);
+void ath10k_pci_soc_write32(struct ath10k *ar, u32 addr, u32 val);
+void ath10k_pci_reg_write32(struct ath10k *ar, u32 addr, u32 val);
+
+u32 ath10k_pci_read32(struct ath10k *ar, u32 offset);
+u32 ath10k_pci_soc_read32(struct ath10k *ar, u32 addr);
+u32 ath10k_pci_reg_read32(struct ath10k *ar, u32 addr);
+
+/* QCA6174 is known to have Tx/Rx issues when SOC_WAKE register is poked too
+ * frequently. To avoid this put SoC to sleep after a very conservative grace
+ * period. Adjust with great care.
*/
-
-static inline void ath10k_pci_write32(struct ath10k *ar, u32 offset,
- u32 value)
-{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-
- iowrite32(value, ar_pci->mem + offset);
-}
-
-static inline u32 ath10k_pci_read32(struct ath10k *ar, u32 offset)
-{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-
- return ioread32(ar_pci->mem + offset);
-}
-
-static inline u32 ath10k_pci_soc_read32(struct ath10k *ar, u32 addr)
-{
- return ath10k_pci_read32(ar, RTC_SOC_BASE_ADDRESS + addr);
-}
-
-static inline void ath10k_pci_soc_write32(struct ath10k *ar, u32 addr, u32 val)
-{
- ath10k_pci_write32(ar, RTC_SOC_BASE_ADDRESS + addr, val);
-}
-
-static inline u32 ath10k_pci_reg_read32(struct ath10k *ar, u32 addr)
-{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-
- return ioread32(ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS + addr);
-}
-
-static inline void ath10k_pci_reg_write32(struct ath10k *ar, u32 addr, u32 val)
-{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-
- iowrite32(val, ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS + addr);
-}
+#define ATH10K_PCI_SLEEP_GRACE_PERIOD_MSEC 60

#endif /* _PCI_H_ */
--
2.1.4


2015-05-18 09:38:36

by Michal Kazior

[permalink] [raw]
Subject: [PATCH v2 3/3] ath10k: enable pci soc powersaving

By using SOC_WAKE register it is possible to bring
down power consumption of QCA61X4 from 36mA to
16mA when associated and idle.

Currently the sleep threshold/grace period is at a
very conservative value of 60ms.

Contrary to QCA61X4 the QCA988X firmware doesn't
have Rx/beacon filtering available for client mode
and SWBA events are used for beaconing in AP/IBSS
so the SoC needs to be woken up at least every
~100ms in most cases. This means that QCA988X
is at a disadvantage and the power consumption
won't drop as much as for QCA61X4.

Due to putting irq-safe spinlocks on every MMIO
read/write it is expected this can cause a little
performance regression on some systems. I haven't
done any thorough measurements but some of my
tests don't show any extreme degradation.

The patch removes some explicit pci_wake calls
that were added in 320e14b8db51aa ("ath10k: fix
some pci wake/sleep issues"). This is safe because
all MMIO accesses are now wrapped and the device
is woken up automatically if necessary.

Signed-off-by: Michal Kazior <[email protected]>
---

Notes:
v2:
* remove unnecessary hif.h include [Peter]

drivers/net/wireless/ath/ath10k/debug.h | 1 +
drivers/net/wireless/ath/ath10k/pci.c | 304 +++++++++++++++++++++++---------
drivers/net/wireless/ath/ath10k/pci.h | 91 +++++-----
3 files changed, 261 insertions(+), 135 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/debug.h b/drivers/net/wireless/ath/ath10k/debug.h
index a12b8323f9f1..53bd6a19eab6 100644
--- a/drivers/net/wireless/ath/ath10k/debug.h
+++ b/drivers/net/wireless/ath/ath10k/debug.h
@@ -36,6 +36,7 @@ enum ath10k_debug_mask {
ATH10K_DBG_REGULATORY = 0x00000800,
ATH10K_DBG_TESTMODE = 0x00001000,
ATH10K_DBG_WMI_PRINT = 0x00002000,
+ ATH10K_DBG_PCI_PS = 0x00004000,
ATH10K_DBG_ANY = 0xffffffff,
};

diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 8be07c653b2d..17a060e8efa2 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -330,6 +330,205 @@ static const struct service_to_pipe target_service_to_ce_map_wlan[] = {
},
};

+static bool ath10k_pci_is_awake(struct ath10k *ar)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ u32 val = ioread32(ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS +
+ RTC_STATE_ADDRESS);
+
+ return RTC_STATE_V_GET(val) == RTC_STATE_V_ON;
+}
+
+static void __ath10k_pci_wake(struct ath10k *ar)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+
+ lockdep_assert_held(&ar_pci->ps_lock);
+
+ ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps wake reg refcount %lu awake %d\n",
+ ar_pci->ps_wake_refcount, ar_pci->ps_awake);
+
+ iowrite32(PCIE_SOC_WAKE_V_MASK,
+ ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS +
+ PCIE_SOC_WAKE_ADDRESS);
+}
+
+static void __ath10k_pci_sleep(struct ath10k *ar)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+
+ lockdep_assert_held(&ar_pci->ps_lock);
+
+ ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps sleep reg refcount %lu awake %d\n",
+ ar_pci->ps_wake_refcount, ar_pci->ps_awake);
+
+ iowrite32(PCIE_SOC_WAKE_RESET,
+ ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS +
+ PCIE_SOC_WAKE_ADDRESS);
+ ar_pci->ps_awake = false;
+}
+
+static int ath10k_pci_wake_wait(struct ath10k *ar)
+{
+ int tot_delay = 0;
+ int curr_delay = 5;
+
+ while (tot_delay < PCIE_WAKE_TIMEOUT) {
+ if (ath10k_pci_is_awake(ar))
+ return 0;
+
+ udelay(curr_delay);
+ tot_delay += curr_delay;
+
+ if (curr_delay < 50)
+ curr_delay += 5;
+ }
+
+ return -ETIMEDOUT;
+}
+
+static int ath10k_pci_wake(struct ath10k *ar)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&ar_pci->ps_lock, flags);
+
+ ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps wake refcount %lu awake %d\n",
+ ar_pci->ps_wake_refcount, ar_pci->ps_awake);
+
+ /* This function can be called very frequently. To avoid excessive
+ * CPU stalls for MMIO reads use a cache var to hold the device state.
+ */
+ if (!ar_pci->ps_awake) {
+ __ath10k_pci_wake(ar);
+
+ ret = ath10k_pci_wake_wait(ar);
+ if (ret == 0)
+ ar_pci->ps_awake = true;
+ }
+
+ if (ret == 0) {
+ ar_pci->ps_wake_refcount++;
+ WARN_ON(ar_pci->ps_wake_refcount == 0);
+ }
+
+ spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
+
+ return ret;
+}
+
+static void ath10k_pci_sleep(struct ath10k *ar)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ar_pci->ps_lock, flags);
+
+ ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps sleep refcount %lu awake %d\n",
+ ar_pci->ps_wake_refcount, ar_pci->ps_awake);
+
+ if (WARN_ON(ar_pci->ps_wake_refcount == 0))
+ goto skip;
+
+ ar_pci->ps_wake_refcount--;
+
+ mod_timer(&ar_pci->ps_timer, jiffies +
+ msecs_to_jiffies(ATH10K_PCI_SLEEP_GRACE_PERIOD_MSEC));
+
+skip:
+ spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
+}
+
+static void ath10k_pci_ps_timer(unsigned long ptr)
+{
+ struct ath10k *ar = (void *)ptr;
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ar_pci->ps_lock, flags);
+
+ ath10k_dbg(ar, ATH10K_DBG_PCI_PS, "pci ps timer refcount %lu awake %d\n",
+ ar_pci->ps_wake_refcount, ar_pci->ps_awake);
+
+ if (ar_pci->ps_wake_refcount > 0)
+ goto skip;
+
+ __ath10k_pci_sleep(ar);
+
+skip:
+ spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
+}
+
+static void ath10k_pci_sleep_sync(struct ath10k *ar)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ unsigned long flags;
+
+ del_timer_sync(&ar_pci->ps_timer);
+
+ spin_lock_irqsave(&ar_pci->ps_lock, flags);
+ WARN_ON(ar_pci->ps_wake_refcount > 0);
+ __ath10k_pci_sleep(ar);
+ spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
+}
+
+void ath10k_pci_write32(struct ath10k *ar, u32 offset, u32 value)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ int ret;
+
+ ret = ath10k_pci_wake(ar);
+ if (ret) {
+ ath10k_warn(ar, "failed to wake target for write32 of 0x%08x at 0x%08x: %d\n",
+ value, offset, ret);
+ return;
+ }
+
+ iowrite32(value, ar_pci->mem + offset);
+ ath10k_pci_sleep(ar);
+}
+
+u32 ath10k_pci_read32(struct ath10k *ar, u32 offset)
+{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ u32 val;
+ int ret;
+
+ ret = ath10k_pci_wake(ar);
+ if (ret) {
+ ath10k_warn(ar, "failed to wake target for read32 at 0x%08x: %d\n",
+ offset, ret);
+ return 0xffffffff;
+ }
+
+ val = ioread32(ar_pci->mem + offset);
+ ath10k_pci_sleep(ar);
+
+ return val;
+}
+
+u32 ath10k_pci_soc_read32(struct ath10k *ar, u32 addr)
+{
+ return ath10k_pci_read32(ar, RTC_SOC_BASE_ADDRESS + addr);
+}
+
+void ath10k_pci_soc_write32(struct ath10k *ar, u32 addr, u32 val)
+{
+ ath10k_pci_write32(ar, RTC_SOC_BASE_ADDRESS + addr, val);
+}
+
+u32 ath10k_pci_reg_read32(struct ath10k *ar, u32 addr)
+{
+ return ath10k_pci_read32(ar, PCIE_LOCAL_BASE_ADDRESS + addr);
+}
+
+void ath10k_pci_reg_write32(struct ath10k *ar, u32 addr, u32 val)
+{
+ ath10k_pci_write32(ar, PCIE_LOCAL_BASE_ADDRESS + addr, val);
+}
+
static bool ath10k_pci_irq_pending(struct ath10k *ar)
{
u32 cause;
@@ -793,60 +992,6 @@ static int ath10k_pci_diag_write32(struct ath10k *ar, u32 address, u32 value)
return ath10k_pci_diag_write_mem(ar, address, &val, sizeof(val));
}

-static bool ath10k_pci_is_awake(struct ath10k *ar)
-{
- u32 val = ath10k_pci_reg_read32(ar, RTC_STATE_ADDRESS);
-
- return RTC_STATE_V_GET(val) == RTC_STATE_V_ON;
-}
-
-static int ath10k_pci_wake_wait(struct ath10k *ar)
-{
- int tot_delay = 0;
- int curr_delay = 5;
-
- while (tot_delay < PCIE_WAKE_TIMEOUT) {
- if (ath10k_pci_is_awake(ar))
- return 0;
-
- udelay(curr_delay);
- tot_delay += curr_delay;
-
- if (curr_delay < 50)
- curr_delay += 5;
- }
-
- return -ETIMEDOUT;
-}
-
-/* The rule is host is forbidden from accessing device registers while it's
- * asleep. Currently ath10k_pci_wake() and ath10k_pci_sleep() calls aren't
- * balanced and the device is kept awake all the time. This is intended for a
- * simpler solution for the following problems:
- *
- * * device can enter sleep during s2ram without the host knowing,
- *
- * * irq handlers access registers which is a problem if other device asserts
- * a shared irq line when ath10k is between hif_power_down() and
- * hif_power_up().
- *
- * FIXME: If power consumption is a concern (and there are *real* gains) then a
- * refcounted wake/sleep needs to be implemented.
- */
-
-static int ath10k_pci_wake(struct ath10k *ar)
-{
- ath10k_pci_reg_write32(ar, PCIE_SOC_WAKE_ADDRESS,
- PCIE_SOC_WAKE_V_MASK);
- return ath10k_pci_wake_wait(ar);
-}
-
-static void ath10k_pci_sleep(struct ath10k *ar)
-{
- ath10k_pci_reg_write32(ar, PCIE_SOC_WAKE_ADDRESS,
- PCIE_SOC_WAKE_RESET);
-}
-
/* Called by lower (CE) layer when a send to Target completes. */
static void ath10k_pci_ce_send_done(struct ath10k_ce_pipe *ce_state)
{
@@ -1348,6 +1493,9 @@ static void ath10k_pci_flush(struct ath10k *ar)

static void ath10k_pci_hif_stop(struct ath10k *ar)
{
+ struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+ unsigned long flags;
+
ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot hif stop\n");

/* Most likely the device has HTT Rx ring configured. The only way to
@@ -1366,6 +1514,10 @@ static void ath10k_pci_hif_stop(struct ath10k *ar)
ath10k_pci_irq_disable(ar);
ath10k_pci_irq_sync(ar);
ath10k_pci_flush(ar);
+
+ spin_lock_irqsave(&ar_pci->ps_lock, flags);
+ WARN_ON(ar_pci->ps_wake_refcount > 0);
+ spin_unlock_irqrestore(&ar_pci->ps_lock, flags);
}

static int ath10k_pci_hif_exchange_bmi_msg(struct ath10k *ar,
@@ -1990,12 +2142,6 @@ static int ath10k_pci_hif_power_up(struct ath10k *ar)

ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot hif power up\n");

- ret = ath10k_pci_wake(ar);
- if (ret) {
- ath10k_err(ar, "failed to wake up target: %d\n", ret);
- return ret;
- }
-
pcie_capability_read_word(ar_pci->pdev, PCI_EXP_LNKCTL,
&ar_pci->link_ctl);
pcie_capability_write_word(ar_pci->pdev, PCI_EXP_LNKCTL,
@@ -2047,7 +2193,6 @@ err_ce:
ath10k_pci_ce_deinit(ar);

err_sleep:
- ath10k_pci_sleep(ar);
return ret;
}

@@ -2064,7 +2209,12 @@ static void ath10k_pci_hif_power_down(struct ath10k *ar)

static int ath10k_pci_hif_suspend(struct ath10k *ar)
{
- ath10k_pci_sleep(ar);
+ /* The grace timer can still be counting down and ar->ps_awake be true.
+ * It is known that the device may be asleep after resuming regardless
+ * of the SoC powersave state before suspending. Hence make sure the
+ * device is asleep before proceeding.
+ */
+ ath10k_pci_sleep_sync(ar);

return 0;
}
@@ -2074,13 +2224,6 @@ static int ath10k_pci_hif_resume(struct ath10k *ar)
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
struct pci_dev *pdev = ar_pci->pdev;
u32 val;
- int ret;
-
- ret = ath10k_pci_wake(ar);
- if (ret) {
- ath10k_err(ar, "failed to wake device up on resume: %d\n", ret);
- return ret;
- }

/* Suspend/Resume resets the PCI configuration space, so we have to
* re-disable the RETRY_TIMEOUT register (0x41) to keep PCI Tx retries
@@ -2091,7 +2234,7 @@ static int ath10k_pci_hif_resume(struct ath10k *ar)
if ((val & 0x0000ff00) != 0)
pci_write_config_dword(pdev, 0x40, val & 0xffff00ff);

- return ret;
+ return 0;
}
#endif

@@ -2185,13 +2328,6 @@ static irqreturn_t ath10k_pci_interrupt_handler(int irq, void *arg)
{
struct ath10k *ar = arg;
struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
- int ret;
-
- ret = ath10k_pci_wake(ar);
- if (ret) {
- ath10k_warn(ar, "failed to wake device up on irq: %d\n", ret);
- return IRQ_NONE;
- }

if (ar_pci->num_msi_intrs == 0) {
if (!ath10k_pci_irq_pending(ar))
@@ -2638,8 +2774,12 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
pdev->subsystem_vendor, pdev->subsystem_device);

spin_lock_init(&ar_pci->ce_lock);
+ spin_lock_init(&ar_pci->ps_lock);
+
setup_timer(&ar_pci->rx_post_retry, ath10k_pci_rx_replenish_retry,
(unsigned long)ar);
+ setup_timer(&ar_pci->ps_timer, ath10k_pci_ps_timer,
+ (unsigned long)ar);

ret = ath10k_pci_claim(ar);
if (ret) {
@@ -2647,12 +2787,6 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
goto err_core_destroy;
}

- ret = ath10k_pci_wake(ar);
- if (ret) {
- ath10k_err(ar, "failed to wake up: %d\n", ret);
- goto err_release;
- }
-
ret = ath10k_pci_alloc_pipes(ar);
if (ret) {
ath10k_err(ar, "failed to allocate copy engine pipes: %d\n",
@@ -2716,9 +2850,6 @@ err_free_pipes:
ath10k_pci_free_pipes(ar);

err_sleep:
- ath10k_pci_sleep(ar);
-
-err_release:
ath10k_pci_release(ar);

err_core_destroy:
@@ -2748,6 +2879,7 @@ static void ath10k_pci_remove(struct pci_dev *pdev)
ath10k_pci_deinit_irq(ar);
ath10k_pci_ce_deinit(ar);
ath10k_pci_free_pipes(ar);
+ ath10k_pci_sleep_sync(ar);
ath10k_pci_release(ar);
ath10k_core_destroy(ar);
}
diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h
index ee2173d61257..d7696ddc03c4 100644
--- a/drivers/net/wireless/ath/ath10k/pci.h
+++ b/drivers/net/wireless/ath/ath10k/pci.h
@@ -191,6 +191,35 @@ struct ath10k_pci {
* device bootup is executed and re-programmed later.
*/
u16 link_ctl;
+
+ /* Protects ps_awake and ps_wake_refcount */
+ spinlock_t ps_lock;
+
+ /* The device has a special powersave-oriented register. When device is
+ * considered asleep it drains less power and driver is forbidden from
+ * accessing most MMIO registers. If host were to access them without
+ * waking up the device might scribble over host memory or return
+ * 0xdeadbeef readouts.
+ */
+ unsigned long ps_wake_refcount;
+
+ /* Waking up takes some time (up to 2ms in some cases) so it can be bad
+ * for latency. To mitigate this the device isn't immediately allowed
+ * to sleep after all references are undone - instead there's a grace
+ * period after which the powersave register is updated unless some
+ * activity to/from device happened in the meantime.
+ *
+ * Also see comments on ATH10K_PCI_SLEEP_GRACE_PERIOD_MSEC.
+ */
+ struct timer_list ps_timer;
+
+ /* MMIO registers are used to communicate with the device. With
+ * intensive traffic accessing powersave register would be a bit
+ * wasteful overhead and would needlessly stall CPU. It is far more
+ * efficient to rely on a variable in RAM and update it only upon
+ * powersave register state changes.
+ */
+ bool ps_awake;
};

static inline struct ath10k_pci *ath10k_pci_priv(struct ath10k *ar)
@@ -215,61 +244,25 @@ static inline struct ath10k_pci *ath10k_pci_priv(struct ath10k *ar)
* for this device; but that's not guaranteed.
*/
#define TARG_CPU_SPACE_TO_CE_SPACE(ar, pci_addr, addr) \
- (((ioread32((pci_addr)+(SOC_CORE_BASE_ADDRESS| \
+ (((ath10k_pci_read32(ar, (SOC_CORE_BASE_ADDRESS | \
CORE_CTRL_ADDRESS)) & 0x7ff) << 21) | \
0x100000 | ((addr) & 0xfffff))

/* Wait up to this many Ms for a Diagnostic Access CE operation to complete */
#define DIAG_ACCESS_CE_TIMEOUT_MS 10

-/* Target exposes its registers for direct access. However before host can
- * access them it needs to make sure the target is awake (ath10k_pci_wake,
- * ath10k_pci_wake_wait, ath10k_pci_is_awake). Once target is awake it won't go
- * to sleep unless host tells it to (ath10k_pci_sleep).
- *
- * If host tries to access target registers without waking it up it can
- * scribble over host memory.
- *
- * If target is asleep waking it up may take up to even 2ms.
+void ath10k_pci_write32(struct ath10k *ar, u32 offset, u32 value);
+void ath10k_pci_soc_write32(struct ath10k *ar, u32 addr, u32 val);
+void ath10k_pci_reg_write32(struct ath10k *ar, u32 addr, u32 val);
+
+u32 ath10k_pci_read32(struct ath10k *ar, u32 offset);
+u32 ath10k_pci_soc_read32(struct ath10k *ar, u32 addr);
+u32 ath10k_pci_reg_read32(struct ath10k *ar, u32 addr);
+
+/* QCA6174 is known to have Tx/Rx issues when SOC_WAKE register is poked too
+ * frequently. To avoid this put SoC to sleep after a very conservative grace
+ * period. Adjust with great care.
*/
-
-static inline void ath10k_pci_write32(struct ath10k *ar, u32 offset,
- u32 value)
-{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-
- iowrite32(value, ar_pci->mem + offset);
-}
-
-static inline u32 ath10k_pci_read32(struct ath10k *ar, u32 offset)
-{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-
- return ioread32(ar_pci->mem + offset);
-}
-
-static inline u32 ath10k_pci_soc_read32(struct ath10k *ar, u32 addr)
-{
- return ath10k_pci_read32(ar, RTC_SOC_BASE_ADDRESS + addr);
-}
-
-static inline void ath10k_pci_soc_write32(struct ath10k *ar, u32 addr, u32 val)
-{
- ath10k_pci_write32(ar, RTC_SOC_BASE_ADDRESS + addr, val);
-}
-
-static inline u32 ath10k_pci_reg_read32(struct ath10k *ar, u32 addr)
-{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-
- return ioread32(ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS + addr);
-}
-
-static inline void ath10k_pci_reg_write32(struct ath10k *ar, u32 addr, u32 val)
-{
- struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-
- iowrite32(val, ar_pci->mem + PCIE_LOCAL_BASE_ADDRESS + addr);
-}
+#define ATH10K_PCI_SLEEP_GRACE_PERIOD_MSEC 60

#endif /* _PCI_H_ */
--
2.1.4