2021-09-29 00:42:40

by Saheed O. Bolarinwa

[permalink] [raw]
Subject: [RFC PATCH v3 0/3] PCI/ASPM: Remove struct aspm_latency

From: "Bolarinwa O. Saheed" <[email protected]>

To validate and set link latency capability, `struct aspm_latency` and
related members defined within `struct pcie_link_state` are used.
However, since there are not many accesses to these values, it is
possible to directly access and compute these values.

Doing this will also reduce the dependency on `struct pcie_link_state`.

The series removes `struct aspm_latency` and related members within
`struct pcie_link_state`. All latencies are now calculated when needed.



VERSION CHANGES:
- v2:
- directly access downstream by calling `pci_function_0()`
instead of using the `struct pcie_link_state`
- v3 (this version):
- rebase on Linux 5.15-rc2

MERGE NOTICE:
This series is based on
'commit e4e737bb5c17 ("Linux 5.15-rc2")'


Saheed O. Bolarinwa (3):
PCI/ASPM: Remove link latencies cached within struct pcie_link_state
PCI/ASPM: Remove struct pcie_link_state.acceptable
PCI/ASPM: Remove struct aspm_latency

drivers/pci/pcie/aspm.c | 89 ++++++++++++++++++-----------------------
1 file changed, 38 insertions(+), 51 deletions(-)

--
2.20.1


2021-09-29 00:42:40

by Saheed O. Bolarinwa

[permalink] [raw]
Subject: [RFC PATCH v3 1/3] PCI/ASPM: Do not cache link latencies

The latencies of the upstream and downstream are calculated within
pcie_aspm_cap_init() and cached in struct pcie_link_state.latency_*.
These values are only used in pcie_aspm_check_latency() where they are
compared with the acceptable latencies on the link.

This patch:
- removes `latency_*` entries from struct pcie_link_state.
- calculates the latencies directly where they are needed.
- moves pci_function_0() upward, so that the downstream device can be
obtained by calling it directly.
- further removes dependencies on struct pcie_link_state.

Signed-off-by: Saheed O. Bolarinwa <[email protected]>
---
drivers/pci/pcie/aspm.c | 54 ++++++++++++++++++++++-------------------
1 file changed, 29 insertions(+), 25 deletions(-)

diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 013a47f587ce..9e85dfc56657 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -66,9 +66,6 @@ struct pcie_link_state {
u32 clkpm_default:1; /* Default Clock PM state by BIOS */
u32 clkpm_disable:1; /* Clock PM disabled */

- /* Exit latencies */
- struct aspm_latency latency_up; /* Upstream direction exit latency */
- struct aspm_latency latency_dw; /* Downstream direction exit latency */
/*
* Endpoint acceptable latencies. A pcie downstream port only
* has one slot under it, so at most there are 8 functions.
@@ -376,9 +373,25 @@ static void encode_l12_threshold(u32 threshold_us, u32 *scale, u32 *value)
}
}

+/*
+ * The L1 PM substate capability is only implemented in function 0 in a
+ * multi function device.
+ */
+static struct pci_dev *pci_function_0(struct pci_bus *linkbus)
+{
+ struct pci_dev *child;
+
+ list_for_each_entry(child, &linkbus->devices, bus_list)
+ if (PCI_FUNC(child->devfn) == 0)
+ return child;
+ return NULL;
+}
+
static void pcie_aspm_check_latency(struct pci_dev *endpoint)
{
- u32 latency, l1_switch_latency = 0;
+ u32 latency, lnkcap_up, lnkcap_dw, l1_switch_latency = 0;
+ struct pci_dev *downstream;
+ struct aspm_latency latency_up, latency_dw;
struct aspm_latency *acceptable;
struct pcie_link_state *link;

@@ -388,17 +401,26 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint)
return;

link = endpoint->bus->self->link_state;
+ downstream = pci_function_0(link->pdev->subordinate);
acceptable = &link->acceptable[PCI_FUNC(endpoint->devfn)];

while (link) {
+ /* Read direction exit latencies */
+ pcie_capability_read_dword(link->pdev, PCI_EXP_LNKCAP, &lnkcap_up);
+ pcie_capability_read_dword(downstream, PCI_EXP_LNKCAP, &lnkcap_dw);
+ latency_up.l0s = calc_l0s_latency(lnkcap_up);
+ latency_up.l1 = calc_l1_latency(lnkcap_up);
+ latency_dw.l0s = calc_l0s_latency(lnkcap_dw);
+ latency_dw.l1 = calc_l1_latency(lnkcap_dw);
+
/* Check upstream direction L0s latency */
if ((link->aspm_capable & ASPM_STATE_L0S_UP) &&
- (link->latency_up.l0s > acceptable->l0s))
+ (latency_up.l0s > acceptable->l0s))
link->aspm_capable &= ~ASPM_STATE_L0S_UP;

/* Check downstream direction L0s latency */
if ((link->aspm_capable & ASPM_STATE_L0S_DW) &&
- (link->latency_dw.l0s > acceptable->l0s))
+ (latency_dw.l0s > acceptable->l0s))
link->aspm_capable &= ~ASPM_STATE_L0S_DW;
/*
* Check L1 latency.
@@ -413,7 +435,7 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint)
* L1 exit latencies advertised by a device include L1
* substate latencies (and hence do not do any check).
*/
- latency = max_t(u32, link->latency_up.l1, link->latency_dw.l1);
+ latency = max_t(u32, latency_up.l1, latency_dw.l1);
if ((link->aspm_capable & ASPM_STATE_L1) &&
(latency + l1_switch_latency > acceptable->l1))
link->aspm_capable &= ~ASPM_STATE_L1;
@@ -423,20 +445,6 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint)
}
}

-/*
- * The L1 PM substate capability is only implemented in function 0 in a
- * multi function device.
- */
-static struct pci_dev *pci_function_0(struct pci_bus *linkbus)
-{
- struct pci_dev *child;
-
- list_for_each_entry(child, &linkbus->devices, bus_list)
- if (PCI_FUNC(child->devfn) == 0)
- return child;
- return NULL;
-}
-
static void pci_clear_and_set_dword(struct pci_dev *pdev, int pos,
u32 clear, u32 set)
{
@@ -593,8 +601,6 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
link->aspm_enabled |= ASPM_STATE_L0S_UP;
if (parent_lnkctl & PCI_EXP_LNKCTL_ASPM_L0S)
link->aspm_enabled |= ASPM_STATE_L0S_DW;
- link->latency_up.l0s = calc_l0s_latency(parent_lnkcap);
- link->latency_dw.l0s = calc_l0s_latency(child_lnkcap);

/* Setup L1 state */
if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L1)
@@ -602,8 +608,6 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)

if (parent_lnkctl & child_lnkctl & PCI_EXP_LNKCTL_ASPM_L1)
link->aspm_enabled |= ASPM_STATE_L1;
- link->latency_up.l1 = calc_l1_latency(parent_lnkcap);
- link->latency_dw.l1 = calc_l1_latency(child_lnkcap);

/* Setup L1 substate */
pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CAP,
--
2.20.1

2021-09-29 00:43:28

by Saheed O. Bolarinwa

[permalink] [raw]
Subject: [RFC v3 PATCH 3/3] PCI/ASPM: Remove struct aspm_latency

The struct aspm_latency is now used only inside pcie_aspm_check_latency().

Since this struct is trivial, this patch:
- replaces struct aspm_latency variables with u32 variables
- removes struct aspm_latency

Signed-off-by: Saheed O. Bolarinwa <[email protected]>
---
drivers/pci/pcie/aspm.c | 30 ++++++++++++------------------
1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 0c0c055823f1..8093c9335e1f 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -41,11 +41,6 @@
#define ASPM_STATE_ALL (ASPM_STATE_L0S | ASPM_STATE_L1 | \
ASPM_STATE_L1SS)

-struct aspm_latency {
- u32 l0s; /* L0s latency (nsec) */
- u32 l1; /* L1 latency (nsec) */
-};
-
struct pcie_link_state {
struct pci_dev *pdev; /* Upstream component of the Link */
struct pci_dev *downstream; /* Downstream component, function 0 */
@@ -384,9 +379,9 @@ static struct pci_dev *pci_function_0(struct pci_bus *linkbus)
static void pcie_aspm_check_latency(struct pci_dev *endpoint)
{
u32 reg32, latency, encoding, lnkcap_up, lnkcap_dw, l1_switch_latency = 0;
+ u32 latency_up_l0s, latency_up_l1, latency_dw_l0s, latency_dw_l1;
+ u32 acceptable_l0s, acceptable_l1;
struct pci_dev *downstream;
- struct aspm_latency latency_up, latency_dw;
- struct aspm_latency *acceptable;
struct pcie_link_state *link;

/* Device not in D0 doesn't need latency check */
@@ -399,28 +394,28 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint)
pcie_capability_read_dword(endpoint, PCI_EXP_DEVCAP, &reg32);
/* Calculate endpoint L0s acceptable latency */
encoding = (reg32 & PCI_EXP_DEVCAP_L0S) >> 6;
- acceptable->l0s = calc_l0s_acceptable(encoding);
+ acceptable_l0s = calc_l0s_acceptable(encoding);
/* Calculate endpoint L1 acceptable latency */
encoding = (reg32 & PCI_EXP_DEVCAP_L1) >> 9;
- acceptable->l1 = calc_l1_acceptable(encoding);
+ acceptable_l1 = calc_l1_acceptable(encoding);

while (link) {
/* Read direction exit latencies */
pcie_capability_read_dword(link->pdev, PCI_EXP_LNKCAP, &lnkcap_up);
pcie_capability_read_dword(downstream, PCI_EXP_LNKCAP, &lnkcap_dw);
- latency_up.l0s = calc_l0s_latency(lnkcap_up);
- latency_up.l1 = calc_l1_latency(lnkcap_up);
- latency_dw.l0s = calc_l0s_latency(lnkcap_dw);
- latency_dw.l1 = calc_l1_latency(lnkcap_dw);
+ latency_up_l0s = calc_l0s_latency(lnkcap_up);
+ latency_up_l1 = calc_l1_latency(lnkcap_up);
+ latency_dw_l0s = calc_l0s_latency(lnkcap_dw);
+ latency_dw_l1 = calc_l1_latency(lnkcap_dw);

/* Check upstream direction L0s latency */
if ((link->aspm_capable & ASPM_STATE_L0S_UP) &&
- (latency_up.l0s > acceptable->l0s))
+ (latency_up_l0s > acceptable_l0s))
link->aspm_capable &= ~ASPM_STATE_L0S_UP;

/* Check downstream direction L0s latency */
if ((link->aspm_capable & ASPM_STATE_L0S_DW) &&
- (latency_dw.l0s > acceptable->l0s))
+ (latency_dw_l0s > acceptable_l0s))
link->aspm_capable &= ~ASPM_STATE_L0S_DW;
/*
* Check L1 latency.
@@ -435,9 +430,9 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint)
* L1 exit latencies advertised by a device include L1
* substate latencies (and hence do not do any check).
*/
- latency = max_t(u32, latency_up.l1, latency_dw.l1);
+ latency = max_t(u32, latency_up_l1, latency_dw_l1);
if ((link->aspm_capable & ASPM_STATE_L1) &&
- (latency + l1_switch_latency > acceptable->l1))
+ (latency + l1_switch_latency > acceptable_l1))
link->aspm_capable &= ~ASPM_STATE_L1;
l1_switch_latency += 1000;

@@ -664,7 +659,6 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)

/* Get and check endpoint acceptable latencies */
list_for_each_entry(child, &linkbus->devices, bus_list) {
-
if (pci_pcie_type(child) != PCI_EXP_TYPE_ENDPOINT &&
pci_pcie_type(child) != PCI_EXP_TYPE_LEG_END)
continue;
--
2.20.1

2021-09-29 00:44:12

by Saheed O. Bolarinwa

[permalink] [raw]
Subject: [RFC PATCH v3 2/3] PCI/ASPM: Remove struct pcie_link_state.acceptable

The acceptable latencies for each device on the bus are calculated within
pcie_aspm_cap_init() and cached in struct pcie_link_state.acceptable.
They are only used within pcie_aspm_check_latency() to validate actual
latencies. Thus, it is possible to avoid caching these values.

This patch:
- removes `acceptable` from struct pcie_link_state
- calculates the acceptable latency for each device directly
- removes the calculations done within pcie_aspm_cap_init()

Signed-off-by: Saheed O. Bolarinwa <[email protected]>
---
drivers/pci/pcie/aspm.c | 27 ++++++++-------------------
1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 9e85dfc56657..0c0c055823f1 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -65,12 +65,6 @@ struct pcie_link_state {
u32 clkpm_enabled:1; /* Current Clock PM state */
u32 clkpm_default:1; /* Default Clock PM state by BIOS */
u32 clkpm_disable:1; /* Clock PM disabled */
-
- /*
- * Endpoint acceptable latencies. A pcie downstream port only
- * has one slot under it, so at most there are 8 functions.
- */
- struct aspm_latency acceptable[8];
};

static int aspm_disabled, aspm_force;
@@ -389,7 +383,7 @@ static struct pci_dev *pci_function_0(struct pci_bus *linkbus)

static void pcie_aspm_check_latency(struct pci_dev *endpoint)
{
- u32 latency, lnkcap_up, lnkcap_dw, l1_switch_latency = 0;
+ u32 reg32, latency, encoding, lnkcap_up, lnkcap_dw, l1_switch_latency = 0;
struct pci_dev *downstream;
struct aspm_latency latency_up, latency_dw;
struct aspm_latency *acceptable;
@@ -402,7 +396,13 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint)

link = endpoint->bus->self->link_state;
downstream = pci_function_0(link->pdev->subordinate);
- acceptable = &link->acceptable[PCI_FUNC(endpoint->devfn)];
+ pcie_capability_read_dword(endpoint, PCI_EXP_DEVCAP, &reg32);
+ /* Calculate endpoint L0s acceptable latency */
+ encoding = (reg32 & PCI_EXP_DEVCAP_L0S) >> 6;
+ acceptable->l0s = calc_l0s_acceptable(encoding);
+ /* Calculate endpoint L1 acceptable latency */
+ encoding = (reg32 & PCI_EXP_DEVCAP_L1) >> 9;
+ acceptable->l1 = calc_l1_acceptable(encoding);

while (link) {
/* Read direction exit latencies */
@@ -664,22 +664,11 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)

/* Get and check endpoint acceptable latencies */
list_for_each_entry(child, &linkbus->devices, bus_list) {
- u32 reg32, encoding;
- struct aspm_latency *acceptable =
- &link->acceptable[PCI_FUNC(child->devfn)];

if (pci_pcie_type(child) != PCI_EXP_TYPE_ENDPOINT &&
pci_pcie_type(child) != PCI_EXP_TYPE_LEG_END)
continue;

- pcie_capability_read_dword(child, PCI_EXP_DEVCAP, &reg32);
- /* Calculate endpoint L0s acceptable latency */
- encoding = (reg32 & PCI_EXP_DEVCAP_L0S) >> 6;
- acceptable->l0s = calc_l0s_acceptable(encoding);
- /* Calculate endpoint L1 acceptable latency */
- encoding = (reg32 & PCI_EXP_DEVCAP_L1) >> 9;
- acceptable->l1 = calc_l1_acceptable(encoding);
-
pcie_aspm_check_latency(child);
}
}
--
2.20.1

2021-09-30 23:38:33

by Bjorn Helgaas

[permalink] [raw]
Subject: Re: [RFC PATCH v3 1/3] PCI/ASPM: Do not cache link latencies

On Wed, Sep 29, 2021 at 02:41:14AM +0200, Saheed O. Bolarinwa wrote:
> The latencies of the upstream and downstream are calculated within
> pcie_aspm_cap_init() and cached in struct pcie_link_state.latency_*
> These values are only used in pcie_aspm_check_latency() where they are
> compared with the acceptable latencies on the link.
>
> This patch:
> - removes `latency_*` entries from struct pcie_link_state.
> - calculates the latencies directly where they are needed.
> - moves pci_function_0() upward, so that the downstream device can be
> obtained by calling it directly.

Ideally I would put the move in its own preliminary patch so the
important parts of this patch are more visible.

> - further removes dependencies on struct pcie_link_state.
>
> Signed-off-by: Saheed O. Bolarinwa <[email protected]>
> ---
> drivers/pci/pcie/aspm.c | 54 ++++++++++++++++++++++-------------------
> 1 file changed, 29 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
> index 013a47f587ce..9e85dfc56657 100644
> --- a/drivers/pci/pcie/aspm.c
> +++ b/drivers/pci/pcie/aspm.c
> @@ -66,9 +66,6 @@ struct pcie_link_state {
> u32 clkpm_default:1; /* Default Clock PM state by BIOS */
> u32 clkpm_disable:1; /* Clock PM disabled */
>
> - /* Exit latencies */
> - struct aspm_latency latency_up; /* Upstream direction exit latency */
> - struct aspm_latency latency_dw; /* Downstream direction exit latency */
> /*
> * Endpoint acceptable latencies. A pcie downstream port only
> * has one slot under it, so at most there are 8 functions.
> @@ -376,9 +373,25 @@ static void encode_l12_threshold(u32 threshold_us, u32 *scale, u32 *value)
> }
> }
>
> +/*
> + * The L1 PM substate capability is only implemented in function 0 in a
> + * multi function device.
> + */
> +static struct pci_dev *pci_function_0(struct pci_bus *linkbus)
> +{
> + struct pci_dev *child;
> +
> + list_for_each_entry(child, &linkbus->devices, bus_list)
> + if (PCI_FUNC(child->devfn) == 0)
> + return child;
> + return NULL;
> +}
> +
> static void pcie_aspm_check_latency(struct pci_dev *endpoint)
> {
> - u32 latency, l1_switch_latency = 0;
> + u32 latency, lnkcap_up, lnkcap_dw, l1_switch_latency = 0;
> + struct pci_dev *downstream;
> + struct aspm_latency latency_up, latency_dw;
> struct aspm_latency *acceptable;
> struct pcie_link_state *link;
>
> @@ -388,17 +401,26 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint)
> return;
>
> link = endpoint->bus->self->link_state;
> + downstream = pci_function_0(link->pdev->subordinate);

This looks like it's in the wrong patch. Isn't "downstream" still the
same as link->downstream? I think you have another patch that removes
pcie_link_state.downstream, and this hunk should go in that patch.

> acceptable = &link->acceptable[PCI_FUNC(endpoint->devfn)];
>
> while (link) {
> + /* Read direction exit latencies */
> + pcie_capability_read_dword(link->pdev, PCI_EXP_LNKCAP, &lnkcap_up);
> + pcie_capability_read_dword(downstream, PCI_EXP_LNKCAP, &lnkcap_dw);
> + latency_up.l0s = calc_l0s_latency(lnkcap_up);
> + latency_up.l1 = calc_l1_latency(lnkcap_up);
> + latency_dw.l0s = calc_l0s_latency(lnkcap_dw);
> + latency_dw.l1 = calc_l1_latency(lnkcap_dw);

I like this a lot.

> /* Check upstream direction L0s latency */
> if ((link->aspm_capable & ASPM_STATE_L0S_UP) &&
> - (link->latency_up.l0s > acceptable->l0s))
> + (latency_up.l0s > acceptable->l0s))
> link->aspm_capable &= ~ASPM_STATE_L0S_UP;
>
> /* Check downstream direction L0s latency */
> if ((link->aspm_capable & ASPM_STATE_L0S_DW) &&
> - (link->latency_dw.l0s > acceptable->l0s))
> + (latency_dw.l0s > acceptable->l0s))
> link->aspm_capable &= ~ASPM_STATE_L0S_DW;
> /*
> * Check L1 latency.
> @@ -413,7 +435,7 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint)
> * L1 exit latencies advertised by a device include L1
> * substate latencies (and hence do not do any check).
> */
> - latency = max_t(u32, link->latency_up.l1, link->latency_dw.l1);
> + latency = max_t(u32, latency_up.l1, latency_dw.l1);
> if ((link->aspm_capable & ASPM_STATE_L1) &&
> (latency + l1_switch_latency > acceptable->l1))
> link->aspm_capable &= ~ASPM_STATE_L1;
> @@ -423,20 +445,6 @@ static void pcie_aspm_check_latency(struct pci_dev *endpoint)
> }
> }
>
> -/*
> - * The L1 PM substate capability is only implemented in function 0 in a
> - * multi function device.
> - */
> -static struct pci_dev *pci_function_0(struct pci_bus *linkbus)
> -{
> - struct pci_dev *child;
> -
> - list_for_each_entry(child, &linkbus->devices, bus_list)
> - if (PCI_FUNC(child->devfn) == 0)
> - return child;
> - return NULL;
> -}
> -
> static void pci_clear_and_set_dword(struct pci_dev *pdev, int pos,
> u32 clear, u32 set)
> {
> @@ -593,8 +601,6 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
> link->aspm_enabled |= ASPM_STATE_L0S_UP;
> if (parent_lnkctl & PCI_EXP_LNKCTL_ASPM_L0S)
> link->aspm_enabled |= ASPM_STATE_L0S_DW;
> - link->latency_up.l0s = calc_l0s_latency(parent_lnkcap);
> - link->latency_dw.l0s = calc_l0s_latency(child_lnkcap);
>
> /* Setup L1 state */
> if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L1)
> @@ -602,8 +608,6 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist)
>
> if (parent_lnkctl & child_lnkctl & PCI_EXP_LNKCTL_ASPM_L1)
> link->aspm_enabled |= ASPM_STATE_L1;
> - link->latency_up.l1 = calc_l1_latency(parent_lnkcap);
> - link->latency_dw.l1 = calc_l1_latency(child_lnkcap);
>
> /* Setup L1 substate */
> pci_read_config_dword(parent, parent->l1ss + PCI_L1SS_CAP,
> --
> 2.20.1
>