2014-04-28 22:05:25

by Jacob Pan

[permalink] [raw]
Subject: [PATCH 0/5] RAPL driver updates

A few updates to the RAPL driver. Including David's IOSF-SB driver change
and a small tweak to allow RAPL driver to use IOSF driver on platforms
with and without IOSF mailbox interface.

David E. Box (1):
x86, iosf: Add dummy functions for loadable modules

Jacob Pan (4):
powercap/rapl: further relax energy counter checks
powercap/rapl: add new cpu ids
x86/iosf: kconfig and used by other drivers
powercap/rapl: change floor frequency for vallewview

arch/x86/Kconfig | 2 +-
arch/x86/include/asm/iosf_mbi.h | 34 +++++++++++++++++++++++
arch/x86/kernel/iosf_mbi.c | 6 +++++
drivers/powercap/intel_rapl.c | 60 +++++++++++++++++++++--------------------
4 files changed, 72 insertions(+), 30 deletions(-)

--
1.8.1.2


2014-04-28 22:05:27

by Jacob Pan

[permalink] [raw]
Subject: [PATCH 1/5] powercap/rapl: further relax energy counter checks

Energy counters may roll slowly for some RAPL domains, checking all
of them can be time consuming and takes unpredictable amount of time.
Therefore, we relax the sanity check by only checking availability of the
MSRs and non-zero value of the energy status counters. It has been shown
sufficient for all the platforms tested to filter out inactive domains.

Signed-off-by: Jacob Pan <[email protected]>
---
drivers/powercap/intel_rapl.c | 29 +++++++++--------------------
1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index d9a0770..1c987d2 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -1124,8 +1124,7 @@ err_cleanup_package:
static int rapl_check_domain(int cpu, int domain)
{
unsigned msr;
- u64 val1, val2 = 0;
- int retry = 0;
+ u64 val = 0;

switch (domain) {
case RAPL_DOMAIN_PACKAGE:
@@ -1144,26 +1143,13 @@ static int rapl_check_domain(int cpu, int domain)
pr_err("invalid domain id %d\n", domain);
return -EINVAL;
}
- if (rdmsrl_safe_on_cpu(cpu, msr, &val1))
- return -ENODEV;
-
- /* PP1/uncore/graphics domain may not be active at the time of
- * driver loading. So skip further checks.
+ /* make sure domain counters are available and contains non-zero
+ * values, otherwise skip it.
*/
- if (domain == RAPL_DOMAIN_PP1)
- return 0;
- /* energy counters roll slowly on some domains */
- while (++retry < 10) {
- usleep_range(10000, 15000);
- rdmsrl_safe_on_cpu(cpu, msr, &val2);
- if ((val1 & ENERGY_STATUS_MASK) != (val2 & ENERGY_STATUS_MASK))
- return 0;
- }
- /* if energy counter does not change, report as bad domain */
- pr_info("domain %s energy ctr %llu:%llu not working, skip\n",
- rapl_domain_names[domain], val1, val2);
+ if (rdmsrl_safe_on_cpu(cpu, msr, &val) || !val)
+ return -ENODEV;

- return -ENODEV;
+ return 0;
}

/* Detect active and valid domains for the given CPU, caller must
@@ -1180,6 +1166,9 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu)
/* use physical package id to read counters */
if (!rapl_check_domain(cpu, i))
rp->domain_map |= 1 << i;
+ else
+ pr_warn("RAPL domain %s detection failed\n",
+ rapl_domain_names[i]);
}
rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX);
if (!rp->nr_domains) {
--
1.8.1.2

2014-04-28 22:05:41

by Jacob Pan

[permalink] [raw]
Subject: [PATCH 5/5] powercap/rapl: change floor frequency for vallewview

RAPL power limit reduce power by limiting CPU P-state and
other techniques. On Valleyview, RAPL power limit cannot
go to LFM (low frequency mode) if we don't set the floor
frequency via IOSF mailbox.

This patch enables setting of floor frquency such that
RAPL power limit is more effective.

Signed-off-by: Jacob Pan <[email protected]>
---
drivers/powercap/intel_rapl.c | 27 +++++++++++++++++++--------
1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index b1cda6f..13e4776 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -32,6 +32,7 @@

#include <asm/processor.h>
#include <asm/cpu_device_id.h>
+#include <asm/iosf_mbi.h>

/* bitmasks for RAPL MSRs, used by primitive access functions */
#define ENERGY_STATUS_MASK 0xffffffff
@@ -336,11 +337,17 @@ static int find_nr_power_limit(struct rapl_domain *rd)
return i;
}

+#define VLV_CPU_POWER_BUDGET_CTL (0x2)
+static const struct x86_cpu_id valleyview_id[] = {
+ { X86_VENDOR_INTEL, 6, 0x37},
+ {}
+};
+
static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
{
struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
int nr_powerlimit;
-
+ u32 mdata = 0;
if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
return -EACCES;
get_online_cpus();
@@ -350,7 +357,16 @@ static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
/* always enable clamp such that p-state can go below OS requested
* range. power capping priority over guranteed frequency.
*/
- rapl_write_data_raw(rd, PL1_CLAMP, mode);
+ if (x86_match_cpu(valleyview_id)) {
+ iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_PMC_READ,
+ VLV_CPU_POWER_BUDGET_CTL, &mdata);
+ mdata &= ~(0x7f << 8);
+ mdata |= 1 << 8;
+ iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_PMC_WRITE,
+ VLV_CPU_POWER_BUDGET_CTL, mdata);
+ } else
+ rapl_write_data_raw(rd, PL1_CLAMP, mode);
+
/* some domains have pl2 */
if (nr_powerlimit > 1) {
rapl_write_data_raw(rd, PL2_ENABLE, mode);
@@ -833,11 +849,6 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
return 0;
}

-static const struct x86_cpu_id energy_unit_quirk_ids[] = {
- { X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */
- {}
-};
-
static int rapl_check_unit(struct rapl_package *rp, int cpu)
{
u64 msr_val;
@@ -859,7 +870,7 @@ static int rapl_check_unit(struct rapl_package *rp, int cpu)
*/
value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
/* some CPUs have different way to calculate energy unit */
- if (x86_match_cpu(energy_unit_quirk_ids))
+ if (x86_match_cpu(valleyview_id))
rp->energy_unit_divisor = 1000000 / (1 << value);
else
rp->energy_unit_divisor = 1 << value;
--
1.8.1.2

2014-04-28 22:05:56

by Jacob Pan

[permalink] [raw]
Subject: [PATCH 4/5] x86/iosf: kconfig and used by other drivers

Allow Kconfig selection for IOSF driver. Fix warning condition
when MBI interface dummy functions are called at runtime.

Signed-off-by: Jacob Pan <[email protected]>
---
arch/x86/Kconfig | 2 +-
arch/x86/include/asm/iosf_mbi.h | 9 +++++----
2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 25d2c6f..cda587b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2375,7 +2375,7 @@ config X86_DMA_REMAP
depends on STA2X11

config IOSF_MBI
- bool
+ bool "Intel OnChip System Fabric mailbox"
depends on PCI
---help---
To be selected by modules requiring access to the Intel OnChip System
diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h
index 9fc5402..74336f0 100644
--- a/arch/x86/include/asm/iosf_mbi.h
+++ b/arch/x86/include/asm/iosf_mbi.h
@@ -5,7 +5,6 @@
#ifndef IOSF_MBI_SYMS_H
#define IOSF_MBI_SYMS_H

-#ifdef CONFIG_IOSF_MBI

#define MBI_MCR_OFFSET 0xD0
#define MBI_MDR_OFFSET 0xD4
@@ -52,6 +51,8 @@
#define BT_MBI_PCIE_READ 0x00
#define BT_MBI_PCIE_WRITE 0x01

+#ifdef CONFIG_IOSF_MBI
+
bool iosf_mbi_available(void);

/**
@@ -101,21 +102,21 @@ bool iosf_mbi_available(void)
static inline
int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr)
{
- WARN();
+ WARN(1, "MBI driver not available");
return -EPERM;
}

static inline
int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr)
{
- WARN();
+ WARN(1, "MBI driver not available");
return -EPERM;
}

static inline
int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask)
{
- WARN();
+ WARN(1, "MBI driver not available");
return -EPERM;
}
#endif /* CONFIG_IOSF_MBI */
--
1.8.1.2

2014-04-28 22:06:26

by Jacob Pan

[permalink] [raw]
Subject: [PATCH 3/5] x86, iosf: Add dummy functions for loadable modules

From: "David E. Box" <[email protected]>

Some loadable modules only need IOSF access on the platforms where it exists.
Provide dummy functions to allow these modules to compile and load on the
platforms where it doesn't exist.

Signed-off-by: David E. Box <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: H. Peter Anvin <[email protected]>
---
arch/x86/include/asm/iosf_mbi.h | 33 +++++++++++++++++++++++++++++++++
arch/x86/kernel/iosf_mbi.c | 6 ++++++
2 files changed, 39 insertions(+)

diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h
index 8e71c79..9fc5402 100644
--- a/arch/x86/include/asm/iosf_mbi.h
+++ b/arch/x86/include/asm/iosf_mbi.h
@@ -5,6 +5,8 @@
#ifndef IOSF_MBI_SYMS_H
#define IOSF_MBI_SYMS_H

+#ifdef CONFIG_IOSF_MBI
+
#define MBI_MCR_OFFSET 0xD0
#define MBI_MDR_OFFSET 0xD4
#define MBI_MCRX_OFFSET 0xD8
@@ -50,6 +52,8 @@
#define BT_MBI_PCIE_READ 0x00
#define BT_MBI_PCIE_WRITE 0x01

+bool iosf_mbi_available(void);
+
/**
* iosf_mbi_read() - MailBox Interface read command
* @port: port indicating subunit being accessed
@@ -87,4 +91,33 @@ int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr);
*/
int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask);

+#else /* CONFIG_IOSF_MBI is not enabled */
+static inline
+bool iosf_mbi_available(void)
+{
+ return false;
+}
+
+static inline
+int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr)
+{
+ WARN();
+ return -EPERM;
+}
+
+static inline
+int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr)
+{
+ WARN();
+ return -EPERM;
+}
+
+static inline
+int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask)
+{
+ WARN();
+ return -EPERM;
+}
+#endif /* CONFIG_IOSF_MBI */
+
#endif /* IOSF_MBI_SYMS_H */
diff --git a/arch/x86/kernel/iosf_mbi.c b/arch/x86/kernel/iosf_mbi.c
index c3aae66..d3803c6 100644
--- a/arch/x86/kernel/iosf_mbi.c
+++ b/arch/x86/kernel/iosf_mbi.c
@@ -177,6 +177,12 @@ int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask)
}
EXPORT_SYMBOL(iosf_mbi_modify);

+bool iosf_mbi_available(void)
+{
+ return mbi_pdev;
+}
+EXPORT_SYMBOL(iosf_mbi_available);
+
static int iosf_mbi_probe(struct pci_dev *pdev,
const struct pci_device_id *unused)
{
--
1.8.1.2

2014-04-28 22:06:27

by Jacob Pan

[permalink] [raw]
Subject: [PATCH 2/5] powercap/rapl: add new cpu ids

Adding support for Broadwell model 0x3d and HSW model (0x3c)

Signed-off-by: Jacob Pan <[email protected]>
---
drivers/powercap/intel_rapl.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 1c987d2..b1cda6f 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -951,7 +951,9 @@ static const struct x86_cpu_id rapl_ids[] = {
{ X86_VENDOR_INTEL, 6, 0x2d},/* Sandy Bridge EP */
{ X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */
{ X86_VENDOR_INTEL, 6, 0x3a},/* Ivy Bridge */
- { X86_VENDOR_INTEL, 6, 0x45},/* Haswell */
+ { X86_VENDOR_INTEL, 6, 0x3c},/* Haswell */
+ { X86_VENDOR_INTEL, 6, 0x3d},/* Broadwell */
+ { X86_VENDOR_INTEL, 6, 0x45},/* Haswell ULT */
/* TODO: Add more CPU IDs after testing */
{}
};
--
1.8.1.2

2014-04-29 02:45:30

by R, Durgadoss

[permalink] [raw]
Subject: RE: [PATCH 5/5] powercap/rapl: change floor frequency for vallewview

Hi Jacob,

> -----Original Message-----
> From: Jacob Pan [mailto:[email protected]]
> Sent: Monday, April 28, 2014 7:35 PM
> To: Linux PM; Wysocki, Rafael J; LKML
> Cc: David E. Box; Alan Cox; R, Durgadoss; Accardi, Kristen C; Jacob Pan
> Subject: [PATCH 5/5] powercap/rapl: change floor frequency for vallewview
>
> RAPL power limit reduce power by limiting CPU P-state and
> other techniques. On Valleyview, RAPL power limit cannot
> go to LFM (low frequency mode) if we don't set the floor
> frequency via IOSF mailbox.
>
> This patch enables setting of floor frquency such that
> RAPL power limit is more effective.
>
> Signed-off-by: Jacob Pan <[email protected]>
> ---
> drivers/powercap/intel_rapl.c | 27 +++++++++++++++++++--------
> 1 file changed, 19 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
> index b1cda6f..13e4776 100644
> --- a/drivers/powercap/intel_rapl.c
> +++ b/drivers/powercap/intel_rapl.c
> @@ -32,6 +32,7 @@
>
> #include <asm/processor.h>
> #include <asm/cpu_device_id.h>
> +#include <asm/iosf_mbi.h>
>
> /* bitmasks for RAPL MSRs, used by primitive access functions */
> #define ENERGY_STATUS_MASK 0xffffffff
> @@ -336,11 +337,17 @@ static int find_nr_power_limit(struct rapl_domain *rd)
> return i;
> }
>
> +#define VLV_CPU_POWER_BUDGET_CTL (0x2)
> +static const struct x86_cpu_id valleyview_id[] = {
> + { X86_VENDOR_INTEL, 6, 0x37},
> + {}
> +};

There are other platforms that have this FloorFreq register as well.
And those addresses are not '0x02'. So, we need to have a cpu_id based
table to define the address of the floor freq register as well.
[This is not specific to valleyview.]

Also, is there a plan to expose this floor freq ratio through Sysfs for
runtime configuration. ? May be through a standard thermal cooling device
interface ?

> +
> static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
> {
> struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
> int nr_powerlimit;
> -
> + u32 mdata = 0;
> if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
> return -EACCES;
> get_online_cpus();
> @@ -350,7 +357,16 @@ static int set_domain_enable(struct powercap_zone
> *power_zone, bool mode)
> /* always enable clamp such that p-state can go below OS requested
> * range. power capping priority over guranteed frequency.
> */
> - rapl_write_data_raw(rd, PL1_CLAMP, mode);
> + if (x86_match_cpu(valleyview_id)) {
> + iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_PMC_READ,
> + VLV_CPU_POWER_BUDGET_CTL, &mdata);
> + mdata &= ~(0x7f << 8);
> + mdata |= 1 << 8;
> + iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_PMC_WRITE,
> + VLV_CPU_POWER_BUDGET_CTL, mdata);
> + } else
> + rapl_write_data_raw(rd, PL1_CLAMP, mode);
> +
> /* some domains have pl2 */
> if (nr_powerlimit > 1) {
> rapl_write_data_raw(rd, PL2_ENABLE, mode);
> @@ -833,11 +849,6 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
> return 0;
> }
>
> -static const struct x86_cpu_id energy_unit_quirk_ids[] = {
> - { X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */
> - {}
> -};

Same thing here. There are other Atom platforms that need this
conversion quirk. So, please keep the table as is.

Thanks,
Durga

> -
> static int rapl_check_unit(struct rapl_package *rp, int cpu)
> {
> u64 msr_val;
> @@ -859,7 +870,7 @@ static int rapl_check_unit(struct rapl_package *rp, int cpu)
> */
> value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
> /* some CPUs have different way to calculate energy unit */
> - if (x86_match_cpu(energy_unit_quirk_ids))
> + if (x86_match_cpu(valleyview_id))
> rp->energy_unit_divisor = 1000000 / (1 << value);
> else
> rp->energy_unit_divisor = 1 << value;
> --
> 1.8.1.2

2014-04-29 13:02:44

by Jacob Pan

[permalink] [raw]
Subject: Re: [PATCH 5/5] powercap/rapl: change floor frequency for vallewview

On Tue, 29 Apr 2014 02:45:22 +0000
"R, Durgadoss" <[email protected]> wrote:

> Hi Jacob,
>
> > -----Original Message-----
> > From: Jacob Pan [mailto:[email protected]]
> > Sent: Monday, April 28, 2014 7:35 PM
> > To: Linux PM; Wysocki, Rafael J; LKML
> > Cc: David E. Box; Alan Cox; R, Durgadoss; Accardi, Kristen C; Jacob
> > Pan Subject: [PATCH 5/5] powercap/rapl: change floor frequency for
> > vallewview
> >
> > RAPL power limit reduce power by limiting CPU P-state and
> > other techniques. On Valleyview, RAPL power limit cannot
> > go to LFM (low frequency mode) if we don't set the floor
> > frequency via IOSF mailbox.
> >
> > This patch enables setting of floor frquency such that
> > RAPL power limit is more effective.
> >
> > Signed-off-by: Jacob Pan <[email protected]>
> > ---
> > drivers/powercap/intel_rapl.c | 27 +++++++++++++++++++--------
> > 1 file changed, 19 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/powercap/intel_rapl.c
> > b/drivers/powercap/intel_rapl.c index b1cda6f..13e4776 100644
> > --- a/drivers/powercap/intel_rapl.c
> > +++ b/drivers/powercap/intel_rapl.c
> > @@ -32,6 +32,7 @@
> >
> > #include <asm/processor.h>
> > #include <asm/cpu_device_id.h>
> > +#include <asm/iosf_mbi.h>
> >
> > /* bitmasks for RAPL MSRs, used by primitive access functions */
> > #define ENERGY_STATUS_MASK 0xffffffff
> > @@ -336,11 +337,17 @@ static int find_nr_power_limit(struct
> > rapl_domain *rd) return i;
> > }
> >
> > +#define VLV_CPU_POWER_BUDGET_CTL (0x2)
> > +static const struct x86_cpu_id valleyview_id[] = {
> > + { X86_VENDOR_INTEL, 6, 0x37},
> > + {}
> > +};
>
> There are other platforms that have this FloorFreq register as well.
> And those addresses are not '0x02'. So, we need to have a cpu_id based
> table to define the address of the floor freq register as well.
> [This is not specific to valleyview.]
>
Sounds like I need to add an abstraction to capture this. So far, there
are only two exceptions so i was hesitate to do so. Thanks for the
input.
> Also, is there a plan to expose this floor freq ratio through Sysfs
> for runtime configuration. ? May be through a standard thermal
> cooling device interface ?
>
why would that be necessary? who will use it? floor freq only affects
RAPL, AFAIK. In Linux there is no guaranteed freq anyway. My original
patch to enable RAPL as cooling device was abandoned in favor of
powercap framework, I am not sure if we should go back.

> > +
> > static int set_domain_enable(struct powercap_zone *power_zone,
> > bool mode) {
> > struct rapl_domain *rd =
> > power_zone_to_rapl_domain(power_zone); int nr_powerlimit;
> > -
> > + u32 mdata = 0;
> > if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
> > return -EACCES;
> > get_online_cpus();
> > @@ -350,7 +357,16 @@ static int set_domain_enable(struct
> > powercap_zone *power_zone, bool mode)
> > /* always enable clamp such that p-state can go below OS
> > requested
> > * range. power capping priority over guranteed frequency.
> > */
> > - rapl_write_data_raw(rd, PL1_CLAMP, mode);
> > + if (x86_match_cpu(valleyview_id)) {
> > + iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_PMC_READ,
> > + VLV_CPU_POWER_BUDGET_CTL, &mdata);
> > + mdata &= ~(0x7f << 8);
> > + mdata |= 1 << 8;
> > + iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_PMC_WRITE,
> > + VLV_CPU_POWER_BUDGET_CTL, mdata);
> > + } else
> > + rapl_write_data_raw(rd, PL1_CLAMP, mode);
> > +
> > /* some domains have pl2 */
> > if (nr_powerlimit > 1) {
> > rapl_write_data_raw(rd, PL2_ENABLE, mode);
> > @@ -833,11 +849,6 @@ static int rapl_write_data_raw(struct
> > rapl_domain *rd, return 0;
> > }
> >
> > -static const struct x86_cpu_id energy_unit_quirk_ids[] = {
> > - { X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */
> > - {}
> > -};
>
> Same thing here. There are other Atom platforms that need this
> conversion quirk. So, please keep the table as is.
>
> Thanks,
> Durga
>
> > -
> > static int rapl_check_unit(struct rapl_package *rp, int cpu)
> > {
> > u64 msr_val;
> > @@ -859,7 +870,7 @@ static int rapl_check_unit(struct rapl_package
> > *rp, int cpu) */
> > value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
> > /* some CPUs have different way to calculate energy unit */
> > - if (x86_match_cpu(energy_unit_quirk_ids))
> > + if (x86_match_cpu(valleyview_id))
> > rp->energy_unit_divisor = 1000000 / (1 << value);
> > else
> > rp->energy_unit_divisor = 1 << value;
> > --
> > 1.8.1.2
>

[Jacob Pan]

2014-04-29 14:40:57

by R, Durgadoss

[permalink] [raw]
Subject: RE: [PATCH 5/5] powercap/rapl: change floor frequency for vallewview

> -----Original Message-----
> From: Jacob Pan [mailto:[email protected]]
> Sent: Tuesday, April 29, 2014 6:33 PM
> To: R, Durgadoss
> Cc: Linux PM; Wysocki, Rafael J; LKML; David E. Box; Alan Cox; Accardi, Kristen C
> Subject: Re: [PATCH 5/5] powercap/rapl: change floor frequency for vallewview
>
> On Tue, 29 Apr 2014 02:45:22 +0000
> "R, Durgadoss" <[email protected]> wrote:
>
> > Hi Jacob,
> >
> > > -----Original Message-----
> > > From: Jacob Pan [mailto:[email protected]]
> > > Sent: Monday, April 28, 2014 7:35 PM
> > > To: Linux PM; Wysocki, Rafael J; LKML
> > > Cc: David E. Box; Alan Cox; R, Durgadoss; Accardi, Kristen C; Jacob
> > > Pan Subject: [PATCH 5/5] powercap/rapl: change floor frequency for
> > > vallewview
> > >
> > > RAPL power limit reduce power by limiting CPU P-state and
> > > other techniques. On Valleyview, RAPL power limit cannot
> > > go to LFM (low frequency mode) if we don't set the floor
> > > frequency via IOSF mailbox.
> > >
> > > This patch enables setting of floor frquency such that
> > > RAPL power limit is more effective.
> > >
> > > Signed-off-by: Jacob Pan <[email protected]>
> > > ---
> > > drivers/powercap/intel_rapl.c | 27 +++++++++++++++++++--------
> > > 1 file changed, 19 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/powercap/intel_rapl.c
> > > b/drivers/powercap/intel_rapl.c index b1cda6f..13e4776 100644
> > > --- a/drivers/powercap/intel_rapl.c
> > > +++ b/drivers/powercap/intel_rapl.c
> > > @@ -32,6 +32,7 @@
> > >
> > > #include <asm/processor.h>
> > > #include <asm/cpu_device_id.h>
> > > +#include <asm/iosf_mbi.h>
> > >
> > > /* bitmasks for RAPL MSRs, used by primitive access functions */
> > > #define ENERGY_STATUS_MASK 0xffffffff
> > > @@ -336,11 +337,17 @@ static int find_nr_power_limit(struct
> > > rapl_domain *rd) return i;
> > > }
> > >
> > > +#define VLV_CPU_POWER_BUDGET_CTL (0x2)
> > > +static const struct x86_cpu_id valleyview_id[] = {
> > > + { X86_VENDOR_INTEL, 6, 0x37},
> > > + {}
> > > +};
> >
> > There are other platforms that have this FloorFreq register as well.
> > And those addresses are not '0x02'. So, we need to have a cpu_id based
> > table to define the address of the floor freq register as well.
> > [This is not specific to valleyview.]
> >
> Sounds like I need to add an abstraction to capture this. So far, there
> are only two exceptions so i was hesitate to do so. Thanks for the
> input.

Yes, We at least have few platforms that need this.

> > Also, is there a plan to expose this floor freq ratio through Sysfs
> > for runtime configuration. ? May be through a standard thermal
> > cooling device interface ?
> >
> why would that be necessary? who will use it? floor freq only affects
> RAPL, AFAIK. In Linux there is no guaranteed freq anyway. My original
> patch to enable RAPL as cooling device was abandoned in favor of
> powercap framework, I am not sure if we should go back.

There are user space thermal controls which change RAPL Power limits
according to platform's thermal condition as you might be aware.

The floor frequency is not used only to transition to LFM ratio. We can
transition to any frequency ratio by adjusting this floor frequency
(at least on VLV and couple more platforms)

Hence while changing RAPL Power Limits, there is a need to adjust
this also, to specify which ratio is our Floor (basically we will not
go below that). That's why we need an interface for modifying this
at run time (along with Power Limits).

Thanks,
Durga

> > > +
> > > static int set_domain_enable(struct powercap_zone *power_zone,
> > > bool mode) {
> > > struct rapl_domain *rd =
> > > power_zone_to_rapl_domain(power_zone); int nr_powerlimit;
> > > -
> > > + u32 mdata = 0;
> > > if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
> > > return -EACCES;
> > > get_online_cpus();
> > > @@ -350,7 +357,16 @@ static int set_domain_enable(struct
> > > powercap_zone *power_zone, bool mode)
> > > /* always enable clamp such that p-state can go below OS
> > > requested
> > > * range. power capping priority over guranteed frequency.
> > > */
> > > - rapl_write_data_raw(rd, PL1_CLAMP, mode);
> > > + if (x86_match_cpu(valleyview_id)) {
> > > + iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_PMC_READ,
> > > + VLV_CPU_POWER_BUDGET_CTL, &mdata);
> > > + mdata &= ~(0x7f << 8);
> > > + mdata |= 1 << 8;
> > > + iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_PMC_WRITE,
> > > + VLV_CPU_POWER_BUDGET_CTL, mdata);
> > > + } else
> > > + rapl_write_data_raw(rd, PL1_CLAMP, mode);
> > > +
> > > /* some domains have pl2 */
> > > if (nr_powerlimit > 1) {
> > > rapl_write_data_raw(rd, PL2_ENABLE, mode);
> > > @@ -833,11 +849,6 @@ static int rapl_write_data_raw(struct
> > > rapl_domain *rd, return 0;
> > > }
> > >
> > > -static const struct x86_cpu_id energy_unit_quirk_ids[] = {
> > > - { X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */
> > > - {}
> > > -};
> >
> > Same thing here. There are other Atom platforms that need this
> > conversion quirk. So, please keep the table as is.
> >
> > Thanks,
> > Durga
> >
> > > -
> > > static int rapl_check_unit(struct rapl_package *rp, int cpu)
> > > {
> > > u64 msr_val;
> > > @@ -859,7 +870,7 @@ static int rapl_check_unit(struct rapl_package
> > > *rp, int cpu) */
> > > value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
> > > /* some CPUs have different way to calculate energy unit */
> > > - if (x86_match_cpu(energy_unit_quirk_ids))
> > > + if (x86_match_cpu(valleyview_id))
> > > rp->energy_unit_divisor = 1000000 / (1 << value);
> > > else
> > > rp->energy_unit_divisor = 1 << value;
> > > --
> > > 1.8.1.2
> >
>
> [Jacob Pan]

2014-04-29 16:25:42

by Jacob Pan

[permalink] [raw]
Subject: Re: [PATCH 5/5] powercap/rapl: change floor frequency for vallewview

On Tue, 29 Apr 2014 14:40:37 +0000
"R, Durgadoss" <[email protected]> wrote:

> > -----Original Message-----
> > From: Jacob Pan [mailto:[email protected]]
> > Sent: Tuesday, April 29, 2014 6:33 PM
> > To: R, Durgadoss
> > Cc: Linux PM; Wysocki, Rafael J; LKML; David E. Box; Alan Cox;
> > Accardi, Kristen C Subject: Re: [PATCH 5/5] powercap/rapl: change
> > floor frequency for vallewview
> >
> > On Tue, 29 Apr 2014 02:45:22 +0000
> > "R, Durgadoss" <[email protected]> wrote:
> >
> > > Hi Jacob,
> > >
> > > > -----Original Message-----
> > > > From: Jacob Pan [mailto:[email protected]]
> > > > Sent: Monday, April 28, 2014 7:35 PM
> > > > To: Linux PM; Wysocki, Rafael J; LKML
> > > > Cc: David E. Box; Alan Cox; R, Durgadoss; Accardi, Kristen C;
> > > > Jacob Pan Subject: [PATCH 5/5] powercap/rapl: change floor
> > > > frequency for vallewview
> > > >
> > > > RAPL power limit reduce power by limiting CPU P-state and
> > > > other techniques. On Valleyview, RAPL power limit cannot
> > > > go to LFM (low frequency mode) if we don't set the floor
> > > > frequency via IOSF mailbox.
> > > >
> > > > This patch enables setting of floor frquency such that
> > > > RAPL power limit is more effective.
> > > >
> > > > Signed-off-by: Jacob Pan <[email protected]>
> > > > ---
> > > > drivers/powercap/intel_rapl.c | 27 +++++++++++++++++++--------
> > > > 1 file changed, 19 insertions(+), 8 deletions(-)
> > > >
> > > > diff --git a/drivers/powercap/intel_rapl.c
> > > > b/drivers/powercap/intel_rapl.c index b1cda6f..13e4776 100644
> > > > --- a/drivers/powercap/intel_rapl.c
> > > > +++ b/drivers/powercap/intel_rapl.c
> > > > @@ -32,6 +32,7 @@
> > > >
> > > > #include <asm/processor.h>
> > > > #include <asm/cpu_device_id.h>
> > > > +#include <asm/iosf_mbi.h>
> > > >
> > > > /* bitmasks for RAPL MSRs, used by primitive access functions
> > > > */ #define ENERGY_STATUS_MASK 0xffffffff
> > > > @@ -336,11 +337,17 @@ static int find_nr_power_limit(struct
> > > > rapl_domain *rd) return i;
> > > > }
> > > >
> > > > +#define VLV_CPU_POWER_BUDGET_CTL (0x2)
> > > > +static const struct x86_cpu_id valleyview_id[] = {
> > > > + { X86_VENDOR_INTEL, 6, 0x37},
> > > > + {}
> > > > +};
> > >
> > > There are other platforms that have this FloorFreq register as
> > > well. And those addresses are not '0x02'. So, we need to have a
> > > cpu_id based table to define the address of the floor freq
> > > register as well. [This is not specific to valleyview.]
> > >
> > Sounds like I need to add an abstraction to capture this. So far,
> > there are only two exceptions so i was hesitate to do so. Thanks
> > for the input.
>
> Yes, We at least have few platforms that need this.
>
> > > Also, is there a plan to expose this floor freq ratio through
> > > Sysfs for runtime configuration. ? May be through a standard
> > > thermal cooling device interface ?
> > >
> > why would that be necessary? who will use it? floor freq only
> > affects RAPL, AFAIK. In Linux there is no guaranteed freq anyway.
> > My original patch to enable RAPL as cooling device was abandoned in
> > favor of powercap framework, I am not sure if we should go back.
>
> There are user space thermal controls which change RAPL Power limits
> according to platform's thermal condition as you might be aware.
>
> The floor frequency is not used only to transition to LFM ratio. We
> can transition to any frequency ratio by adjusting this floor
> frequency (at least on VLV and couple more platforms)
>
> Hence while changing RAPL Power Limits, there is a need to adjust
> this also, to specify which ratio is our Floor (basically we will not
> go below that). That's why we need an interface for modifying this
> at run time (along with Power Limits).
>
I understand. What I am proposing here is to have a single knob for
user control power, instead of two knobs (power limit and floor freq)
which may have conflicts. When thermal throttling is needed, user only
cares about power limit, that is why I think it is better to set floor
to LFM and let power limit be the only knob. It is simpler. In case
freq is a constraint, user should use cpufreq interface.

> Thanks,
> Durga
>
> > > > +
> > > > static int set_domain_enable(struct powercap_zone *power_zone,
> > > > bool mode) {
> > > > struct rapl_domain *rd =
> > > > power_zone_to_rapl_domain(power_zone); int nr_powerlimit;
> > > > -
> > > > + u32 mdata = 0;
> > > > if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
> > > > return -EACCES;
> > > > get_online_cpus();
> > > > @@ -350,7 +357,16 @@ static int set_domain_enable(struct
> > > > powercap_zone *power_zone, bool mode)
> > > > /* always enable clamp such that p-state can go below
> > > > OS requested
> > > > * range. power capping priority over guranteed
> > > > frequency. */
> > > > - rapl_write_data_raw(rd, PL1_CLAMP, mode);
> > > > + if (x86_match_cpu(valleyview_id)) {
> > > > + iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_PMC_READ,
> > > > + VLV_CPU_POWER_BUDGET_CTL, &mdata);
> > > > + mdata &= ~(0x7f << 8);
> > > > + mdata |= 1 << 8;
> > > > + iosf_mbi_write(BT_MBI_UNIT_PMC,
> > > > BT_MBI_PMC_WRITE,
> > > > + VLV_CPU_POWER_BUDGET_CTL, mdata);
> > > > + } else
> > > > + rapl_write_data_raw(rd, PL1_CLAMP, mode);
> > > > +
> > > > /* some domains have pl2 */
> > > > if (nr_powerlimit > 1) {
> > > > rapl_write_data_raw(rd, PL2_ENABLE, mode);
> > > > @@ -833,11 +849,6 @@ static int rapl_write_data_raw(struct
> > > > rapl_domain *rd, return 0;
> > > > }
> > > >
> > > > -static const struct x86_cpu_id energy_unit_quirk_ids[] = {
> > > > - { X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */
> > > > - {}
> > > > -};
> > >
> > > Same thing here. There are other Atom platforms that need this
> > > conversion quirk. So, please keep the table as is.
> > >
> > > Thanks,
> > > Durga
> > >
> > > > -
> > > > static int rapl_check_unit(struct rapl_package *rp, int cpu)
> > > > {
> > > > u64 msr_val;
> > > > @@ -859,7 +870,7 @@ static int rapl_check_unit(struct
> > > > rapl_package *rp, int cpu) */
> > > > value = (msr_val & ENERGY_UNIT_MASK) >>
> > > > ENERGY_UNIT_OFFSET; /* some CPUs have different way to
> > > > calculate energy unit */
> > > > - if (x86_match_cpu(energy_unit_quirk_ids))
> > > > + if (x86_match_cpu(valleyview_id))
> > > > rp->energy_unit_divisor = 1000000 / (1 <<
> > > > value); else
> > > > rp->energy_unit_divisor = 1 << value;
> > > > --
> > > > 1.8.1.2
> > >
> >
> > [Jacob Pan]

[Jacob Pan]

2014-04-29 22:35:39

by David E. Box

[permalink] [raw]
Subject: [PATCH v2 1/4] powercap/rapl: further relax energy counter checks

From: Jacob Pan <[email protected]>

Energy counters may roll slowly for some RAPL domains, checking all
of them can be time consuming and takes unpredictable amount of time.
Therefore, we relax the sanity check by only checking availability of the
MSRs and non-zero value of the energy status counters. It has been shown
sufficient for all the platforms tested to filter out inactive domains.

Signed-off-by: Jacob Pan <[email protected]>
---
drivers/powercap/intel_rapl.c | 29 +++++++++--------------------
1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index d9a0770..1c987d2 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -1124,8 +1124,7 @@ err_cleanup_package:
static int rapl_check_domain(int cpu, int domain)
{
unsigned msr;
- u64 val1, val2 = 0;
- int retry = 0;
+ u64 val = 0;

switch (domain) {
case RAPL_DOMAIN_PACKAGE:
@@ -1144,26 +1143,13 @@ static int rapl_check_domain(int cpu, int domain)
pr_err("invalid domain id %d\n", domain);
return -EINVAL;
}
- if (rdmsrl_safe_on_cpu(cpu, msr, &val1))
- return -ENODEV;
-
- /* PP1/uncore/graphics domain may not be active at the time of
- * driver loading. So skip further checks.
+ /* make sure domain counters are available and contains non-zero
+ * values, otherwise skip it.
*/
- if (domain == RAPL_DOMAIN_PP1)
- return 0;
- /* energy counters roll slowly on some domains */
- while (++retry < 10) {
- usleep_range(10000, 15000);
- rdmsrl_safe_on_cpu(cpu, msr, &val2);
- if ((val1 & ENERGY_STATUS_MASK) != (val2 & ENERGY_STATUS_MASK))
- return 0;
- }
- /* if energy counter does not change, report as bad domain */
- pr_info("domain %s energy ctr %llu:%llu not working, skip\n",
- rapl_domain_names[domain], val1, val2);
+ if (rdmsrl_safe_on_cpu(cpu, msr, &val) || !val)
+ return -ENODEV;

- return -ENODEV;
+ return 0;
}

/* Detect active and valid domains for the given CPU, caller must
@@ -1180,6 +1166,9 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu)
/* use physical package id to read counters */
if (!rapl_check_domain(cpu, i))
rp->domain_map |= 1 << i;
+ else
+ pr_warn("RAPL domain %s detection failed\n",
+ rapl_domain_names[i]);
}
rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX);
if (!rp->nr_domains) {
--
1.7.10.4

2014-04-29 22:35:37

by David E. Box

[permalink] [raw]
Subject: [PATCH v2 0/4] RAPL driver updates

From: "David E. Box" <[email protected]>

v2: iosf: Remove Kconfig exposure. Make mailbox driver a module.

David E. Box (1):
x86/iosf: Make IOSF driver modular and usable by more drivers

Jacob Pan (3):
powercap/rapl: further relax energy counter checks
powercap/rapl: add new cpu ids
powercap/rapl: change floor frequency for vallewview

arch/x86/Kconfig | 7 ++---
arch/x86/include/asm/iosf_mbi.h | 33 +++++++++++++++++++++
arch/x86/kernel/iosf_mbi.c | 6 ++++
drivers/powercap/intel_rapl.c | 60 ++++++++++++++++++++-------------------
4 files changed, 72 insertions(+), 34 deletions(-)

--
1.7.10.4

2014-04-29 22:36:01

by David E. Box

[permalink] [raw]
Subject: [PATCH v2 4/4] powercap/rapl: change floor frequency for vallewview

From: Jacob Pan <[email protected]>

RAPL power limit reduce power by limiting CPU P-state and
other techniques. On Valleyview, RAPL power limit cannot
go to LFM (low frequency mode) if we don't set the floor
frequency via IOSF mailbox.

This patch enables setting of floor frquency such that
RAPL power limit is more effective.

Signed-off-by: Jacob Pan <[email protected]>
---
drivers/powercap/intel_rapl.c | 27 +++++++++++++++++++--------
1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index b1cda6f..13e4776 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -32,6 +32,7 @@

#include <asm/processor.h>
#include <asm/cpu_device_id.h>
+#include <asm/iosf_mbi.h>

/* bitmasks for RAPL MSRs, used by primitive access functions */
#define ENERGY_STATUS_MASK 0xffffffff
@@ -336,11 +337,17 @@ static int find_nr_power_limit(struct rapl_domain *rd)
return i;
}

+#define VLV_CPU_POWER_BUDGET_CTL (0x2)
+static const struct x86_cpu_id valleyview_id[] = {
+ { X86_VENDOR_INTEL, 6, 0x37},
+ {}
+};
+
static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
{
struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
int nr_powerlimit;
-
+ u32 mdata = 0;
if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
return -EACCES;
get_online_cpus();
@@ -350,7 +357,16 @@ static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
/* always enable clamp such that p-state can go below OS requested
* range. power capping priority over guranteed frequency.
*/
- rapl_write_data_raw(rd, PL1_CLAMP, mode);
+ if (x86_match_cpu(valleyview_id)) {
+ iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_PMC_READ,
+ VLV_CPU_POWER_BUDGET_CTL, &mdata);
+ mdata &= ~(0x7f << 8);
+ mdata |= 1 << 8;
+ iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_PMC_WRITE,
+ VLV_CPU_POWER_BUDGET_CTL, mdata);
+ } else
+ rapl_write_data_raw(rd, PL1_CLAMP, mode);
+
/* some domains have pl2 */
if (nr_powerlimit > 1) {
rapl_write_data_raw(rd, PL2_ENABLE, mode);
@@ -833,11 +849,6 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
return 0;
}

-static const struct x86_cpu_id energy_unit_quirk_ids[] = {
- { X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */
- {}
-};
-
static int rapl_check_unit(struct rapl_package *rp, int cpu)
{
u64 msr_val;
@@ -859,7 +870,7 @@ static int rapl_check_unit(struct rapl_package *rp, int cpu)
*/
value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
/* some CPUs have different way to calculate energy unit */
- if (x86_match_cpu(energy_unit_quirk_ids))
+ if (x86_match_cpu(valleyview_id))
rp->energy_unit_divisor = 1000000 / (1 << value);
else
rp->energy_unit_divisor = 1 << value;
--
1.7.10.4

2014-04-29 22:36:24

by David E. Box

[permalink] [raw]
Subject: [PATCH v2 3/4] x86/iosf: Make IOSF driver modular and usable by more drivers

From: "David E. Box" <[email protected]>

Currently drivers that work on big core can't use the mailbox driver without
selecting it which forces an unneeded dependency. Provides dummy fucntions to
allow these modules to conditionally use the mailbox on soc's without limiting
their ability to compile and load on big core systems. Make mailbox driver
build default m to ensure availability on x86 SOC's.

Signed-off-by: David E. Box <[email protected]>
---
arch/x86/Kconfig | 7 ++-----
arch/x86/include/asm/iosf_mbi.h | 33 +++++++++++++++++++++++++++++++++
arch/x86/kernel/iosf_mbi.c | 6 ++++++
3 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 25d2c6f..f1304d3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2375,12 +2375,9 @@ config X86_DMA_REMAP
depends on STA2X11

config IOSF_MBI
- bool
+ tristate
+ default m
depends on PCI
- ---help---
- To be selected by modules requiring access to the Intel OnChip System
- Fabric (IOSF) Sideband MailBox Interface (MBI). For MBI platforms
- enumerable by PCI.

source "net/Kconfig"

diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h
index 8e71c79..9e92d65 100644
--- a/arch/x86/include/asm/iosf_mbi.h
+++ b/arch/x86/include/asm/iosf_mbi.h
@@ -50,6 +50,10 @@
#define BT_MBI_PCIE_READ 0x00
#define BT_MBI_PCIE_WRITE 0x01

+#if defined(CONFIG_IOSF_MBI) || defined(CONFIG_IOSF_MBI_MODULE)
+
+bool iosf_mbi_available(void);
+
/**
* iosf_mbi_read() - MailBox Interface read command
* @port: port indicating subunit being accessed
@@ -87,4 +91,33 @@ int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr);
*/
int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask);

+#else /* CONFIG_IOSF_MBI is not enabled */
+static inline
+bool iosf_mbi_available(void)
+{
+ return false;
+}
+
+static inline
+int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr)
+{
+ WARN(1, "MBI driver not available");
+ return -EPERM;
+}
+
+static inline
+int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr)
+{
+ WARN(1, "MBI driver not available");
+ return -EPERM;
+}
+
+static inline
+int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask)
+{
+ WARN(1, "MBI driver not available");
+ return -EPERM;
+}
+#endif /* CONFIG_IOSF_MBI */
+
#endif /* IOSF_MBI_SYMS_H */
diff --git a/arch/x86/kernel/iosf_mbi.c b/arch/x86/kernel/iosf_mbi.c
index c3aae66..d3803c6 100644
--- a/arch/x86/kernel/iosf_mbi.c
+++ b/arch/x86/kernel/iosf_mbi.c
@@ -177,6 +177,12 @@ int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask)
}
EXPORT_SYMBOL(iosf_mbi_modify);

+bool iosf_mbi_available(void)
+{
+ return mbi_pdev;
+}
+EXPORT_SYMBOL(iosf_mbi_available);
+
static int iosf_mbi_probe(struct pci_dev *pdev,
const struct pci_device_id *unused)
{
--
1.7.10.4

2014-04-29 22:36:23

by David E. Box

[permalink] [raw]
Subject: [PATCH v2 2/4] powercap/rapl: add new cpu ids

From: Jacob Pan <[email protected]>

Adding support for Broadwell model 0x3d and HSW model (0x3c)

Signed-off-by: Jacob Pan <[email protected]>
---
drivers/powercap/intel_rapl.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 1c987d2..b1cda6f 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -951,7 +951,9 @@ static const struct x86_cpu_id rapl_ids[] = {
{ X86_VENDOR_INTEL, 6, 0x2d},/* Sandy Bridge EP */
{ X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */
{ X86_VENDOR_INTEL, 6, 0x3a},/* Ivy Bridge */
- { X86_VENDOR_INTEL, 6, 0x45},/* Haswell */
+ { X86_VENDOR_INTEL, 6, 0x3c},/* Haswell */
+ { X86_VENDOR_INTEL, 6, 0x3d},/* Broadwell */
+ { X86_VENDOR_INTEL, 6, 0x45},/* Haswell ULT */
/* TODO: Add more CPU IDs after testing */
{}
};
--
1.7.10.4

2014-04-29 22:46:11

by Rafael J. Wysocki

[permalink] [raw]
Subject: Re: [PATCH v2 0/4] RAPL driver updates

On Tuesday, April 29, 2014 03:33:05 PM David E. Box wrote:
> From: "David E. Box" <[email protected]>
>
> v2: iosf: Remove Kconfig exposure. Make mailbox driver a module.
>
> David E. Box (1):
> x86/iosf: Make IOSF driver modular and usable by more drivers
>
> Jacob Pan (3):
> powercap/rapl: further relax energy counter checks
> powercap/rapl: add new cpu ids
> powercap/rapl: change floor frequency for vallewview

I'm fine with [1-2/4].

[3/4] has to go via the x86 tree and [4/4] probably too, because it
depends on [3/4].

So would you like me to queue up [1-2/4] without the remaining two for 3.16?


--
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.

2014-04-29 23:38:35

by Jacob Pan

[permalink] [raw]
Subject: Re: [PATCH v2 0/4] RAPL driver updates

On Wed, 30 Apr 2014 01:02:36 +0200
"Rafael J. Wysocki" <[email protected]> wrote:

> On Tuesday, April 29, 2014 03:33:05 PM David E. Box wrote:
> > From: "David E. Box" <[email protected]>
> >
> > v2: iosf: Remove Kconfig exposure. Make mailbox driver a module.
> >
> > David E. Box (1):
> > x86/iosf: Make IOSF driver modular and usable by more drivers
> >
> > Jacob Pan (3):
> > powercap/rapl: further relax energy counter checks
> > powercap/rapl: add new cpu ids
> > powercap/rapl: change floor frequency for vallewview
>
> I'm fine with [1-2/4].
>
> [3/4] has to go via the x86 tree and [4/4] probably too, because it
> depends on [3/4].
>
OK. I will update [4/4] to be submitted to x86 tree with [3/4].
> So would you like me to queue up [1-2/4] without the remaining two
> for 3.16?
>
Sure. thanks.

>

[Jacob Pan]

2014-04-30 05:30:03

by R, Durgadoss

[permalink] [raw]
Subject: RE: [PATCH v2 1/4] powercap/rapl: further relax energy counter checks



> -----Original Message-----
> From: David E. Box [mailto:[email protected]]
> Sent: Wednesday, April 30, 2014 4:03 AM
> To: [email protected]; [email protected]; linux-
> [email protected]; Wysocki, Rafael J; [email protected];
> [email protected]
> Cc: [email protected]; R, Durgadoss; Accardi, Kristen C
> Subject: [PATCH v2 1/4] powercap/rapl: further relax energy counter checks
>
> From: Jacob Pan <[email protected]>
>
> Energy counters may roll slowly for some RAPL domains, checking all
> of them can be time consuming and takes unpredictable amount of time.
> Therefore, we relax the sanity check by only checking availability of the
> MSRs and non-zero value of the energy status counters. It has been shown
> sufficient for all the platforms tested to filter out inactive domains.
>

Acked-by: Durgadoss R <[email protected]>

Thanks,
Durga

> Signed-off-by: Jacob Pan <[email protected]>
> ---
> drivers/powercap/intel_rapl.c | 29 +++++++++--------------------
> 1 file changed, 9 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
> index d9a0770..1c987d2 100644
> --- a/drivers/powercap/intel_rapl.c
> +++ b/drivers/powercap/intel_rapl.c
> @@ -1124,8 +1124,7 @@ err_cleanup_package:
> static int rapl_check_domain(int cpu, int domain)
> {
> unsigned msr;
> - u64 val1, val2 = 0;
> - int retry = 0;
> + u64 val = 0;
>
> switch (domain) {
> case RAPL_DOMAIN_PACKAGE:
> @@ -1144,26 +1143,13 @@ static int rapl_check_domain(int cpu, int domain)
> pr_err("invalid domain id %d\n", domain);
> return -EINVAL;
> }
> - if (rdmsrl_safe_on_cpu(cpu, msr, &val1))
> - return -ENODEV;
> -
> - /* PP1/uncore/graphics domain may not be active at the time of
> - * driver loading. So skip further checks.
> + /* make sure domain counters are available and contains non-zero
> + * values, otherwise skip it.
> */
> - if (domain == RAPL_DOMAIN_PP1)
> - return 0;
> - /* energy counters roll slowly on some domains */
> - while (++retry < 10) {
> - usleep_range(10000, 15000);
> - rdmsrl_safe_on_cpu(cpu, msr, &val2);
> - if ((val1 & ENERGY_STATUS_MASK) != (val2 &
> ENERGY_STATUS_MASK))
> - return 0;
> - }
> - /* if energy counter does not change, report as bad domain */
> - pr_info("domain %s energy ctr %llu:%llu not working, skip\n",
> - rapl_domain_names[domain], val1, val2);
> + if (rdmsrl_safe_on_cpu(cpu, msr, &val) || !val)
> + return -ENODEV;
>
> - return -ENODEV;
> + return 0;
> }
>
> /* Detect active and valid domains for the given CPU, caller must
> @@ -1180,6 +1166,9 @@ static int rapl_detect_domains(struct rapl_package *rp,
> int cpu)
> /* use physical package id to read counters */
> if (!rapl_check_domain(cpu, i))
> rp->domain_map |= 1 << i;
> + else
> + pr_warn("RAPL domain %s detection failed\n",
> + rapl_domain_names[i]);
> }
> rp->nr_domains = bitmap_weight(&rp->domain_map,
> RAPL_DOMAIN_MAX);
> if (!rp->nr_domains) {
> --
> 1.7.10.4