From: Kan Liang <[email protected]>
This patch adds full support for the Intel SKL client uncore.
- Add support for the SKL client CPU uncore, which is similar to the BDW
client uncore. The differences are the number of CBOXes and the uncore
control MSR.
- Add new CPU model 78 for SkyLake Mobile, including both CPU and PCI
uncore.
- Set and clear the enable bit of PERF_GLOBAL_CTL in enable_box/disable_box.
This is needed because PERF_GLOBAL_CTL can be cleared after Package C7,
so setting the enable bit only in init_box is not enough.
Signed-off-by: Kan Liang <[email protected]>
---
arch/x86/events/intel/uncore.c | 4 +-
arch/x86/events/intel/uncore.h | 1 +
arch/x86/events/intel/uncore_snb.c | 80 +++++++++++++++++++++++++++++++++++++-
3 files changed, 83 insertions(+), 2 deletions(-)
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 16c1789..7fe2f77 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1361,6 +1361,7 @@ static const struct intel_uncore_init_fun knl_uncore_init __initconst = {
};
static const struct intel_uncore_init_fun skl_uncore_init __initconst = {
+ .cpu_init = skl_uncore_cpu_init,
.pci_init = skl_uncore_pci_init,
};
@@ -1384,7 +1385,8 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */
X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */
X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */
- X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */
+ X86_UNCORE_MODEL_MATCH(78, skl_uncore_init), /* SkyLake Mobile */
+ X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake Desktop */
{},
};
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 79766b9..798cd89 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -360,6 +360,7 @@ int bdw_uncore_pci_init(void);
int skl_uncore_pci_init(void);
void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
+void skl_uncore_cpu_init(void);
int snb_pci2phy_map_init(int devid);
/* perf_event_intel_uncore_snbep.c */
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 96531d2..5001ea6 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -1,4 +1,4 @@
-/* Nehalem/SandBridge/Haswell uncore support */
+/* Nehalem/SandyBridge/Haswell/Broadwell/Skylake uncore support */
#include "uncore.h"
/* Uncore IMC PCI IDs */
@@ -9,6 +9,7 @@
#define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04
#define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604
#define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f
+#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x190c
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
@@ -64,6 +65,10 @@
#define NHM_UNC_PERFEVTSEL0 0x3c0
#define NHM_UNC_UNCORE_PMC0 0x3b0
+/* SKL uncore global control */
+#define SKL_UNC_PERF_GLOBAL_CTL 0xe01
+#define SKL_UNC_GLOBAL_CTL_CORE_ALL ((1 << 5) - 1)
+
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
@@ -179,6 +184,73 @@ void snb_uncore_cpu_init(void)
snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
}
+static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->pmu_idx == 0) {
+ wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
+ SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
+ }
+}
+
+static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+ wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
+ SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
+static void skl_uncore_msr_disable_box(struct intel_uncore_box *box)
+{
+ wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0);
+}
+
+static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
+{
+ if (box->pmu->pmu_idx == 0)
+ wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0);
+}
+
+static struct intel_uncore_ops skl_uncore_msr_ops = {
+ .init_box = skl_uncore_msr_init_box,
+ .enable_box = skl_uncore_msr_enable_box,
+ .disable_box = skl_uncore_msr_disable_box,
+ .exit_box = skl_uncore_msr_exit_box,
+ .disable_event = snb_uncore_msr_disable_event,
+ .enable_event = snb_uncore_msr_enable_event,
+ .read_counter = uncore_msr_read_counter,
+};
+
+static struct intel_uncore_type skl_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 4,
+ .num_boxes = 5,
+ .perf_ctr_bits = 44,
+ .fixed_ctr_bits = 48,
+ .perf_ctr = SNB_UNC_CBO_0_PER_CTR0,
+ .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
+ .fixed_ctr = SNB_UNC_FIXED_CTR,
+ .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL,
+ .single_fixed = 1,
+ .event_mask = SNB_UNC_RAW_EVENT_MASK,
+ .msr_offset = SNB_UNC_CBO_MSR_OFFSET,
+ .ops = &skl_uncore_msr_ops,
+ .format_group = &snb_uncore_format_group,
+ .event_descs = snb_uncore_events,
+};
+
+static struct intel_uncore_type *skl_msr_uncores[] = {
+ &skl_uncore_cbox,
+ &snb_uncore_arb,
+ NULL,
+};
+
+void skl_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = skl_msr_uncores;
+ if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
+ skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
+ snb_uncore_arb.ops = &skl_uncore_msr_ops;
+}
+
enum {
SNB_PCI_UNCORE_IMC,
};
@@ -544,6 +616,11 @@ static const struct pci_device_id skl_uncore_pci_ids[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
},
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+
{ /* end: all zeroes */ },
};
@@ -587,6 +664,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */
IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */
IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */
+ IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver), /* 6th Gen Core U */
{ /* end marker */ }
};
--
2.5.0
On Fri, 15 Apr 2016, [email protected] wrote:
> +static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
> +{
> + if (box->pmu->pmu_idx == 0) {
> + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
> + SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
> + }
> +}
> +
> +static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
> +{
> + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
> + SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
> +}
> +
> +static void skl_uncore_msr_disable_box(struct intel_uncore_box *box)
> +{
> + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0);
> +}
> +
> +static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
> +{
> + if (box->pmu->pmu_idx == 0)
> + wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0);
> +}
The above looks broken.
init() enables the uncore machinery on the node it is running on.
start() enables the uncore machinery on the node it is running on.
stop() disables the uncore machinery on the node it is running on.
So what happens in the following case:
start(event(box0), node0)
start(event(box1), node0)
stop(event(box1), node0)
The stop of the box1 events disables the whole machinery on that node and
therefore the box0 event is wrecked as well. Hmm?
Thanks,
tglx
> On Fri, 15 Apr 2016, [email protected] wrote:
> > +static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
> > +{
> > +	if (box->pmu->pmu_idx == 0) {
> > +		wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
> > +		       SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
> > +	}
> > +}
> > +
> > +static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
> > +{
> > +	wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
> > +	       SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
> > +}
> > +
> > +static void skl_uncore_msr_disable_box(struct intel_uncore_box *box)
> > +{
> > +	wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0);
> > +}
> > +
> > +static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
> > +{
> > +	if (box->pmu->pmu_idx == 0)
> > +		wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0);
> > +}
>
> The above looks broken.
>
> init() enables the uncore machinery on the node it is running on.
>
> start() enables the uncore machinery on the node it is running on.
>
> stop() disables the uncore machinery on the node it is running on.
>
> So what happens in the following case:
>
> start(event(box0), node0)
>
> start(event(box1), node0)
>
> stop(event(box1), node0)
>
> The stop of the box1 events disables the whole machinery on that node and
> therefore the box0 event is wrecked as well. Hmm?
>
Right. How about checking SKL_UNC_PERF_GLOBAL_CTL in enable_event?
If it's cleared, we can reset it there. The drawback is that there will be an extra
rdmsrl and a possible wrmsrl.
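A rough, untested sketch of what I mean (it falls back to the existing
snb_uncore_msr_enable_event() for the actual event enable):

static void skl_uncore_msr_enable_event(struct intel_uncore_box *box,
					struct perf_event *event)
{
	u64 ctl;

	/* Re-arm the global control if Package C7 cleared it. */
	rdmsrl(SKL_UNC_PERF_GLOBAL_CTL, ctl);
	if (!(ctl & SNB_UNC_GLOBAL_CTL_EN))
		wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
		       SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);

	snb_uncore_msr_enable_event(box, event);
}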
Thanks,
Kan
On Wed, 20 Apr 2016, Liang, Kan wrote:
> > The stop of the box1 events disables the whole machinery on that node and
> > therefore the box0 event is wrecked as well. Hmm?
> >
> Right. How about checking SKL_UNC_PERF_GLOBAL_CTL in enable_event? If it's
> cleared, we can reset it there. The drawback is that there will be an extra
> rdmsrl and a possible wrmsrl.
Well, that does not buy anything as you cannot disable the thing at all,
unless you have refcounting. And that refcounting needs to be in the 'type'
struct and that would probably be some real pain to implement.
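Something like the below, for illustration only. Note that box_refcnt is
made up - struct intel_uncore_type has no such field - and a single machine
wide count would not even match the per package MSR, which is part of the
pain:

static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
{
	/* Hypothetical atomic_t box_refcnt in the shared 'type' struct */
	if (atomic_inc_return(&box->pmu->type->box_refcnt) == 1)
		wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
		       SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
}

static void skl_uncore_msr_disable_box(struct intel_uncore_box *box)
{
	/* Only the last active box may turn the machinery off */
	if (atomic_dec_and_test(&box->pmu->type->box_refcnt))
		wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, 0);
}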
The question is whether we need enable/disable at all. If the type is
initialized we enable it and on exit we disable it. Ditto on cpu hotplug -
which is also used for init to enable all nodes.
So if there is no drawback in leaving the thing enabled if no events are
armed, then we really can do w/o the enable/disable_box callbacks.
Thanks,
tglx
> On Wed, 20 Apr 2016, Liang, Kan wrote:
> > > The stop of the box1 events disables the whole machinery on that
> > > node and therefore the box0 event is wrecked as well. Hmm?
> > >
> > Right. How about checking SKL_UNC_PERF_GLOBAL_CTL in enable_event?
> > If it's cleared, we can reset it there. The drawback is that there
> > will be an extra rdmsrl and a possible wrmsrl.
>
> Well, that does not buy anything as you cannot disable the thing at all, unless
> you have refcounting. And that refcounting needs to be in the 'type'
> struct and that would probably be some real pain to implement.
>
> The question is whether we need enable/disable at all. If the type is
> initialized we enable it and on exit we disable it. Ditto on cpu hotplug - which
> is also used for init to enable all nodes.
>
> So if there is no drawback in leaving the thing enabled if no events are armed,
> then we really can do w/o the enable/disable_box callbacks.
>
There is no drawback in leaving the thing enabled, but PERF_GLOBAL_CTL could
be disabled after Package C7. I added the enable/disable logic to try to
work around it.
I once tested this on an SKL laptop. If the machine goes idle for a while, the
uncore counters always return 0. To fix that, we have to re-enable
PERF_GLOBAL_CTL.
I think I made a typo in my previous reply. I meant we can check it or just
force rewrite PERF_GLOBAL_CTL in enable_box. We don't need disable_box since
there is no drawback in leaving the thing enabled.
The HSW and BDW clients also have similar errata. If it's OK with you, I will
send another patch for them.
Thanks,
Kan
> Right. How about checking SKL_UNC_PERF_GLOBAL_CTL in enable_event?
> If it's cleared, we can reset it there. The drawback is that there will be an extra
> rdmsrl and a possible wrmsrl.
It's better not to rely on register state for this. There are a variety
of user space tools that manipulate the uncore MSRs through /dev/cpu/*/msr,
so better to keep software state. Software state is also much faster than
reading MSRs.
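For example, purely illustrative (the shadow variable below is made up):

static bool skl_global_ctl_enabled;	/* made-up software shadow state */

static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
{
	/* Trust the software state instead of reading the MSR back */
	if (!skl_global_ctl_enabled) {
		wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
		       SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
		skl_global_ctl_enabled = true;
	}
}

Though note a cached flag cannot see the hardware clearing the bit in
Package C7, so for that particular problem an unconditional write is still
needed.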
-Andi
--
[email protected] -- Speaking for myself only
On Wed, 20 Apr 2016, Liang, Kan wrote:
> > On Wed, 20 Apr 2016, Liang, Kan wrote:
> > > > The stop of the box1 events disables the whole machinery on that
> > > > node and therefore the box0 event is wrecked as well. Hmm?
> > > >
> > > Right. How about checking SKL_UNC_PERF_GLOBAL_CTL in enable_event?
> > > If it's cleared, we can reset it there. The drawback is that there
> > > will be an extra rdmsrl and a possible wrmsrl.
> >
> > Well, that does not buy anything as you cannot disable the thing at all, unless
> > you have refcounting. And that refcounting needs to be in the 'type'
> > struct and that would probably be some real pain to implement.
> >
> > The question is whether we need enable/disable at all. If the type is
> > initialized we enable it and on exit we disable it. Ditto on cpu hotplug - which
> > is also used for init to enable all nodes.
> >
> > So if there is no drawback in leaving the thing enabled if no events are armed,
> > then we really can do w/o the enable/disable_box callbacks.
> >
> There is no drawback in leaving the thing enabled, but PERF_GLOBAL_CTL could
> be disabled after Package C7. I added the enable/disable logic to try to
> work around it.
I don't see how that solves it. If a counter is active, then C7 will stop it
and you won't get anything useful from it after returning from C7. Or does an
active counter prevent C7?
> I once tested this on an SKL laptop. If the machine goes idle for a while,
> the uncore counters always return 0. To fix that, we have to re-enable
> PERF_GLOBAL_CTL.
Hmm, but that only helps for new events after returning from C7, right?
> > I think I made a typo in my previous reply. I meant we can check it or
> > just force rewrite PERF_GLOBAL_CTL in enable_box. We don't need
> > disable_box since there is no drawback in leaving the thing enabled.
Sure, but then you can just unconditionally enable it. IOW, leave the enable
callback as is.
Thanks,
tglx
>
> On Wed, 20 Apr 2016, Liang, Kan wrote:
> > > On Wed, 20 Apr 2016, Liang, Kan wrote:
> > > > > The stop of the box1 events disables the whole machinery on that
> > > > > node and therefore the box0 event is wrecked as well. Hmm?
> > > > >
> > > > Right. How about checking SKL_UNC_PERF_GLOBAL_CTL in enable_event?
> > > > If it's cleared, we can reset it there. The drawback is that there
> > > > will be an extra rdmsrl and a possible wrmsrl.
> > >
> > > Well, that does not buy anything as you cannot disable the thing at
> > > all, unless you have refcounting. And that refcounting needs to be in
> > > the 'type' struct and that would probably be some real pain to implement.
> > >
> > > The question is whether we need enable/disable at all. If the type
> > > is initialized we enable it and on exit we disable it. Ditto on cpu
> > > hotplug - which is also used for init to enable all nodes.
> > >
> > > So if there is no drawback in leaving the thing enabled if no events
> > > are armed, then we really can do w/o the enable/disable_box callbacks.
> > >
> > There is no drawback in leaving the thing enabled, but PERF_GLOBAL_CTL
> > could be disabled after Package C7. I added the enable/disable logic to
> > try to work around it.
>
> I don't see how that solves it. If a counter is active, then C7 will stop it and
> you won't get anything useful from it after returning from C7. Or does an
> active counter prevent C7?
Right, the workaround doesn't cover all cases. It helps for new events and
for monitoring a busy system, i.e. a system that never enters C7 during the
counting.
I will mention it in the changelog of V2.
>
> > I once tested this on an SKL laptop. If the machine goes idle for a
> > while, the uncore counters always return 0. To fix that, we have to
> > re-enable PERF_GLOBAL_CTL.
>
> Hmm, but that only helps for new events after returning from C7, right?
Yes.
>
> > I think I made a typo in my previous reply. I meant we can check it or
> > just force rewrite PERF_GLOBAL_CTL in enable_box. We don't need
> > disable_box since there is no drawback in leaving the thing enabled.
>
> Sure, but then you can just unconditionally enable it. IOW, leave the enable
> callback as is.
Will do that in V2.
Thanks,
Kan