From: Kan Liang <[email protected]>
The discovery table of UPI on SPR MCC is broken. The patch series is
to mitigate the issue by providing a hardcode pre-defined table.
The broken discovery table can trigger a kernel warning message, which
is overkilled. The patch series also refine the error handling code.
Kan Liang (5):
perf/x86/uncore: Factor out uncore_device_to_die()
perf/x86/uncore: Fix potential NULL pointer in uncore_get_alias_name
perf/x86/uncore: Ignore broken units in discovery table
perf/x86/uncore: Add a quirk for UPI on SPR
perf/x86/uncore: Don't WARN_ON_ONCE() for a broken discovery table
arch/x86/events/intel/uncore.c | 34 ++++-
arch/x86/events/intel/uncore.h | 4 +
arch/x86/events/intel/uncore_discovery.c | 60 ++++++---
arch/x86/events/intel/uncore_discovery.h | 14 +-
arch/x86/events/intel/uncore_snbep.c | 158 ++++++++++++++++++-----
5 files changed, 210 insertions(+), 60 deletions(-)
--
2.35.1
From: Kan Liang <[email protected]>
The current code assumes that the discovery table provides valid
box_ids for the normal units. It's not the case anymore since some units
in the discovery table are broken on some SPR variants.
Factor out uncore_get_box_id(). Check the existence of the type->box_ids
before using it. If it's not available, use pmu_idx.
Signed-off-by: Kan Liang <[email protected]>
---
arch/x86/events/intel/uncore.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 8caf253be1de..d63be6d1224e 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -857,6 +857,12 @@ static const struct attribute_group uncore_pmu_attr_group = {
.attrs = uncore_pmu_attrs,
};
+static inline int uncore_get_box_id(struct intel_uncore_type *type,
+ struct intel_uncore_pmu *pmu)
+{
+ return type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx;
+}
+
void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
{
struct intel_uncore_type *type = pmu->type;
@@ -865,7 +871,7 @@ void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
sprintf(pmu_name, "uncore_type_%u", type->type_id);
else {
sprintf(pmu_name, "uncore_type_%u_%d",
- type->type_id, type->box_ids[pmu->pmu_idx]);
+ type->type_id, uncore_get_box_id(type, pmu));
}
}
@@ -892,7 +898,7 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
* Use the box ID from the discovery table if applicable.
*/
sprintf(pmu->name, "uncore_%s_%d", type->name,
- type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx);
+ uncore_get_box_id(type, pmu));
}
}
--
2.35.1
From: Kan Liang <[email protected]>
Some units in a discovery table may be broken, e.g., UPI of SPR MCC.
A generic method is required to ignore the broken units.
Add uncore_units_ignore in the struct intel_uncore_init_fun, which
indicates the type ID of broken units. It will be assigned by the
platform-specific code later when the platform has a broken discovery
table.
Signed-off-by: Kan Liang <[email protected]>
---
arch/x86/events/intel/uncore.c | 8 ++++++--
arch/x86/events/intel/uncore.h | 2 ++
arch/x86/events/intel/uncore_discovery.c | 26 +++++++++++++++++++++---
arch/x86/events/intel/uncore_discovery.h | 2 +-
4 files changed, 32 insertions(+), 6 deletions(-)
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index d63be6d1224e..751df6460bff 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1695,7 +1695,10 @@ struct intel_uncore_init_fun {
void (*cpu_init)(void);
int (*pci_init)(void);
void (*mmio_init)(void);
+ /* Discovery table is required */
bool use_discovery;
+ /* The units in the discovery table should be ignored. */
+ int *uncore_units_ignore;
};
static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
@@ -1873,7 +1876,7 @@ static int __init intel_uncore_init(void)
id = x86_match_cpu(intel_uncore_match);
if (!id) {
- if (!uncore_no_discover && intel_uncore_has_discovery_tables())
+ if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL))
uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
else
return -ENODEV;
@@ -1881,7 +1884,8 @@ static int __init intel_uncore_init(void)
uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
if (uncore_no_discover && uncore_init->use_discovery)
return -ENODEV;
- if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables())
+ if (uncore_init->use_discovery &&
+ !intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore))
return -ENODEV;
}
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 8d493bea9eb6..bbaa57cd868d 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -34,6 +34,8 @@
#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
+#define UNCORE_IGNORE_END -1
+
struct pci_extra_dev {
struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX];
};
diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c
index 08af92af2be2..abb51191f5af 100644
--- a/arch/x86/events/intel/uncore_discovery.c
+++ b/arch/x86/events/intel/uncore_discovery.c
@@ -190,8 +190,25 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
}
+static bool
+uncore_ignore_unit(struct uncore_unit_discovery *unit, int *ignore)
+{
+ int i;
+
+ if (!ignore)
+ return false;
+
+ for (i = 0; ignore[i] != UNCORE_IGNORE_END ; i++) {
+ if (unit->box_type == ignore[i])
+ return true;
+ }
+
+ return false;
+}
+
static int parse_discovery_table(struct pci_dev *dev, int die,
- u32 bar_offset, bool *parsed)
+ u32 bar_offset, bool *parsed,
+ int *ignore)
{
struct uncore_global_discovery global;
struct uncore_unit_discovery unit;
@@ -246,6 +263,9 @@ static int parse_discovery_table(struct pci_dev *dev, int die,
if (unit.access_type >= UNCORE_ACCESS_MAX)
continue;
+ if (uncore_ignore_unit(&unit, ignore))
+ continue;
+
uncore_insert_box_info(&unit, die, *parsed);
}
@@ -254,7 +274,7 @@ static int parse_discovery_table(struct pci_dev *dev, int die,
return 0;
}
-bool intel_uncore_has_discovery_tables(void)
+bool intel_uncore_has_discovery_tables(int *ignore)
{
u32 device, val, entry_id, bar_offset;
int die, dvsec = 0, ret = true;
@@ -290,7 +310,7 @@ bool intel_uncore_has_discovery_tables(void)
if (die < 0)
continue;
- parse_discovery_table(dev, die, bar_offset, &parsed);
+ parse_discovery_table(dev, die, bar_offset, &parsed, ignore);
}
}
diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h
index f4439357779a..41637022b5d1 100644
--- a/arch/x86/events/intel/uncore_discovery.h
+++ b/arch/x86/events/intel/uncore_discovery.h
@@ -122,7 +122,7 @@ struct intel_uncore_discovery_type {
unsigned int *box_offset; /* Box offset */
};
-bool intel_uncore_has_discovery_tables(void);
+bool intel_uncore_has_discovery_tables(int *ignore);
void intel_uncore_clear_discovery_tables(void);
void intel_uncore_generic_uncore_cpu_init(void);
int intel_uncore_generic_uncore_pci_init(void);
--
2.35.1
Hi Peter & Ingo,
Gentle Ping. Please let me know if you have any comments on the patch set.
Thanks
Kan
On 2023-01-12 3:01 p.m., [email protected] wrote:
> From: Kan Liang <[email protected]>
>
> The discovery table of UPI on SPR MCC is broken. The patch series is
> to mitigate the issue by providing a hardcode pre-defined table.
>
> The broken discovery table can trigger a kernel warning message, which
> is overkilled. The patch series also refine the error handling code.
>
> Kan Liang (5):
> perf/x86/uncore: Factor out uncore_device_to_die()
> perf/x86/uncore: Fix potential NULL pointer in uncore_get_alias_name
> perf/x86/uncore: Ignore broken units in discovery table
> perf/x86/uncore: Add a quirk for UPI on SPR
> perf/x86/uncore: Don't WARN_ON_ONCE() for a broken discovery table
>
> arch/x86/events/intel/uncore.c | 34 ++++-
> arch/x86/events/intel/uncore.h | 4 +
> arch/x86/events/intel/uncore_discovery.c | 60 ++++++---
> arch/x86/events/intel/uncore_discovery.h | 14 +-
> arch/x86/events/intel/uncore_snbep.c | 158 ++++++++++++++++++-----
> 5 files changed, 210 insertions(+), 60 deletions(-)
>
On Thu, 19 Jan 2023, Liang, Kan wrote:
> Hi Peter & Ingo,
>
> Gentle Ping. Please let me know if you have any comments on the patch set.
>
> Thanks
> Kan
>
> On 2023-01-12 3:01 p.m., [email protected] wrote:
> > From: Kan Liang <[email protected]>
> >
> > The discovery table of UPI on SPR MCC is broken. The patch series is
> > to mitigate the issue by providing a hardcode pre-defined table.
> >
> > The broken discovery table can trigger a kernel warning message, which
> > is overkilled. The patch series also refine the error handling code.
> >
> > Kan Liang (5):
> > perf/x86/uncore: Factor out uncore_device_to_die()
> > perf/x86/uncore: Fix potential NULL pointer in uncore_get_alias_name
> > perf/x86/uncore: Ignore broken units in discovery table
> > perf/x86/uncore: Add a quirk for UPI on SPR
> > perf/x86/uncore: Don't WARN_ON_ONCE() for a broken discovery table
For the series,
Tested-by: Michael Petlan <[email protected]>
> >
> > arch/x86/events/intel/uncore.c | 34 ++++-
> > arch/x86/events/intel/uncore.h | 4 +
> > arch/x86/events/intel/uncore_discovery.c | 60 ++++++---
> > arch/x86/events/intel/uncore_discovery.h | 14 +-
> > arch/x86/events/intel/uncore_snbep.c | 158 ++++++++++++++++++-----
> > 5 files changed, 210 insertions(+), 60 deletions(-)
> >
>
>
The following commit has been merged into the perf/core branch of tip:
Commit-ID: bd9514a4d5ec25b29728720ca8b3a9ac4e3137d7
Gitweb: https://git.kernel.org/tip/bd9514a4d5ec25b29728720ca8b3a9ac4e3137d7
Author: Kan Liang <[email protected]>
AuthorDate: Thu, 12 Jan 2023 12:01:03 -08:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Sat, 21 Jan 2023 00:06:12 +01:00
perf/x86/uncore: Ignore broken units in discovery table
Some units in a discovery table may be broken, e.g., UPI of SPR MCC.
A generic method is required to ignore the broken units.
Add uncore_units_ignore in the struct intel_uncore_init_fun, which
indicates the type ID of broken units. It will be assigned by the
platform-specific code later when the platform has a broken discovery
table.
Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Tested-by: Michael Petlan <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/events/intel/uncore.c | 8 +++++--
arch/x86/events/intel/uncore.h | 2 ++-
arch/x86/events/intel/uncore_discovery.c | 26 ++++++++++++++++++++---
arch/x86/events/intel/uncore_discovery.h | 2 +-
4 files changed, 32 insertions(+), 6 deletions(-)
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 271c016..d9a2ef5 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1695,7 +1695,10 @@ struct intel_uncore_init_fun {
void (*cpu_init)(void);
int (*pci_init)(void);
void (*mmio_init)(void);
+ /* Discovery table is required */
bool use_discovery;
+ /* The units in the discovery table should be ignored. */
+ int *uncore_units_ignore;
};
static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
@@ -1874,7 +1877,7 @@ static int __init intel_uncore_init(void)
id = x86_match_cpu(intel_uncore_match);
if (!id) {
- if (!uncore_no_discover && intel_uncore_has_discovery_tables())
+ if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL))
uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
else
return -ENODEV;
@@ -1882,7 +1885,8 @@ static int __init intel_uncore_init(void)
uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
if (uncore_no_discover && uncore_init->use_discovery)
return -ENODEV;
- if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables())
+ if (uncore_init->use_discovery &&
+ !intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore))
return -ENODEV;
}
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 8d493be..bbaa57c 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -34,6 +34,8 @@
#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
+#define UNCORE_IGNORE_END -1
+
struct pci_extra_dev {
struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX];
};
diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c
index 08af92a..abb5119 100644
--- a/arch/x86/events/intel/uncore_discovery.c
+++ b/arch/x86/events/intel/uncore_discovery.c
@@ -190,8 +190,25 @@ free_box_offset:
}
+static bool
+uncore_ignore_unit(struct uncore_unit_discovery *unit, int *ignore)
+{
+ int i;
+
+ if (!ignore)
+ return false;
+
+ for (i = 0; ignore[i] != UNCORE_IGNORE_END ; i++) {
+ if (unit->box_type == ignore[i])
+ return true;
+ }
+
+ return false;
+}
+
static int parse_discovery_table(struct pci_dev *dev, int die,
- u32 bar_offset, bool *parsed)
+ u32 bar_offset, bool *parsed,
+ int *ignore)
{
struct uncore_global_discovery global;
struct uncore_unit_discovery unit;
@@ -246,6 +263,9 @@ static int parse_discovery_table(struct pci_dev *dev, int die,
if (unit.access_type >= UNCORE_ACCESS_MAX)
continue;
+ if (uncore_ignore_unit(&unit, ignore))
+ continue;
+
uncore_insert_box_info(&unit, die, *parsed);
}
@@ -254,7 +274,7 @@ static int parse_discovery_table(struct pci_dev *dev, int die,
return 0;
}
-bool intel_uncore_has_discovery_tables(void)
+bool intel_uncore_has_discovery_tables(int *ignore)
{
u32 device, val, entry_id, bar_offset;
int die, dvsec = 0, ret = true;
@@ -290,7 +310,7 @@ bool intel_uncore_has_discovery_tables(void)
if (die < 0)
continue;
- parse_discovery_table(dev, die, bar_offset, &parsed);
+ parse_discovery_table(dev, die, bar_offset, &parsed, ignore);
}
}
diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h
index f443935..4163702 100644
--- a/arch/x86/events/intel/uncore_discovery.h
+++ b/arch/x86/events/intel/uncore_discovery.h
@@ -122,7 +122,7 @@ struct intel_uncore_discovery_type {
unsigned int *box_offset; /* Box offset */
};
-bool intel_uncore_has_discovery_tables(void);
+bool intel_uncore_has_discovery_tables(int *ignore);
void intel_uncore_clear_discovery_tables(void);
void intel_uncore_generic_uncore_cpu_init(void);
int intel_uncore_generic_uncore_pci_init(void);
The following commit has been merged into the perf/core branch of tip:
Commit-ID: 3af548f2361077cd53762c88d62343d4e8ea1efb
Gitweb: https://git.kernel.org/tip/3af548f2361077cd53762c88d62343d4e8ea1efb
Author: Kan Liang <[email protected]>
AuthorDate: Thu, 12 Jan 2023 12:01:02 -08:00
Committer: Peter Zijlstra <[email protected]>
CommitterDate: Sat, 21 Jan 2023 00:06:12 +01:00
perf/x86/uncore: Fix potential NULL pointer in uncore_get_alias_name
The current code assumes that the discovery table provides valid
box_ids for the normal units. It's not the case anymore since some units
in the discovery table are broken on some SPR variants.
Factor out uncore_get_box_id(). Check the existence of the type->box_ids
before using it. If it's not available, use pmu_idx.
Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Tested-by: Michael Petlan <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/events/intel/uncore.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index eeaa92f..271c016 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -857,6 +857,12 @@ static const struct attribute_group uncore_pmu_attr_group = {
.attrs = uncore_pmu_attrs,
};
+static inline int uncore_get_box_id(struct intel_uncore_type *type,
+ struct intel_uncore_pmu *pmu)
+{
+ return type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx;
+}
+
void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
{
struct intel_uncore_type *type = pmu->type;
@@ -865,7 +871,7 @@ void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
sprintf(pmu_name, "uncore_type_%u", type->type_id);
else {
sprintf(pmu_name, "uncore_type_%u_%d",
- type->type_id, type->box_ids[pmu->pmu_idx]);
+ type->type_id, uncore_get_box_id(type, pmu));
}
}
@@ -892,7 +898,7 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
* Use the box ID from the discovery table if applicable.
*/
sprintf(pmu->name, "uncore_%s_%d", type->name,
- type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx);
+ uncore_get_box_id(type, pmu));
}
}
Hello all,
gentle ping #2... How does it look with the patchset acceptance?
Is everything OK? Does it need any additional testing/etc.?
When could the patches be expected to land in Linus' tree? Is it
within v6.2 scope?
Thank you.
Michael
On Thu, 19 Jan 2023, Liang, Kan wrote:
> Hi Peter & Ingo,
>
> Gentle Ping. Please let me know if you have any comments on the patch set.
>
> Thanks
> Kan
>
> On 2023-01-12 3:01 p.m., [email protected] wrote:
> > From: Kan Liang <[email protected]>
> >
> > The discovery table of UPI on SPR MCC is broken. The patch series is
> > to mitigate the issue by providing a hardcode pre-defined table.
> >
> > The broken discovery table can trigger a kernel warning message, which
> > is overkilled. The patch series also refine the error handling code.
> >
> > Kan Liang (5):
> > perf/x86/uncore: Factor out uncore_device_to_die()
> > perf/x86/uncore: Fix potential NULL pointer in uncore_get_alias_name
> > perf/x86/uncore: Ignore broken units in discovery table
> > perf/x86/uncore: Add a quirk for UPI on SPR
> > perf/x86/uncore: Don't WARN_ON_ONCE() for a broken discovery table
> >
> > arch/x86/events/intel/uncore.c | 34 ++++-
> > arch/x86/events/intel/uncore.h | 4 +
> > arch/x86/events/intel/uncore_discovery.c | 60 ++++++---
> > arch/x86/events/intel/uncore_discovery.h | 14 +-
> > arch/x86/events/intel/uncore_snbep.c | 158 ++++++++++++++++++-----
> > 5 files changed, 210 insertions(+), 60 deletions(-)
> >
>
>
I reviewed this patch series, applied it to a kernel tree, and tested
it on two larger (12+ socket) systems, did not notice any adverse
affects. So I believe it's appropriate to add both of these tags:
Tested-by: Steve Wahl <[email protected]>
Reviewed-by: Steve Wahl <[email protected]>
--> Steve
On Thu, Jan 12, 2023 at 12:01:00PM -0800, [email protected] wrote:
> From: Kan Liang <[email protected]>
>
> The discovery table of UPI on SPR MCC is broken. The patch series is
> to mitigate the issue by providing a hardcode pre-defined table.
>
> The broken discovery table can trigger a kernel warning message, which
> is overkilled. The patch series also refine the error handling code.
>
> Kan Liang (5):
> perf/x86/uncore: Factor out uncore_device_to_die()
> perf/x86/uncore: Fix potential NULL pointer in uncore_get_alias_name
> perf/x86/uncore: Ignore broken units in discovery table
> perf/x86/uncore: Add a quirk for UPI on SPR
> perf/x86/uncore: Don't WARN_ON_ONCE() for a broken discovery table
>
> arch/x86/events/intel/uncore.c | 34 ++++-
> arch/x86/events/intel/uncore.h | 4 +
> arch/x86/events/intel/uncore_discovery.c | 60 ++++++---
> arch/x86/events/intel/uncore_discovery.h | 14 +-
> arch/x86/events/intel/uncore_snbep.c | 158 ++++++++++++++++++-----
> 5 files changed, 210 insertions(+), 60 deletions(-)
>
> --
> 2.35.1
>
--
Steve Wahl, Hewlett Packard Enterprise