2023-02-24 16:26:36

by Andrzej Hajda

[permalink] [raw]
Subject: [PATCH 0/3] drm/i915: track gt->wakerefs

This patchset extracts i915 rpm wakeref tracking to separate files (patch 1)
and then uses it to track GT wakerefs (patch 2).
Next step is to use external library lib/ref_track, but this requires some
adjustements to the library and will be performed in separate patchset.
The patches are taken from internal branch.

To: Jani Nikula <[email protected]>
To: Joonas Lahtinen <[email protected]>
To: Rodrigo Vivi <[email protected]>
To: Tvrtko Ursulin <[email protected]>
To: David Airlie <[email protected]>
To: Daniel Vetter <[email protected]>
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Chris Wilson <[email protected]>
Signed-off-by: Andrzej Hajda <[email protected]>

---
Andrzej Hajda (1):
drm/i915: Correct type of wakeref variable

Chris Wilson (2):
drm/i915: Separate wakeref tracking
drm/i915: Track leaked gt->wakerefs

drivers/gpu/drm/i915/Kconfig.debug | 24 ++
drivers/gpu/drm/i915/Makefile | 4 +
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 7 +-
.../drm/i915/gem/selftests/i915_gem_coherency.c | 10 +-
drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 14 +-
drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 13 +-
drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h | 3 +-
drivers/gpu/drm/i915/gt/intel_engine_pm.c | 4 +-
drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 +
.../gpu/drm/i915/gt/intel_execlists_submission.c | 2 +-
drivers/gpu/drm/i915/gt/intel_gt_pm.c | 10 +-
drivers/gpu/drm/i915/gt/intel_gt_pm.h | 38 +++-
drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 4 +-
drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 20 +-
drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 5 +-
drivers/gpu/drm/i915/gt/selftest_reset.c | 10 +-
drivers/gpu/drm/i915/gt/selftest_rps.c | 17 +-
drivers/gpu/drm/i915/gt/selftest_slpc.c | 5 +-
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 11 +-
drivers/gpu/drm/i915/i915_pmu.c | 16 +-
drivers/gpu/drm/i915/intel_runtime_pm.c | 244 +++------------------
drivers/gpu/drm/i915/intel_runtime_pm.h | 10 +-
drivers/gpu/drm/i915/intel_wakeref.c | 4 +
drivers/gpu/drm/i915/intel_wakeref.h | 48 +++-
drivers/gpu/drm/i915/intel_wakeref_tracker.c | 234 ++++++++++++++++++++
drivers/gpu/drm/i915/intel_wakeref_tracker.h | 76 +++++++
26 files changed, 536 insertions(+), 299 deletions(-)
---
base-commit: 1ddc2effff762c6a109af52f3c39534c7115aebe
change-id: 20230224-track_gt-1b3da8bdacd7

Best regards,
--
Andrzej Hajda <[email protected]>


2023-02-24 16:26:39

by Andrzej Hajda

[permalink] [raw]
Subject: [PATCH 1/3] drm/i915: Separate wakeref tracking

From: Chris Wilson <[email protected]>

Extract the callstack tracking of intel_runtime_pm.c into its own
utility so that that we can reuse it for other online debugging of
scoped wakerefs.

Signed-off-by: Chris Wilson <[email protected]>
Signed-off-by: Andrzej Hajda <[email protected]>
---
drivers/gpu/drm/i915/Kconfig.debug | 9 +
drivers/gpu/drm/i915/Makefile | 4 +
drivers/gpu/drm/i915/intel_runtime_pm.c | 244 +++------------------------
drivers/gpu/drm/i915/intel_runtime_pm.h | 10 +-
drivers/gpu/drm/i915/intel_wakeref.h | 6 +-
drivers/gpu/drm/i915/intel_wakeref_tracker.c | 234 +++++++++++++++++++++++++
drivers/gpu/drm/i915/intel_wakeref_tracker.h | 76 +++++++++
7 files changed, 355 insertions(+), 228 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 93dfb7ed970547..5fde52107e3b44 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -25,6 +25,7 @@ config DRM_I915_DEBUG
select PREEMPT_COUNT
select I2C_CHARDEV
select STACKDEPOT
+ select STACKTRACE
select DRM_DP_AUX_CHARDEV
select X86_MSR # used by igt/pm_rpm
select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
@@ -37,6 +38,7 @@ config DRM_I915_DEBUG
select DRM_I915_DEBUG_GEM
select DRM_I915_DEBUG_GEM_ONCE
select DRM_I915_DEBUG_MMIO
+ select DRM_I915_TRACK_WAKEREF
select DRM_I915_DEBUG_RUNTIME_PM
select DRM_I915_SW_FENCE_DEBUG_OBJECTS
select DRM_I915_SELFTEST
@@ -227,11 +229,18 @@ config DRM_I915_DEBUG_VBLANK_EVADE

If in doubt, say "N".

+config DRM_I915_TRACK_WAKEREF
+ depends on STACKDEPOT
+ depends on STACKTRACE
+ bool
+
config DRM_I915_DEBUG_RUNTIME_PM
bool "Enable extra state checking for runtime PM"
depends on DRM_I915
default n
select STACKDEPOT
+ select STACKTRACE
+ select DRM_I915_TRACK_WAKEREF
help
Choose this option to turn on extra state checking for the
runtime PM functionality. This may introduce overhead during
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index b2f91a1f826858..42daff6d575a82 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -81,6 +81,10 @@ i915-$(CONFIG_DEBUG_FS) += \
i915_debugfs_params.o \
display/intel_display_debugfs.o \
display/intel_pipe_crc.o
+
+i915-$(CONFIG_DRM_I915_TRACK_WAKEREF) += \
+ intel_wakeref_tracker.o
+
i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o

# "Graphics Technology" (aka we talk to the gpu)
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 129746713d072f..72887e2bb03c21 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -52,182 +52,37 @@

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)

-#include <linux/sort.h>
-
-#define STACKDEPTH 8
-
-static noinline depot_stack_handle_t __save_depot_stack(void)
-{
- unsigned long entries[STACKDEPTH];
- unsigned int n;
-
- n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
- return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
-}
-
static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
{
- spin_lock_init(&rpm->debug.lock);
- stack_depot_init();
+ intel_wakeref_tracker_init(&rpm->debug);
}

-static noinline depot_stack_handle_t
+static intel_wakeref_t
track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
{
- depot_stack_handle_t stack, *stacks;
- unsigned long flags;
-
- if (rpm->no_wakeref_tracking)
- return -1;
-
- stack = __save_depot_stack();
- if (!stack)
+ if (!rpm->available)
return -1;

- spin_lock_irqsave(&rpm->debug.lock, flags);
-
- if (!rpm->debug.count)
- rpm->debug.last_acquire = stack;
-
- stacks = krealloc(rpm->debug.owners,
- (rpm->debug.count + 1) * sizeof(*stacks),
- GFP_NOWAIT | __GFP_NOWARN);
- if (stacks) {
- stacks[rpm->debug.count++] = stack;
- rpm->debug.owners = stacks;
- } else {
- stack = -1;
- }
-
- spin_unlock_irqrestore(&rpm->debug.lock, flags);
-
- return stack;
+ return intel_wakeref_tracker_add(&rpm->debug);
}

static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
- depot_stack_handle_t stack)
+ intel_wakeref_t wakeref)
{
- struct drm_i915_private *i915 = container_of(rpm,
- struct drm_i915_private,
- runtime_pm);
- unsigned long flags, n;
- bool found = false;
-
- if (unlikely(stack == -1))
- return;
-
- spin_lock_irqsave(&rpm->debug.lock, flags);
- for (n = rpm->debug.count; n--; ) {
- if (rpm->debug.owners[n] == stack) {
- memmove(rpm->debug.owners + n,
- rpm->debug.owners + n + 1,
- (--rpm->debug.count - n) * sizeof(stack));
- found = true;
- break;
- }
- }
- spin_unlock_irqrestore(&rpm->debug.lock, flags);
-
- if (drm_WARN(&i915->drm, !found,
- "Unmatched wakeref (tracking %lu), count %u\n",
- rpm->debug.count, atomic_read(&rpm->wakeref_count))) {
- char *buf;
-
- buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
- if (!buf)
- return;
-
- stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
- DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf);
-
- stack = READ_ONCE(rpm->debug.last_release);
- if (stack) {
- stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
- DRM_DEBUG_DRIVER("wakeref last released at\n%s", buf);
- }
-
- kfree(buf);
- }
+ intel_wakeref_tracker_remove(&rpm->debug, wakeref);
}

-static int cmphandle(const void *_a, const void *_b)
+static void untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm)
{
- const depot_stack_handle_t * const a = _a, * const b = _b;
+ struct drm_printer p = drm_debug_printer("i915");

- if (*a < *b)
- return -1;
- else if (*a > *b)
- return 1;
- else
- return 0;
-}
-
-static void
-__print_intel_runtime_pm_wakeref(struct drm_printer *p,
- const struct intel_runtime_pm_debug *dbg)
-{
- unsigned long i;
- char *buf;
-
- buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
- if (!buf)
- return;
-
- if (dbg->last_acquire) {
- stack_depot_snprint(dbg->last_acquire, buf, PAGE_SIZE, 2);
- drm_printf(p, "Wakeref last acquired:\n%s", buf);
- }
-
- if (dbg->last_release) {
- stack_depot_snprint(dbg->last_release, buf, PAGE_SIZE, 2);
- drm_printf(p, "Wakeref last released:\n%s", buf);
- }
-
- drm_printf(p, "Wakeref count: %lu\n", dbg->count);
-
- sort(dbg->owners, dbg->count, sizeof(*dbg->owners), cmphandle, NULL);
-
- for (i = 0; i < dbg->count; i++) {
- depot_stack_handle_t stack = dbg->owners[i];
- unsigned long rep;
-
- rep = 1;
- while (i + 1 < dbg->count && dbg->owners[i + 1] == stack)
- rep++, i++;
- stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
- drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
- }
-
- kfree(buf);
-}
-
-static noinline void
-__untrack_all_wakerefs(struct intel_runtime_pm_debug *debug,
- struct intel_runtime_pm_debug *saved)
-{
- *saved = *debug;
-
- debug->owners = NULL;
- debug->count = 0;
- debug->last_release = __save_depot_stack();
-}
-
-static void
-dump_and_free_wakeref_tracking(struct intel_runtime_pm_debug *debug)
-{
- if (debug->count) {
- struct drm_printer p = drm_debug_printer("i915");
-
- __print_intel_runtime_pm_wakeref(&p, debug);
- }
-
- kfree(debug->owners);
+ intel_wakeref_tracker_reset(&rpm->debug, &p);
}

static noinline void
__intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm)
{
- struct intel_runtime_pm_debug dbg = {};
+ struct intel_wakeref_tracker saved;
unsigned long flags;

if (!atomic_dec_and_lock_irqsave(&rpm->wakeref_count,
@@ -235,60 +90,21 @@ __intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm)
flags))
return;

- __untrack_all_wakerefs(&rpm->debug, &dbg);
+ saved = __intel_wakeref_tracker_reset(&rpm->debug);
spin_unlock_irqrestore(&rpm->debug.lock, flags);

- dump_and_free_wakeref_tracking(&dbg);
-}
-
-static noinline void
-untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm)
-{
- struct intel_runtime_pm_debug dbg = {};
- unsigned long flags;
-
- spin_lock_irqsave(&rpm->debug.lock, flags);
- __untrack_all_wakerefs(&rpm->debug, &dbg);
- spin_unlock_irqrestore(&rpm->debug.lock, flags);
+ if (saved.count) {
+ struct drm_printer p = drm_debug_printer("i915");

- dump_and_free_wakeref_tracking(&dbg);
+ __intel_wakeref_tracker_show(&saved, &p);
+ intel_wakeref_tracker_fini(&saved);
+ }
}

void print_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
struct drm_printer *p)
{
- struct intel_runtime_pm_debug dbg = {};
-
- do {
- unsigned long alloc = dbg.count;
- depot_stack_handle_t *s;
-
- spin_lock_irq(&rpm->debug.lock);
- dbg.count = rpm->debug.count;
- if (dbg.count <= alloc) {
- memcpy(dbg.owners,
- rpm->debug.owners,
- dbg.count * sizeof(*s));
- }
- dbg.last_acquire = rpm->debug.last_acquire;
- dbg.last_release = rpm->debug.last_release;
- spin_unlock_irq(&rpm->debug.lock);
- if (dbg.count <= alloc)
- break;
-
- s = krealloc(dbg.owners,
- dbg.count * sizeof(*s),
- GFP_NOWAIT | __GFP_NOWARN);
- if (!s)
- goto out;
-
- dbg.owners = s;
- } while (1);
-
- __print_intel_runtime_pm_wakeref(p, &dbg);
-
-out:
- kfree(dbg.owners);
+ intel_wakeref_tracker_show(&rpm->debug, p);
}

#else
@@ -297,14 +113,14 @@ static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
{
}

-static depot_stack_handle_t
+static intel_wakeref_t
track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
{
return -1;
}

static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
- intel_wakeref_t wref)
+ intel_wakeref_t wakeref)
{
}

@@ -349,9 +165,8 @@ intel_runtime_pm_release(struct intel_runtime_pm *rpm, int wakelock)
static intel_wakeref_t __intel_runtime_pm_get(struct intel_runtime_pm *rpm,
bool wakelock)
{
- struct drm_i915_private *i915 = container_of(rpm,
- struct drm_i915_private,
- runtime_pm);
+ struct drm_i915_private *i915 =
+ container_of(rpm, struct drm_i915_private, runtime_pm);
int ret;

ret = pm_runtime_get_sync(rpm->kdev);
@@ -556,9 +371,8 @@ void intel_runtime_pm_put(struct intel_runtime_pm *rpm, intel_wakeref_t wref)
*/
void intel_runtime_pm_enable(struct intel_runtime_pm *rpm)
{
- struct drm_i915_private *i915 = container_of(rpm,
- struct drm_i915_private,
- runtime_pm);
+ struct drm_i915_private *i915 =
+ container_of(rpm, struct drm_i915_private, runtime_pm);
struct device *kdev = rpm->kdev;

/*
@@ -611,9 +425,8 @@ void intel_runtime_pm_enable(struct intel_runtime_pm *rpm)

void intel_runtime_pm_disable(struct intel_runtime_pm *rpm)
{
- struct drm_i915_private *i915 = container_of(rpm,
- struct drm_i915_private,
- runtime_pm);
+ struct drm_i915_private *i915 =
+ container_of(rpm, struct drm_i915_private, runtime_pm);
struct device *kdev = rpm->kdev;

/* Transfer rpm ownership back to core */
@@ -628,9 +441,8 @@ void intel_runtime_pm_disable(struct intel_runtime_pm *rpm)

void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm)
{
- struct drm_i915_private *i915 = container_of(rpm,
- struct drm_i915_private,
- runtime_pm);
+ struct drm_i915_private *i915 =
+ container_of(rpm, struct drm_i915_private, runtime_pm);
int count = atomic_read(&rpm->wakeref_count);

intel_wakeref_auto_fini(&rpm->userfault_wakeref);
@@ -646,7 +458,7 @@ void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm)
void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm)
{
struct drm_i915_private *i915 =
- container_of(rpm, struct drm_i915_private, runtime_pm);
+ container_of(rpm, struct drm_i915_private, runtime_pm);
struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
struct device *kdev = &pdev->dev;

diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h
index e592e8d6499a1f..a8dc2baf79844f 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.h
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.h
@@ -83,15 +83,7 @@ struct intel_runtime_pm {
* paired rpm_put) we can remove corresponding pairs of and keep
* the array trimmed to active wakerefs.
*/
- struct intel_runtime_pm_debug {
- spinlock_t lock;
-
- depot_stack_handle_t last_acquire;
- depot_stack_handle_t last_release;
-
- depot_stack_handle_t *owners;
- unsigned long count;
- } debug;
+ struct intel_wakeref_tracker debug;
#endif
};

diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index 71b8a63f6f104d..20720fbcc28d46 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -17,7 +17,9 @@
#include <linux/timer.h>
#include <linux/workqueue.h>

-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
+#include "intel_wakeref_tracker.h"
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF)
#define INTEL_WAKEREF_BUG_ON(expr) BUG_ON(expr)
#else
#define INTEL_WAKEREF_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
@@ -26,8 +28,6 @@
struct intel_runtime_pm;
struct intel_wakeref;

-typedef depot_stack_handle_t intel_wakeref_t;
-
struct intel_wakeref_ops {
int (*get)(struct intel_wakeref *wf);
int (*put)(struct intel_wakeref *wf);
diff --git a/drivers/gpu/drm/i915/intel_wakeref_tracker.c b/drivers/gpu/drm/i915/intel_wakeref_tracker.c
new file mode 100644
index 00000000000000..a0bcef13a1085a
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_wakeref_tracker.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <linux/slab.h>
+#include <linux/stackdepot.h>
+#include <linux/stacktrace.h>
+#include <linux/sort.h>
+
+#include <drm/drm_print.h>
+
+#include "intel_wakeref.h"
+
+#define STACKDEPTH 8
+
+static noinline depot_stack_handle_t __save_depot_stack(void)
+{
+ unsigned long entries[STACKDEPTH];
+ unsigned int n;
+
+ n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
+ return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
+}
+
+static void __print_depot_stack(depot_stack_handle_t stack,
+ char *buf, int sz, int indent)
+{
+ unsigned long *entries;
+ unsigned int nr_entries;
+
+ nr_entries = stack_depot_fetch(stack, &entries);
+ stack_trace_snprint(buf, sz, entries, nr_entries, indent);
+}
+
+static int cmphandle(const void *_a, const void *_b)
+{
+ const depot_stack_handle_t * const a = _a, * const b = _b;
+
+ if (*a < *b)
+ return -1;
+ else if (*a > *b)
+ return 1;
+ else
+ return 0;
+}
+
+void
+__intel_wakeref_tracker_show(const struct intel_wakeref_tracker *w,
+ struct drm_printer *p)
+{
+ unsigned long i;
+ char *buf;
+
+ buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
+ if (!buf)
+ return;
+
+ if (w->last_acquire) {
+ __print_depot_stack(w->last_acquire, buf, PAGE_SIZE, 2);
+ drm_printf(p, "Wakeref last acquired:\n%s", buf);
+ }
+
+ if (w->last_release) {
+ __print_depot_stack(w->last_release, buf, PAGE_SIZE, 2);
+ drm_printf(p, "Wakeref last released:\n%s", buf);
+ }
+
+ drm_printf(p, "Wakeref count: %lu\n", w->count);
+
+ sort(w->owners, w->count, sizeof(*w->owners), cmphandle, NULL);
+
+ for (i = 0; i < w->count; i++) {
+ depot_stack_handle_t stack = w->owners[i];
+ unsigned long rep;
+
+ rep = 1;
+ while (i + 1 < w->count && w->owners[i + 1] == stack)
+ rep++, i++;
+ __print_depot_stack(stack, buf, PAGE_SIZE, 2);
+ drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
+ }
+
+ kfree(buf);
+}
+
+void intel_wakeref_tracker_show(struct intel_wakeref_tracker *w,
+ struct drm_printer *p)
+{
+ struct intel_wakeref_tracker tmp = {};
+
+ do {
+ unsigned long alloc = tmp.count;
+ depot_stack_handle_t *s;
+
+ spin_lock_irq(&w->lock);
+ tmp.count = w->count;
+ if (tmp.count <= alloc)
+ memcpy(tmp.owners, w->owners, tmp.count * sizeof(*s));
+ tmp.last_acquire = w->last_acquire;
+ tmp.last_release = w->last_release;
+ spin_unlock_irq(&w->lock);
+ if (tmp.count <= alloc)
+ break;
+
+ s = krealloc(tmp.owners,
+ tmp.count * sizeof(*s),
+ GFP_NOWAIT | __GFP_NOWARN);
+ if (!s)
+ goto out;
+
+ tmp.owners = s;
+ } while (1);
+
+ __intel_wakeref_tracker_show(&tmp, p);
+
+out:
+ intel_wakeref_tracker_fini(&tmp);
+}
+
+intel_wakeref_t intel_wakeref_tracker_add(struct intel_wakeref_tracker *w)
+{
+ depot_stack_handle_t stack, *stacks;
+ unsigned long flags;
+
+ stack = __save_depot_stack();
+ if (!stack)
+ return -1;
+
+ spin_lock_irqsave(&w->lock, flags);
+
+ if (!w->count)
+ w->last_acquire = stack;
+
+ stacks = krealloc(w->owners,
+ (w->count + 1) * sizeof(*stacks),
+ GFP_NOWAIT | __GFP_NOWARN);
+ if (stacks) {
+ stacks[w->count++] = stack;
+ w->owners = stacks;
+ } else {
+ stack = -1;
+ }
+
+ spin_unlock_irqrestore(&w->lock, flags);
+
+ return stack;
+}
+
+void intel_wakeref_tracker_remove(struct intel_wakeref_tracker *w,
+ intel_wakeref_t stack)
+{
+ unsigned long flags, n;
+ bool found = false;
+
+ if (unlikely(stack == -1))
+ return;
+
+ spin_lock_irqsave(&w->lock, flags);
+ for (n = w->count; n--; ) {
+ if (w->owners[n] == stack) {
+ memmove(w->owners + n,
+ w->owners + n + 1,
+ (--w->count - n) * sizeof(stack));
+ found = true;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&w->lock, flags);
+
+ if (WARN(!found,
+ "Unmatched wakeref %x, tracking %lu\n",
+ stack, w->count)) {
+ char *buf;
+
+ buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
+ if (!buf)
+ return;
+
+ __print_depot_stack(stack, buf, PAGE_SIZE, 2);
+ pr_err("wakeref %x from\n%s", stack, buf);
+
+ stack = READ_ONCE(w->last_release);
+ if (stack && !w->count) {
+ __print_depot_stack(stack, buf, PAGE_SIZE, 2);
+ pr_err("wakeref last released at\n%s", buf);
+ }
+
+ kfree(buf);
+ }
+}
+
+struct intel_wakeref_tracker
+__intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w)
+{
+ struct intel_wakeref_tracker saved;
+
+ lockdep_assert_held(&w->lock);
+
+ saved = *w;
+
+ w->owners = NULL;
+ w->count = 0;
+ w->last_release = __save_depot_stack();
+
+ return saved;
+}
+
+void intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w,
+ struct drm_printer *p)
+{
+ struct intel_wakeref_tracker tmp;
+
+ spin_lock_irq(&w->lock);
+ tmp = __intel_wakeref_tracker_reset(w);
+ spin_unlock_irq(&w->lock);
+
+ if (tmp.count)
+ __intel_wakeref_tracker_show(&tmp, p);
+
+ intel_wakeref_tracker_fini(&tmp);
+}
+
+void intel_wakeref_tracker_init(struct intel_wakeref_tracker *w)
+{
+ memset(w, 0, sizeof(*w));
+ spin_lock_init(&w->lock);
+ stack_depot_init();
+}
+
+void intel_wakeref_tracker_fini(struct intel_wakeref_tracker *w)
+{
+ kfree(w->owners);
+}
diff --git a/drivers/gpu/drm/i915/intel_wakeref_tracker.h b/drivers/gpu/drm/i915/intel_wakeref_tracker.h
new file mode 100644
index 00000000000000..61df68e28c0fbf
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_wakeref_tracker.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_WAKEREF_TRACKER_H
+#define INTEL_WAKEREF_TRACKER_H
+
+#include <linux/kconfig.h>
+#include <linux/spinlock.h>
+#include <linux/stackdepot.h>
+
+typedef depot_stack_handle_t intel_wakeref_t;
+
+struct drm_printer;
+
+struct intel_wakeref_tracker {
+ spinlock_t lock;
+
+ depot_stack_handle_t last_acquire;
+ depot_stack_handle_t last_release;
+
+ depot_stack_handle_t *owners;
+ unsigned long count;
+};
+
+#if IS_ENABLED(CONFIG_DRM_I915_TRACK_WAKEREF)
+
+void intel_wakeref_tracker_init(struct intel_wakeref_tracker *w);
+void intel_wakeref_tracker_fini(struct intel_wakeref_tracker *w);
+
+intel_wakeref_t intel_wakeref_tracker_add(struct intel_wakeref_tracker *w);
+void intel_wakeref_tracker_remove(struct intel_wakeref_tracker *w,
+ intel_wakeref_t handle);
+
+struct intel_wakeref_tracker
+__intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w);
+void intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w,
+ struct drm_printer *p);
+
+void __intel_wakeref_tracker_show(const struct intel_wakeref_tracker *w,
+ struct drm_printer *p);
+void intel_wakeref_tracker_show(struct intel_wakeref_tracker *w,
+ struct drm_printer *p);
+
+#else
+
+static inline void intel_wakeref_tracker_init(struct intel_wakeref_tracker *w) {}
+static inline void intel_wakeref_tracker_fini(struct intel_wakeref_tracker *w) {}
+
+static inline intel_wakeref_t
+intel_wakeref_tracker_add(struct intel_wakeref_tracker *w)
+{
+ return -1;
+}
+
+static inline void
+intel_wakeref_untrack_remove(struct intel_wakeref_tracker *w, intel_wakeref_t handle) {}
+
+static inline struct intel_wakeref_tracker
+__intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w)
+{
+ return (struct intel_wakeref_tracker){};
+}
+
+static inline void intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w,
+ struct drm_printer *p)
+{
+}
+
+static inline void __intel_wakeref_tracker_show(const struct intel_wakeref_tracker *w, struct drm_printer *p) {}
+static inline void intel_wakeref_tracker_show(struct intel_wakeref_tracker *w, struct drm_printer *p) {}
+
+#endif
+
+#endif /* INTEL_WAKEREF_TRACKER_H */

--
2.34.1

2023-02-24 16:26:49

by Andrzej Hajda

[permalink] [raw]
Subject: [PATCH 3/3] drm/i915: Correct type of wakeref variable

Wakeref has dedicated type. Assumption it will be int
compatible forever is incorrect.

Signed-off-by: Andrzej Hajda <[email protected]>
---
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 978820f8697059..c35f551193c9ad 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -3235,7 +3235,7 @@ static void destroyed_worker_func(struct work_struct *w)
struct intel_guc *guc = container_of(w, struct intel_guc,
submission_state.destroyed_worker);
struct intel_gt *gt = guc_to_gt(guc);
- int tmp;
+ intel_wakeref_t tmp;

with_intel_gt_pm(gt, tmp)
deregister_destroyed_contexts(guc);

--
2.34.1

2023-02-24 16:26:51

by Andrzej Hajda

[permalink] [raw]
Subject: [PATCH 2/3] drm/i915: Track leaked gt->wakerefs

From: Chris Wilson <[email protected]>

Track every intel_gt_pm_get() until its corresponding release in
intel_gt_pm_put() by returning a cookie to the caller for acquire that
must be passed by on released. When there is an imbalance, we can see who
either tried to free a stale wakeref, or who forgot to free theirs.

Signed-off-by: Chris Wilson <[email protected]>
Signed-off-by: Andrzej Hajda <[email protected]>
---
drivers/gpu/drm/i915/Kconfig.debug | 15 ++++++++
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 7 ++--
.../drm/i915/gem/selftests/i915_gem_coherency.c | 10 +++---
drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 14 ++++----
drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 13 ++++---
drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h | 3 +-
drivers/gpu/drm/i915/gt/intel_engine_pm.c | 4 +--
drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 ++
.../gpu/drm/i915/gt/intel_execlists_submission.c | 2 +-
drivers/gpu/drm/i915/gt/intel_gt_pm.c | 10 +++---
drivers/gpu/drm/i915/gt/intel_gt_pm.h | 38 +++++++++++++++-----
drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 4 +--
drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 20 ++++++-----
drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 5 +--
drivers/gpu/drm/i915/gt/selftest_reset.c | 10 +++---
drivers/gpu/drm/i915/gt/selftest_rps.c | 17 +++++----
drivers/gpu/drm/i915/gt/selftest_slpc.c | 5 +--
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 9 ++---
drivers/gpu/drm/i915/i915_pmu.c | 16 +++++----
drivers/gpu/drm/i915/intel_wakeref.c | 4 +++
drivers/gpu/drm/i915/intel_wakeref.h | 42 ++++++++++++++++++++++
21 files changed, 180 insertions(+), 70 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 5fde52107e3b44..63b77dc48d4394 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -40,6 +40,7 @@ config DRM_I915_DEBUG
select DRM_I915_DEBUG_MMIO
select DRM_I915_TRACK_WAKEREF
select DRM_I915_DEBUG_RUNTIME_PM
+ select DRM_I915_DEBUG_WAKEREF
select DRM_I915_SW_FENCE_DEBUG_OBJECTS
select DRM_I915_SELFTEST
select BROKEN # for prototype uAPI
@@ -249,3 +250,17 @@ config DRM_I915_DEBUG_RUNTIME_PM
Recommended for driver developers only.

If in doubt, say "N"
+
+config DRM_I915_DEBUG_WAKEREF
+ bool "Enable extra tracking for wakerefs"
+ depends on DRM_I915
+ default n
+ select STACKDEPOT
+ select STACKTRACE
+ select DRM_I915_TRACK_WAKEREF
+ help
+ Choose this option to turn on extra state checking and usage
+ tracking for the wakerefPM functionality. This may introduce
+ overhead during driver runtime.
+
+ If in doubt, say "N"
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 9dce2957b4e5ae..65abcd82b6de93 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -253,6 +253,7 @@ struct i915_execbuffer {
struct intel_gt *gt; /* gt for the execbuf */
struct intel_context *context; /* logical state for the request */
struct i915_gem_context *gem_context; /** caller's context */
+ intel_wakeref_t wakeref;

/** our requests to build */
struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
@@ -2713,7 +2714,7 @@ eb_select_engine(struct i915_execbuffer *eb)

for_each_child(ce, child)
intel_context_get(child);
- intel_gt_pm_get(ce->engine->gt);
+ eb->wakeref = intel_gt_pm_get(ce->engine->gt);

if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
err = intel_context_alloc_state(ce);
@@ -2752,7 +2753,7 @@ eb_select_engine(struct i915_execbuffer *eb)
return err;

err:
- intel_gt_pm_put(ce->engine->gt);
+ intel_gt_pm_put(ce->engine->gt, eb->wakeref);
for_each_child(ce, child)
intel_context_put(child);
intel_context_put(ce);
@@ -2765,7 +2766,7 @@ eb_put_engine(struct i915_execbuffer *eb)
struct intel_context *child;

i915_vm_put(eb->context->vm);
- intel_gt_pm_put(eb->gt);
+ intel_gt_pm_put(eb->context->engine->gt, eb->wakeref);
for_each_child(eb->context, child)
intel_context_put(child);
intel_context_put(eb->context);
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
index 3bef1beec7cbb5..3fd68a099a85ef 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -85,6 +85,7 @@ static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)

static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
{
+ intel_wakeref_t wakeref;
struct i915_vma *vma;
u32 __iomem *map;
int err = 0;
@@ -99,7 +100,7 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
if (IS_ERR(vma))
return PTR_ERR(vma);

- intel_gt_pm_get(vma->vm->gt);
+ wakeref = intel_gt_pm_get(vma->vm->gt);

map = i915_vma_pin_iomap(vma);
i915_vma_unpin(vma);
@@ -112,12 +113,13 @@ static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
i915_vma_unpin_iomap(vma);

out_rpm:
- intel_gt_pm_put(vma->vm->gt);
+ intel_gt_pm_put(vma->vm->gt, wakeref);
return err;
}

static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
{
+ intel_wakeref_t wakeref;
struct i915_vma *vma;
u32 __iomem *map;
int err = 0;
@@ -132,7 +134,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
if (IS_ERR(vma))
return PTR_ERR(vma);

- intel_gt_pm_get(vma->vm->gt);
+ wakeref = intel_gt_pm_get(vma->vm->gt);

map = i915_vma_pin_iomap(vma);
i915_vma_unpin(vma);
@@ -145,7 +147,7 @@ static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
i915_vma_unpin_iomap(vma);

out_rpm:
- intel_gt_pm_put(vma->vm->gt);
+ intel_gt_pm_put(vma->vm->gt, wakeref);
return err;
}

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index 56279908ed305b..f6f36a85688814 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -630,14 +630,14 @@ static bool assert_mmap_offset(struct drm_i915_private *i915,
static void disable_retire_worker(struct drm_i915_private *i915)
{
i915_gem_driver_unregister__shrinker(i915);
- intel_gt_pm_get(to_gt(i915));
+ intel_gt_pm_get_untracked(to_gt(i915));
cancel_delayed_work_sync(&to_gt(i915)->requests.retire_work);
}

static void restore_retire_worker(struct drm_i915_private *i915)
{
igt_flush_test(i915);
- intel_gt_pm_put(to_gt(i915));
+ intel_gt_pm_put_untracked(to_gt(i915));
i915_gem_driver_register__shrinker(i915);
}

@@ -778,6 +778,7 @@ static int igt_mmap_offset_exhaustion(void *arg)

static int gtt_set(struct drm_i915_gem_object *obj)
{
+ intel_wakeref_t wakeref;
struct i915_vma *vma;
void __iomem *map;
int err = 0;
@@ -786,7 +787,7 @@ static int gtt_set(struct drm_i915_gem_object *obj)
if (IS_ERR(vma))
return PTR_ERR(vma);

- intel_gt_pm_get(vma->vm->gt);
+ wakeref = intel_gt_pm_get(vma->vm->gt);
map = i915_vma_pin_iomap(vma);
i915_vma_unpin(vma);
if (IS_ERR(map)) {
@@ -798,12 +799,13 @@ static int gtt_set(struct drm_i915_gem_object *obj)
i915_vma_unpin_iomap(vma);

out:
- intel_gt_pm_put(vma->vm->gt);
+ intel_gt_pm_put(vma->vm->gt, wakeref);
return err;
}

static int gtt_check(struct drm_i915_gem_object *obj)
{
+ intel_wakeref_t wakeref;
struct i915_vma *vma;
void __iomem *map;
int err = 0;
@@ -812,7 +814,7 @@ static int gtt_check(struct drm_i915_gem_object *obj)
if (IS_ERR(vma))
return PTR_ERR(vma);

- intel_gt_pm_get(vma->vm->gt);
+ wakeref = intel_gt_pm_get(vma->vm->gt);
map = i915_vma_pin_iomap(vma);
i915_vma_unpin(vma);
if (IS_ERR(map)) {
@@ -828,7 +830,7 @@ static int gtt_check(struct drm_i915_gem_object *obj)
i915_vma_unpin_iomap(vma);

out:
- intel_gt_pm_put(vma->vm->gt);
+ intel_gt_pm_put(vma->vm->gt, wakeref);
return err;
}

diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index ecc990ec1b9526..d650beb8ed22f6 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -28,11 +28,14 @@ static void irq_disable(struct intel_breadcrumbs *b)

static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
+ intel_wakeref_t wakeref;
+
/*
* Since we are waiting on a request, the GPU should be busy
* and should have its own rpm reference.
*/
- if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
+ wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt);
+ if (GEM_WARN_ON(!wakeref))
return;

/*
@@ -41,7 +44,7 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
* which we can add a new waiter and avoid the cost of re-enabling
* the irq.
*/
- WRITE_ONCE(b->irq_armed, true);
+ WRITE_ONCE(b->irq_armed, wakeref);

/* Requests may have completed before we could enable the interrupt. */
if (!b->irq_enabled++ && b->irq_enable(b))
@@ -61,12 +64,14 @@ static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)

static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
+ intel_wakeref_t wakeref = b->irq_armed;
+
GEM_BUG_ON(!b->irq_enabled);
if (!--b->irq_enabled)
b->irq_disable(b);

- WRITE_ONCE(b->irq_armed, false);
- intel_gt_pm_put_async(b->irq_engine->gt);
+ WRITE_ONCE(b->irq_armed, 0);
+ intel_gt_pm_put_async(b->irq_engine->gt, wakeref);
}

static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
index 72dfd3748c4c33..bdf09fd67b6e70 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
@@ -13,6 +13,7 @@
#include <linux/types.h>

#include "intel_engine_types.h"
+#include "intel_wakeref.h"

/*
* Rather than have every client wait upon all user interrupts,
@@ -43,7 +44,7 @@ struct intel_breadcrumbs {
spinlock_t irq_lock; /* protects the interrupt from hardirq context */
struct irq_work irq_work; /* for use from inside irq_lock */
unsigned int irq_enabled;
- bool irq_armed;
+ intel_wakeref_t irq_armed;

/* Not all breadcrumbs are attached to physical HW */
intel_engine_mask_t engine_mask;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index e971b153fda976..7db9229d28639c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -63,7 +63,7 @@ static int __engine_unpark(struct intel_wakeref *wf)

ENGINE_TRACE(engine, "\n");

- intel_gt_pm_get(engine->gt);
+ engine->wakeref_track = intel_gt_pm_get(engine->gt);

/* Discard stale context state from across idling */
ce = engine->kernel_context;
@@ -276,7 +276,7 @@ static int __engine_park(struct intel_wakeref *wf)
engine->park(engine);

/* While gt calls i915_vma_parked(), we have to break the lock cycle */
- intel_gt_pm_put_async(engine->gt);
+ intel_gt_pm_put_async(engine->gt, engine->wakeref_track);
return 0;
}

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 0a071e5da1a85d..2791d487ab114c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -431,7 +431,9 @@ struct intel_engine_cs {
unsigned long serial;

unsigned long wakeref_serial;
+ intel_wakeref_t wakeref_track;
struct intel_wakeref wakeref;
+
struct file *default_state;

struct {
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 3c573d41d40468..80d90aa305f1ea 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -630,7 +630,7 @@ static void __execlists_schedule_out(struct i915_request * const rq,
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
if (engine->fw_domain && !--engine->fw_active)
intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
- intel_gt_pm_put_async(engine->gt);
+ intel_gt_pm_put_async_untracked(engine->gt);

/*
* If this is part of a virtual engine, its next request may
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index cef3d6f5c34e01..302f908b37fb2e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -49,19 +49,20 @@ static void mtl_media_idle(struct intel_gt *gt)
static void user_forcewake(struct intel_gt *gt, bool suspend)
{
int count = atomic_read(&gt->user_wakeref);
+ intel_wakeref_t wakeref;

/* Inside suspend/resume so single threaded, no races to worry about. */
if (likely(!count))
return;

- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
if (suspend) {
GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
atomic_sub(count, &gt->wakeref.count);
} else {
atomic_add(count, &gt->wakeref.count);
}
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
}

static void runtime_begin(struct intel_gt *gt)
@@ -248,6 +249,7 @@ int intel_gt_resume(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
int err;

err = intel_gt_has_unrecoverable_error(gt);
@@ -264,7 +266,7 @@ int intel_gt_resume(struct intel_gt *gt)
*/
gt_sanitize(gt, true);

- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);

intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
intel_rc6_sanitize(&gt->rc6);
@@ -307,7 +309,7 @@ int intel_gt_resume(struct intel_gt *gt)

out_fw:
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
return err;

err_wedged:
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index 6c9a4645236467..2ae5fbbede6ca8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -16,19 +16,28 @@ static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt)
return intel_wakeref_is_active(&gt->wakeref);
}

-static inline void intel_gt_pm_get(struct intel_gt *gt)
+static inline void intel_gt_pm_get_untracked(struct intel_gt *gt)
{
intel_wakeref_get(&gt->wakeref);
}

+static inline intel_wakeref_t intel_gt_pm_get(struct intel_gt *gt)
+{
+ intel_gt_pm_get_untracked(gt);
+ return intel_wakeref_track(&gt->wakeref);
+}
+
static inline void __intel_gt_pm_get(struct intel_gt *gt)
{
__intel_wakeref_get(&gt->wakeref);
}

-static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt)
+static inline intel_wakeref_t intel_gt_pm_get_if_awake(struct intel_gt *gt)
{
- return intel_wakeref_get_if_active(&gt->wakeref);
+ if (!intel_wakeref_get_if_active(&gt->wakeref))
+ return 0;
+
+ return intel_wakeref_track(&gt->wakeref);
}

static inline void intel_gt_pm_might_get(struct intel_gt *gt)
@@ -36,12 +45,18 @@ static inline void intel_gt_pm_might_get(struct intel_gt *gt)
intel_wakeref_might_get(&gt->wakeref);
}

-static inline void intel_gt_pm_put(struct intel_gt *gt)
+static inline void intel_gt_pm_put_untracked(struct intel_gt *gt)
{
intel_wakeref_put(&gt->wakeref);
}

-static inline void intel_gt_pm_put_async(struct intel_gt *gt)
+static inline void intel_gt_pm_put(struct intel_gt *gt, intel_wakeref_t handle)
+{
+ intel_wakeref_untrack(&gt->wakeref, handle);
+ intel_gt_pm_put_untracked(gt);
+}
+
+static inline void intel_gt_pm_put_async_untracked(struct intel_gt *gt)
{
intel_wakeref_put_async(&gt->wakeref);
}
@@ -51,9 +66,14 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
intel_wakeref_might_put(&gt->wakeref);
}

-#define with_intel_gt_pm(gt, tmp) \
- for (tmp = 1, intel_gt_pm_get(gt); tmp; \
- intel_gt_pm_put(gt), tmp = 0)
+static inline void intel_gt_pm_put_async(struct intel_gt *gt, intel_wakeref_t handle)
+{
+ intel_wakeref_untrack(&gt->wakeref, handle);
+ intel_gt_pm_put_async_untracked(gt);
+}
+
+#define with_intel_gt_pm(gt, wf) \
+ for (wf = intel_gt_pm_get(gt); wf; intel_gt_pm_put(gt, wf), wf = 0)

/**
* with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent
@@ -64,7 +84,7 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
* @wf: pointer to a temporary wakeref.
*/
#define with_intel_gt_pm_if_awake(gt, wf) \
- for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0)
+ for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt, wf), wf = 0)

static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 83df4cd5e06cb9..0f5ba4491e4de8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -27,7 +27,7 @@
void intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt)
{
atomic_inc(&gt->user_wakeref);
- intel_gt_pm_get(gt);
+ intel_gt_pm_get_untracked(gt);
if (GRAPHICS_VER(gt->i915) >= 6)
intel_uncore_forcewake_user_get(gt->uncore);
}
@@ -36,7 +36,7 @@ void intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt)
{
if (GRAPHICS_VER(gt->i915) >= 6)
intel_uncore_forcewake_user_put(gt->uncore);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put_untracked(gt);
atomic_dec(&gt->user_wakeref);
}

diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index 542ce6d2de1922..f3d21bdfd7f639 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -21,20 +21,22 @@ static int cmp_u32(const void *A, const void *B)
return *a - *b;
}

-static void perf_begin(struct intel_gt *gt)
+static intel_wakeref_t perf_begin(struct intel_gt *gt)
{
- intel_gt_pm_get(gt);
+ intel_wakeref_t wakeref = intel_gt_pm_get(gt);

/* Boost gpufreq to max [waitboost] and keep it fixed */
atomic_inc(&gt->rps.num_waiters);
schedule_work(&gt->rps.work);
flush_work(&gt->rps.work);
+
+ return wakeref;
}

-static int perf_end(struct intel_gt *gt)
+static int perf_end(struct intel_gt *gt, intel_wakeref_t wakeref)
{
atomic_dec(&gt->rps.num_waiters);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);

return igt_flush_test(gt->i915);
}
@@ -133,12 +135,13 @@ static int perf_mi_bb_start(void *arg)
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
int err = 0;

if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
return 0;

- perf_begin(gt);
+ wakeref = perf_begin(gt);
for_each_engine(engine, gt, id) {
struct intel_context *ce = engine->kernel_context;
struct i915_vma *batch;
@@ -207,7 +210,7 @@ static int perf_mi_bb_start(void *arg)
pr_info("%s: MI_BB_START cycles: %u\n",
engine->name, trifilter(cycles));
}
- if (perf_end(gt))
+ if (perf_end(gt, wakeref))
err = -EIO;

return err;
@@ -260,12 +263,13 @@ static int perf_mi_noop(void *arg)
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
int err = 0;

if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
return 0;

- perf_begin(gt);
+ wakeref = perf_begin(gt);
for_each_engine(engine, gt, id) {
struct intel_context *ce = engine->kernel_context;
struct i915_vma *base, *nop;
@@ -364,7 +368,7 @@ static int perf_mi_noop(void *arg)
pr_info("%s: 16K MI_NOOP cycles: %u\n",
engine->name, trifilter(cycles));
}
- if (perf_end(gt))
+ if (perf_end(gt, wakeref))
err = -EIO;

return err;
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index b46425aeb2f048..23bd3ab9ddeea1 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -81,6 +81,7 @@ static int live_gt_clocks(void *arg)
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
int err = 0;

if (!gt->clock_frequency) { /* unknown */
@@ -91,7 +92,7 @@ static int live_gt_clocks(void *arg)
if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
return 0;

- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

for_each_engine(engine, gt, id) {
@@ -128,7 +129,7 @@ static int live_gt_clocks(void *arg)
}

intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);

return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index a9e0a91bc0e026..ab464cd72b8c39 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -257,11 +257,12 @@ static int igt_atomic_reset(void *arg)
{
struct intel_gt *gt = arg;
const typeof(*igt_atomic_phases) *p;
+ intel_wakeref_t wakeref;
int err = 0;

/* Check that the resets are usable from atomic context */

- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
igt_global_reset_lock(gt);

/* Flush any requests before we get started and check basics */
@@ -292,7 +293,7 @@ static int igt_atomic_reset(void *arg)

unlock:
igt_global_reset_unlock(gt);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);

return err;
}
@@ -303,6 +304,7 @@ static int igt_atomic_engine_reset(void *arg)
const typeof(*igt_atomic_phases) *p;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
int err = 0;

/* Check that the resets are usable from atomic context */
@@ -313,7 +315,7 @@ static int igt_atomic_engine_reset(void *arg)
if (intel_uc_uses_guc_submission(&gt->uc))
return 0;

- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
igt_global_reset_lock(gt);

/* Flush any requests before we get started and check basics */
@@ -361,7 +363,7 @@ static int igt_atomic_engine_reset(void *arg)

out_unlock:
igt_global_reset_unlock(gt);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);

return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index 6755bbc4ebdac0..7052a52e44d74e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -223,6 +223,7 @@ int live_rps_clock_interval(void *arg)
struct intel_engine_cs *engine;
enum intel_engine_id id;
struct igt_spinner spin;
+ intel_wakeref_t wakeref;
int err = 0;

if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
@@ -235,7 +236,7 @@ int live_rps_clock_interval(void *arg)
saved_work = rps->work.func;
rps->work.func = dummy_rps_work;

- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
intel_rps_disable(&gt->rps);

intel_gt_check_clock_frequency(gt);
@@ -354,7 +355,7 @@ int live_rps_clock_interval(void *arg)
}

intel_rps_enable(&gt->rps);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);

igt_spinner_fini(&spin);

@@ -375,6 +376,7 @@ int live_rps_control(void *arg)
struct intel_engine_cs *engine;
enum intel_engine_id id;
struct igt_spinner spin;
+ intel_wakeref_t wakeref;
int err = 0;

/*
@@ -397,7 +399,7 @@ int live_rps_control(void *arg)
saved_work = rps->work.func;
rps->work.func = dummy_rps_work;

- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
for_each_engine(engine, gt, id) {
struct i915_request *rq;
ktime_t min_dt, max_dt;
@@ -487,7 +489,7 @@ int live_rps_control(void *arg)
break;
}
}
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);

igt_spinner_fini(&spin);

@@ -1022,6 +1024,7 @@ int live_rps_interrupt(void *arg)
struct intel_engine_cs *engine;
enum intel_engine_id id;
struct igt_spinner spin;
+ intel_wakeref_t wakeref;
u32 pm_events;
int err = 0;

@@ -1032,9 +1035,9 @@ int live_rps_interrupt(void *arg)
if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
return 0;

- intel_gt_pm_get(gt);
- pm_events = rps->pm_events;
- intel_gt_pm_put(gt);
+ pm_events = 0;
+ with_intel_gt_pm(gt, wakeref)
+ pm_events = rps->pm_events;
if (!pm_events) {
pr_err("No RPS PM events registered, but RPS is enabled?\n");
return -ENODEV;
diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c
index bd44ce73a5044e..81fa891eac19fc 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -241,6 +241,7 @@ static int run_test(struct intel_gt *gt, int test_type)
struct intel_rps *rps = &gt->rps;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
struct igt_spinner spin;
u32 slpc_min_freq, slpc_max_freq;
int err = 0;
@@ -278,7 +279,7 @@ static int run_test(struct intel_gt *gt, int test_type)
}

intel_gt_pm_wait_for_idle(gt);
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
for_each_engine(engine, gt, id) {
struct i915_request *rq;
u32 max_act_freq;
@@ -365,7 +366,7 @@ static int run_test(struct intel_gt *gt, int test_type)
if (igt_flush_test(gt->i915))
err = -EIO;

- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
igt_spinner_fini(&spin);
intel_gt_pm_wait_for_idle(gt);

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index be495e657d66bd..978820f8697059 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1106,7 +1106,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
if (deregister)
guc_signal_context_fence(ce);
if (destroyed) {
- intel_gt_pm_put_async(guc_to_gt(guc));
+ intel_gt_pm_put_async_untracked(guc_to_gt(guc));
release_guc_id(guc, ce);
__guc_context_destroy(ce);
}
@@ -1302,6 +1302,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
unsigned long flags;
u32 reset_count;
bool in_reset;
+ intel_wakeref_t wakeref;

spin_lock_irqsave(&guc->timestamp.lock, flags);

@@ -1324,7 +1325,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
* start_gt_clk is derived from GuC state. To get a consistent
* view of activity, we query the GuC state only if gt is awake.
*/
- if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
+ if (!in_reset && (wakeref = intel_gt_pm_get_if_awake(gt))) {
stats_saved = *stats;
gt_stamp_saved = guc->timestamp.gt_stamp;
/*
@@ -1333,7 +1334,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
*/
guc_update_engine_gt_clks(engine);
guc_update_pm_timestamp(guc, now);
- intel_gt_pm_put_async(gt);
+ intel_gt_pm_put_async(gt, wakeref);
if (i915_reset_count(gpu_error) != reset_count) {
*stats = stats_saved;
guc->timestamp.gt_stamp = gt_stamp_saved;
@@ -4545,7 +4546,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
intel_context_put(ce);
} else if (context_destroyed(ce)) {
/* Context has been destroyed */
- intel_gt_pm_put_async(guc_to_gt(guc));
+ intel_gt_pm_put_async_untracked(guc_to_gt(guc));
release_guc_id(guc, ce);
__guc_context_destroy(ce);
}
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 52531ab28c5f55..737d45d11d9f1f 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -168,19 +168,19 @@ static u64 get_rc6(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
struct i915_pmu *pmu = &i915->pmu;
+ intel_wakeref_t wakeref;
unsigned long flags;
- bool awake = false;
u64 val;

- if (intel_gt_pm_get_if_awake(gt)) {
+ wakeref = intel_gt_pm_get_if_awake(gt);
+ if (wakeref) {
val = __get_rc6(gt);
- intel_gt_pm_put_async(gt);
- awake = true;
+ intel_gt_pm_put_async(gt, wakeref);
}

spin_lock_irqsave(&pmu->lock, flags);

- if (awake) {
+ if (wakeref) {
pmu->sample[__I915_SAMPLE_RC6].cur = val;
} else {
/*
@@ -373,12 +373,14 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
struct drm_i915_private *i915 = gt->i915;
struct i915_pmu *pmu = &i915->pmu;
struct intel_rps *rps = &gt->rps;
+ intel_wakeref_t wakeref;

if (!frequency_sampling_enabled(pmu))
return;

/* Report 0/0 (actual/requested) frequency while parked. */
- if (!intel_gt_pm_get_if_awake(gt))
+ wakeref = intel_gt_pm_get_if_awake(gt);
+ if (!wakeref)
return;

if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) {
@@ -409,7 +411,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
period_ns / 1000);
}

- intel_gt_pm_put_async(gt);
+ intel_gt_pm_put_async(gt, wakeref);
}

static enum hrtimer_restart i915_sample(struct hrtimer *hrtimer)
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
index dfd87d08221807..db4887e33ea607 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -108,6 +108,10 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
INIT_DELAYED_WORK(&wf->work, __intel_wakeref_put_work);
lockdep_init_map(&wf->work.work.lockdep_map,
"wakeref.work", &key->work, 0);
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF)
+ intel_wakeref_tracker_init(&wf->debug);
+#endif
}

int intel_wakeref_wait_for_idle(struct intel_wakeref *wf)
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index 20720fbcc28d46..f2de4ccb7f5377 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -43,6 +43,10 @@ struct intel_wakeref {
const struct intel_wakeref_ops *ops;

struct delayed_work work;
+
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF)
+ struct intel_wakeref_tracker debug;
+#endif
};

struct intel_wakeref_lockclass {
@@ -261,6 +265,44 @@ __intel_wakeref_defer_park(struct intel_wakeref *wf)
*/
int intel_wakeref_wait_for_idle(struct intel_wakeref *wf);

+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF)
+
+static inline intel_wakeref_t intel_wakeref_track(struct intel_wakeref *wf)
+{
+ return intel_wakeref_tracker_add(&wf->debug);
+}
+
+static inline void intel_wakeref_untrack(struct intel_wakeref *wf,
+ intel_wakeref_t handle)
+{
+ intel_wakeref_tracker_remove(&wf->debug, handle);
+}
+
+static inline void intel_wakeref_show(struct intel_wakeref *wf,
+ struct drm_printer *p)
+{
+ intel_wakeref_tracker_show(&wf->debug, p);
+}
+
+#else
+
+static inline intel_wakeref_t intel_wakeref_track(struct intel_wakeref *wf)
+{
+ return -1;
+}
+
+static inline void intel_wakeref_untrack(struct intel_wakeref *wf,
+ intel_wakeref_t handle)
+{
+}
+
+static inline void intel_wakeref_show(struct intel_wakeref *wf,
+ struct drm_printer *p)
+{
+}
+
+#endif
+
struct intel_wakeref_auto {
struct intel_runtime_pm *rpm;
struct timer_list timer;

--
2.34.1

2023-02-27 11:50:50

by Jani Nikula

[permalink] [raw]
Subject: Re: [PATCH 1/3] drm/i915: Separate wakeref tracking

On Fri, 24 Feb 2023, Andrzej Hajda <[email protected]> wrote:
> From: Chris Wilson <[email protected]>
>
> Extract the callstack tracking of intel_runtime_pm.c into its own
> utility so that that we can reuse it for other online debugging of
> scoped wakerefs.
>
> Signed-off-by: Chris Wilson <[email protected]>
> Signed-off-by: Andrzej Hajda <[email protected]>
> ---
> drivers/gpu/drm/i915/Kconfig.debug | 9 +
> drivers/gpu/drm/i915/Makefile | 4 +
> drivers/gpu/drm/i915/intel_runtime_pm.c | 244 +++------------------------
> drivers/gpu/drm/i915/intel_runtime_pm.h | 10 +-
> drivers/gpu/drm/i915/intel_wakeref.h | 6 +-
> drivers/gpu/drm/i915/intel_wakeref_tracker.c | 234 +++++++++++++++++++++++++
> drivers/gpu/drm/i915/intel_wakeref_tracker.h | 76 +++++++++
> 7 files changed, 355 insertions(+), 228 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
> index 93dfb7ed970547..5fde52107e3b44 100644
> --- a/drivers/gpu/drm/i915/Kconfig.debug
> +++ b/drivers/gpu/drm/i915/Kconfig.debug
> @@ -25,6 +25,7 @@ config DRM_I915_DEBUG
> select PREEMPT_COUNT
> select I2C_CHARDEV
> select STACKDEPOT
> + select STACKTRACE
> select DRM_DP_AUX_CHARDEV
> select X86_MSR # used by igt/pm_rpm
> select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
> @@ -37,6 +38,7 @@ config DRM_I915_DEBUG
> select DRM_I915_DEBUG_GEM
> select DRM_I915_DEBUG_GEM_ONCE
> select DRM_I915_DEBUG_MMIO
> + select DRM_I915_TRACK_WAKEREF
> select DRM_I915_DEBUG_RUNTIME_PM
> select DRM_I915_SW_FENCE_DEBUG_OBJECTS
> select DRM_I915_SELFTEST
> @@ -227,11 +229,18 @@ config DRM_I915_DEBUG_VBLANK_EVADE
>
> If in doubt, say "N".
>
> +config DRM_I915_TRACK_WAKEREF
> + depends on STACKDEPOT
> + depends on STACKTRACE
> + bool
> +
> config DRM_I915_DEBUG_RUNTIME_PM
> bool "Enable extra state checking for runtime PM"
> depends on DRM_I915
> default n
> select STACKDEPOT
> + select STACKTRACE
> + select DRM_I915_TRACK_WAKEREF
> help
> Choose this option to turn on extra state checking for the
> runtime PM functionality. This may introduce overhead during
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index b2f91a1f826858..42daff6d575a82 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -81,6 +81,10 @@ i915-$(CONFIG_DEBUG_FS) += \
> i915_debugfs_params.o \
> display/intel_display_debugfs.o \
> display/intel_pipe_crc.o
> +
> +i915-$(CONFIG_DRM_I915_TRACK_WAKEREF) += \
> + intel_wakeref_tracker.o
> +
> i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
>
> # "Graphics Technology" (aka we talk to the gpu)
> diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
> index 129746713d072f..72887e2bb03c21 100644
> --- a/drivers/gpu/drm/i915/intel_runtime_pm.c
> +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
> @@ -52,182 +52,37 @@
>
> #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
>
> -#include <linux/sort.h>
> -
> -#define STACKDEPTH 8
> -
> -static noinline depot_stack_handle_t __save_depot_stack(void)
> -{
> - unsigned long entries[STACKDEPTH];
> - unsigned int n;
> -
> - n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
> - return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
> -}
> -
> static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
> {
> - spin_lock_init(&rpm->debug.lock);
> - stack_depot_init();
> + intel_wakeref_tracker_init(&rpm->debug);
> }
>
> -static noinline depot_stack_handle_t
> +static intel_wakeref_t
> track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
> {
> - depot_stack_handle_t stack, *stacks;
> - unsigned long flags;
> -
> - if (rpm->no_wakeref_tracking)
> - return -1;
> -
> - stack = __save_depot_stack();
> - if (!stack)
> + if (!rpm->available)
> return -1;
>
> - spin_lock_irqsave(&rpm->debug.lock, flags);
> -
> - if (!rpm->debug.count)
> - rpm->debug.last_acquire = stack;
> -
> - stacks = krealloc(rpm->debug.owners,
> - (rpm->debug.count + 1) * sizeof(*stacks),
> - GFP_NOWAIT | __GFP_NOWARN);
> - if (stacks) {
> - stacks[rpm->debug.count++] = stack;
> - rpm->debug.owners = stacks;
> - } else {
> - stack = -1;
> - }
> -
> - spin_unlock_irqrestore(&rpm->debug.lock, flags);
> -
> - return stack;
> + return intel_wakeref_tracker_add(&rpm->debug);
> }
>
> static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
> - depot_stack_handle_t stack)
> + intel_wakeref_t wakeref)
> {
> - struct drm_i915_private *i915 = container_of(rpm,
> - struct drm_i915_private,
> - runtime_pm);
> - unsigned long flags, n;
> - bool found = false;
> -
> - if (unlikely(stack == -1))
> - return;
> -
> - spin_lock_irqsave(&rpm->debug.lock, flags);
> - for (n = rpm->debug.count; n--; ) {
> - if (rpm->debug.owners[n] == stack) {
> - memmove(rpm->debug.owners + n,
> - rpm->debug.owners + n + 1,
> - (--rpm->debug.count - n) * sizeof(stack));
> - found = true;
> - break;
> - }
> - }
> - spin_unlock_irqrestore(&rpm->debug.lock, flags);
> -
> - if (drm_WARN(&i915->drm, !found,
> - "Unmatched wakeref (tracking %lu), count %u\n",
> - rpm->debug.count, atomic_read(&rpm->wakeref_count))) {
> - char *buf;
> -
> - buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
> - if (!buf)
> - return;
> -
> - stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
> - DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf);
> -
> - stack = READ_ONCE(rpm->debug.last_release);
> - if (stack) {
> - stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
> - DRM_DEBUG_DRIVER("wakeref last released at\n%s", buf);
> - }
> -
> - kfree(buf);
> - }
> + intel_wakeref_tracker_remove(&rpm->debug, wakeref);
> }
>
> -static int cmphandle(const void *_a, const void *_b)
> +static void untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm)
> {
> - const depot_stack_handle_t * const a = _a, * const b = _b;
> + struct drm_printer p = drm_debug_printer("i915");
>
> - if (*a < *b)
> - return -1;
> - else if (*a > *b)
> - return 1;
> - else
> - return 0;
> -}
> -
> -static void
> -__print_intel_runtime_pm_wakeref(struct drm_printer *p,
> - const struct intel_runtime_pm_debug *dbg)
> -{
> - unsigned long i;
> - char *buf;
> -
> - buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
> - if (!buf)
> - return;
> -
> - if (dbg->last_acquire) {
> - stack_depot_snprint(dbg->last_acquire, buf, PAGE_SIZE, 2);
> - drm_printf(p, "Wakeref last acquired:\n%s", buf);
> - }
> -
> - if (dbg->last_release) {
> - stack_depot_snprint(dbg->last_release, buf, PAGE_SIZE, 2);
> - drm_printf(p, "Wakeref last released:\n%s", buf);
> - }
> -
> - drm_printf(p, "Wakeref count: %lu\n", dbg->count);
> -
> - sort(dbg->owners, dbg->count, sizeof(*dbg->owners), cmphandle, NULL);
> -
> - for (i = 0; i < dbg->count; i++) {
> - depot_stack_handle_t stack = dbg->owners[i];
> - unsigned long rep;
> -
> - rep = 1;
> - while (i + 1 < dbg->count && dbg->owners[i + 1] == stack)
> - rep++, i++;
> - stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
> - drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
> - }
> -
> - kfree(buf);
> -}
> -
> -static noinline void
> -__untrack_all_wakerefs(struct intel_runtime_pm_debug *debug,
> - struct intel_runtime_pm_debug *saved)
> -{
> - *saved = *debug;
> -
> - debug->owners = NULL;
> - debug->count = 0;
> - debug->last_release = __save_depot_stack();
> -}
> -
> -static void
> -dump_and_free_wakeref_tracking(struct intel_runtime_pm_debug *debug)
> -{
> - if (debug->count) {
> - struct drm_printer p = drm_debug_printer("i915");
> -
> - __print_intel_runtime_pm_wakeref(&p, debug);
> - }
> -
> - kfree(debug->owners);
> + intel_wakeref_tracker_reset(&rpm->debug, &p);
> }
>
> static noinline void
> __intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm)
> {
> - struct intel_runtime_pm_debug dbg = {};
> + struct intel_wakeref_tracker saved;
> unsigned long flags;
>
> if (!atomic_dec_and_lock_irqsave(&rpm->wakeref_count,
> @@ -235,60 +90,21 @@ __intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm)
> flags))
> return;
>
> - __untrack_all_wakerefs(&rpm->debug, &dbg);
> + saved = __intel_wakeref_tracker_reset(&rpm->debug);
> spin_unlock_irqrestore(&rpm->debug.lock, flags);
>
> - dump_and_free_wakeref_tracking(&dbg);
> -}
> -
> -static noinline void
> -untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm)
> -{
> - struct intel_runtime_pm_debug dbg = {};
> - unsigned long flags;
> -
> - spin_lock_irqsave(&rpm->debug.lock, flags);
> - __untrack_all_wakerefs(&rpm->debug, &dbg);
> - spin_unlock_irqrestore(&rpm->debug.lock, flags);
> + if (saved.count) {
> + struct drm_printer p = drm_debug_printer("i915");
>
> - dump_and_free_wakeref_tracking(&dbg);
> + __intel_wakeref_tracker_show(&saved, &p);
> + intel_wakeref_tracker_fini(&saved);
> + }
> }
>
> void print_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
> struct drm_printer *p)
> {
> - struct intel_runtime_pm_debug dbg = {};
> -
> - do {
> - unsigned long alloc = dbg.count;
> - depot_stack_handle_t *s;
> -
> - spin_lock_irq(&rpm->debug.lock);
> - dbg.count = rpm->debug.count;
> - if (dbg.count <= alloc) {
> - memcpy(dbg.owners,
> - rpm->debug.owners,
> - dbg.count * sizeof(*s));
> - }
> - dbg.last_acquire = rpm->debug.last_acquire;
> - dbg.last_release = rpm->debug.last_release;
> - spin_unlock_irq(&rpm->debug.lock);
> - if (dbg.count <= alloc)
> - break;
> -
> - s = krealloc(dbg.owners,
> - dbg.count * sizeof(*s),
> - GFP_NOWAIT | __GFP_NOWARN);
> - if (!s)
> - goto out;
> -
> - dbg.owners = s;
> - } while (1);
> -
> - __print_intel_runtime_pm_wakeref(p, &dbg);
> -
> -out:
> - kfree(dbg.owners);
> + intel_wakeref_tracker_show(&rpm->debug, p);
> }
>
> #else
> @@ -297,14 +113,14 @@ static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
> {
> }
>
> -static depot_stack_handle_t
> +static intel_wakeref_t
> track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
> {
> return -1;
> }
>
> static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
> - intel_wakeref_t wref)
> + intel_wakeref_t wakeref)
> {
> }
>
> @@ -349,9 +165,8 @@ intel_runtime_pm_release(struct intel_runtime_pm *rpm, int wakelock)
> static intel_wakeref_t __intel_runtime_pm_get(struct intel_runtime_pm *rpm,
> bool wakelock)
> {
> - struct drm_i915_private *i915 = container_of(rpm,
> - struct drm_i915_private,
> - runtime_pm);
> + struct drm_i915_private *i915 =
> + container_of(rpm, struct drm_i915_private, runtime_pm);
> int ret;
>
> ret = pm_runtime_get_sync(rpm->kdev);
> @@ -556,9 +371,8 @@ void intel_runtime_pm_put(struct intel_runtime_pm *rpm, intel_wakeref_t wref)
> */
> void intel_runtime_pm_enable(struct intel_runtime_pm *rpm)
> {
> - struct drm_i915_private *i915 = container_of(rpm,
> - struct drm_i915_private,
> - runtime_pm);
> + struct drm_i915_private *i915 =
> + container_of(rpm, struct drm_i915_private, runtime_pm);
> struct device *kdev = rpm->kdev;
>
> /*
> @@ -611,9 +425,8 @@ void intel_runtime_pm_enable(struct intel_runtime_pm *rpm)
>
> void intel_runtime_pm_disable(struct intel_runtime_pm *rpm)
> {
> - struct drm_i915_private *i915 = container_of(rpm,
> - struct drm_i915_private,
> - runtime_pm);
> + struct drm_i915_private *i915 =
> + container_of(rpm, struct drm_i915_private, runtime_pm);
> struct device *kdev = rpm->kdev;
>
> /* Transfer rpm ownership back to core */
> @@ -628,9 +441,8 @@ void intel_runtime_pm_disable(struct intel_runtime_pm *rpm)
>
> void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm)
> {
> - struct drm_i915_private *i915 = container_of(rpm,
> - struct drm_i915_private,
> - runtime_pm);
> + struct drm_i915_private *i915 =
> + container_of(rpm, struct drm_i915_private, runtime_pm);
> int count = atomic_read(&rpm->wakeref_count);
>
> intel_wakeref_auto_fini(&rpm->userfault_wakeref);
> @@ -646,7 +458,7 @@ void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm)
> void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm)
> {
> struct drm_i915_private *i915 =
> - container_of(rpm, struct drm_i915_private, runtime_pm);
> + container_of(rpm, struct drm_i915_private, runtime_pm);

Lots of unrelated changes above that should be separate patches.

> struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
> struct device *kdev = &pdev->dev;
>
> diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h
> index e592e8d6499a1f..a8dc2baf79844f 100644
> --- a/drivers/gpu/drm/i915/intel_runtime_pm.h
> +++ b/drivers/gpu/drm/i915/intel_runtime_pm.h
> @@ -83,15 +83,7 @@ struct intel_runtime_pm {
> * paired rpm_put) we can remove corresponding pairs of and keep
> * the array trimmed to active wakerefs.
> */
> - struct intel_runtime_pm_debug {
> - spinlock_t lock;
> -
> - depot_stack_handle_t last_acquire;
> - depot_stack_handle_t last_release;
> -
> - depot_stack_handle_t *owners;
> - unsigned long count;
> - } debug;
> + struct intel_wakeref_tracker debug;

There's a lot going on in the patch all around. Adding the struct to a
separate file could maybe be an individual patch to simplify the actual
changes.

This doesn't include the file that defines struct intel_wakeref_tracker;
it's included via intel_wakeref.h. But only if

> #endif
> };
>
> diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
> index 71b8a63f6f104d..20720fbcc28d46 100644
> --- a/drivers/gpu/drm/i915/intel_wakeref.h
> +++ b/drivers/gpu/drm/i915/intel_wakeref.h
> @@ -17,7 +17,9 @@
> #include <linux/timer.h>
> #include <linux/workqueue.h>
>
> -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
> +#include "intel_wakeref_tracker.h"
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF)
> #define INTEL_WAKEREF_BUG_ON(expr) BUG_ON(expr)
> #else
> #define INTEL_WAKEREF_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
> @@ -26,8 +28,6 @@
> struct intel_runtime_pm;
> struct intel_wakeref;
>
> -typedef depot_stack_handle_t intel_wakeref_t;
> -
> struct intel_wakeref_ops {
> int (*get)(struct intel_wakeref *wf);
> int (*put)(struct intel_wakeref *wf);
> diff --git a/drivers/gpu/drm/i915/intel_wakeref_tracker.c b/drivers/gpu/drm/i915/intel_wakeref_tracker.c
> new file mode 100644
> index 00000000000000..a0bcef13a1085a
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_wakeref_tracker.c
> @@ -0,0 +1,234 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2021 Intel Corporation
> + */
> +
> +#include <linux/slab.h>
> +#include <linux/stackdepot.h>
> +#include <linux/stacktrace.h>
> +#include <linux/sort.h>
> +
> +#include <drm/drm_print.h>
> +
> +#include "intel_wakeref.h"

This should really include the corresponding .h
i.e. intel_wakeref_tracker.h too. Now it gets included via
intel_wakeref.h but I'm not sure why.

> +
> +#define STACKDEPTH 8
> +
> +static noinline depot_stack_handle_t __save_depot_stack(void)
> +{
> + unsigned long entries[STACKDEPTH];
> + unsigned int n;
> +
> + n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
> + return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
> +}
> +
> +static void __print_depot_stack(depot_stack_handle_t stack,
> + char *buf, int sz, int indent)
> +{
> + unsigned long *entries;
> + unsigned int nr_entries;
> +
> + nr_entries = stack_depot_fetch(stack, &entries);
> + stack_trace_snprint(buf, sz, entries, nr_entries, indent);
> +}
> +
> +static int cmphandle(const void *_a, const void *_b)
> +{
> + const depot_stack_handle_t * const a = _a, * const b = _b;
> +
> + if (*a < *b)
> + return -1;
> + else if (*a > *b)
> + return 1;
> + else
> + return 0;
> +}
> +
> +void
> +__intel_wakeref_tracker_show(const struct intel_wakeref_tracker *w,
> + struct drm_printer *p)
> +{
> + unsigned long i;
> + char *buf;
> +
> + buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
> + if (!buf)
> + return;
> +
> + if (w->last_acquire) {
> + __print_depot_stack(w->last_acquire, buf, PAGE_SIZE, 2);
> + drm_printf(p, "Wakeref last acquired:\n%s", buf);
> + }
> +
> + if (w->last_release) {
> + __print_depot_stack(w->last_release, buf, PAGE_SIZE, 2);
> + drm_printf(p, "Wakeref last released:\n%s", buf);
> + }
> +
> + drm_printf(p, "Wakeref count: %lu\n", w->count);
> +
> + sort(w->owners, w->count, sizeof(*w->owners), cmphandle, NULL);
> +
> + for (i = 0; i < w->count; i++) {
> + depot_stack_handle_t stack = w->owners[i];
> + unsigned long rep;
> +
> + rep = 1;
> + while (i + 1 < w->count && w->owners[i + 1] == stack)
> + rep++, i++;
> + __print_depot_stack(stack, buf, PAGE_SIZE, 2);
> + drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
> + }
> +
> + kfree(buf);
> +}
> +
> +void intel_wakeref_tracker_show(struct intel_wakeref_tracker *w,
> + struct drm_printer *p)
> +{
> + struct intel_wakeref_tracker tmp = {};
> +
> + do {
> + unsigned long alloc = tmp.count;
> + depot_stack_handle_t *s;
> +
> + spin_lock_irq(&w->lock);
> + tmp.count = w->count;
> + if (tmp.count <= alloc)
> + memcpy(tmp.owners, w->owners, tmp.count * sizeof(*s));
> + tmp.last_acquire = w->last_acquire;
> + tmp.last_release = w->last_release;
> + spin_unlock_irq(&w->lock);
> + if (tmp.count <= alloc)
> + break;
> +
> + s = krealloc(tmp.owners,
> + tmp.count * sizeof(*s),
> + GFP_NOWAIT | __GFP_NOWARN);
> + if (!s)
> + goto out;
> +
> + tmp.owners = s;
> + } while (1);
> +
> + __intel_wakeref_tracker_show(&tmp, p);
> +
> +out:
> + intel_wakeref_tracker_fini(&tmp);
> +}
> +
> +intel_wakeref_t intel_wakeref_tracker_add(struct intel_wakeref_tracker *w)
> +{
> + depot_stack_handle_t stack, *stacks;
> + unsigned long flags;
> +
> + stack = __save_depot_stack();
> + if (!stack)
> + return -1;
> +
> + spin_lock_irqsave(&w->lock, flags);
> +
> + if (!w->count)
> + w->last_acquire = stack;
> +
> + stacks = krealloc(w->owners,
> + (w->count + 1) * sizeof(*stacks),
> + GFP_NOWAIT | __GFP_NOWARN);
> + if (stacks) {
> + stacks[w->count++] = stack;
> + w->owners = stacks;
> + } else {
> + stack = -1;
> + }
> +
> + spin_unlock_irqrestore(&w->lock, flags);
> +
> + return stack;
> +}
> +
> +void intel_wakeref_tracker_remove(struct intel_wakeref_tracker *w,
> + intel_wakeref_t stack)
> +{
> + unsigned long flags, n;
> + bool found = false;
> +
> + if (unlikely(stack == -1))
> + return;
> +
> + spin_lock_irqsave(&w->lock, flags);
> + for (n = w->count; n--; ) {
> + if (w->owners[n] == stack) {
> + memmove(w->owners + n,
> + w->owners + n + 1,
> + (--w->count - n) * sizeof(stack));
> + found = true;
> + break;
> + }
> + }
> + spin_unlock_irqrestore(&w->lock, flags);
> +
> + if (WARN(!found,
> + "Unmatched wakeref %x, tracking %lu\n",
> + stack, w->count)) {
> + char *buf;
> +
> + buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
> + if (!buf)
> + return;
> +
> + __print_depot_stack(stack, buf, PAGE_SIZE, 2);
> + pr_err("wakeref %x from\n%s", stack, buf);
> +
> + stack = READ_ONCE(w->last_release);
> + if (stack && !w->count) {
> + __print_depot_stack(stack, buf, PAGE_SIZE, 2);
> + pr_err("wakeref last released at\n%s", buf);
> + }
> +
> + kfree(buf);
> + }
> +}
> +
> +struct intel_wakeref_tracker
> +__intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w)
> +{
> + struct intel_wakeref_tracker saved;
> +
> + lockdep_assert_held(&w->lock);
> +
> + saved = *w;
> +
> + w->owners = NULL;
> + w->count = 0;
> + w->last_release = __save_depot_stack();
> +
> + return saved;
> +}
> +
> +void intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w,
> + struct drm_printer *p)
> +{
> + struct intel_wakeref_tracker tmp;
> +
> + spin_lock_irq(&w->lock);
> + tmp = __intel_wakeref_tracker_reset(w);
> + spin_unlock_irq(&w->lock);
> +
> + if (tmp.count)
> + __intel_wakeref_tracker_show(&tmp, p);
> +
> + intel_wakeref_tracker_fini(&tmp);
> +}
> +
> +void intel_wakeref_tracker_init(struct intel_wakeref_tracker *w)
> +{
> + memset(w, 0, sizeof(*w));
> + spin_lock_init(&w->lock);
> + stack_depot_init();
> +}
> +
> +void intel_wakeref_tracker_fini(struct intel_wakeref_tracker *w)
> +{
> + kfree(w->owners);
> +}
> diff --git a/drivers/gpu/drm/i915/intel_wakeref_tracker.h b/drivers/gpu/drm/i915/intel_wakeref_tracker.h
> new file mode 100644
> index 00000000000000..61df68e28c0fbf
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_wakeref_tracker.h
> @@ -0,0 +1,76 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2019 Intel Corporation
> + */
> +
> +#ifndef INTEL_WAKEREF_TRACKER_H
> +#define INTEL_WAKEREF_TRACKER_H
> +
> +#include <linux/kconfig.h>
> +#include <linux/spinlock.h>
> +#include <linux/stackdepot.h>
> +
> +typedef depot_stack_handle_t intel_wakeref_t;
> +
> +struct drm_printer;
> +
> +struct intel_wakeref_tracker {
> + spinlock_t lock;
> +
> + depot_stack_handle_t last_acquire;
> + depot_stack_handle_t last_release;
> +
> + depot_stack_handle_t *owners;
> + unsigned long count;
> +};
> +
> +#if IS_ENABLED(CONFIG_DRM_I915_TRACK_WAKEREF)
> +
> +void intel_wakeref_tracker_init(struct intel_wakeref_tracker *w);
> +void intel_wakeref_tracker_fini(struct intel_wakeref_tracker *w);
> +
> +intel_wakeref_t intel_wakeref_tracker_add(struct intel_wakeref_tracker *w);
> +void intel_wakeref_tracker_remove(struct intel_wakeref_tracker *w,
> + intel_wakeref_t handle);
> +
> +struct intel_wakeref_tracker
> +__intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w);
> +void intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w,
> + struct drm_printer *p);
> +
> +void __intel_wakeref_tracker_show(const struct intel_wakeref_tracker *w,
> + struct drm_printer *p);
> +void intel_wakeref_tracker_show(struct intel_wakeref_tracker *w,
> + struct drm_printer *p);
> +
> +#else
> +
> +static inline void intel_wakeref_tracker_init(struct intel_wakeref_tracker *w) {}
> +static inline void intel_wakeref_tracker_fini(struct intel_wakeref_tracker *w) {}
> +
> +static inline intel_wakeref_t
> +intel_wakeref_tracker_add(struct intel_wakeref_tracker *w)
> +{
> + return -1;
> +}
> +
> +static inline void
> +intel_wakeref_untrack_remove(struct intel_wakeref_tracker *w, intel_wakeref_t handle) {}
> +
> +static inline struct intel_wakeref_tracker
> +__intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w)
> +{
> + return (struct intel_wakeref_tracker){};
> +}
> +
> +static inline void intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w,
> + struct drm_printer *p)
> +{
> +}
> +
> +static inline void __intel_wakeref_tracker_show(const struct intel_wakeref_tracker *w, struct drm_printer *p) {}
> +static inline void intel_wakeref_tracker_show(struct intel_wakeref_tracker *w, struct drm_printer *p) {}
> +
> +#endif
> +
> +#endif /* INTEL_WAKEREF_TRACKER_H */

--
Jani Nikula, Intel Open Source Graphics Center

2023-03-01 21:42:58

by Andrzej Hajda

[permalink] [raw]
Subject: Re: [Intel-gfx] [PATCH 1/3] drm/i915: Separate wakeref tracking

On 27.02.2023 12:50, Jani Nikula wrote:
> On Fri, 24 Feb 2023, Andrzej Hajda <[email protected]> wrote:
>> From: Chris Wilson <[email protected]>
>>
>> Extract the callstack tracking of intel_runtime_pm.c into its own
>> utility so that that we can reuse it for other online debugging of
>> scoped wakerefs.
>>
>> Signed-off-by: Chris Wilson <[email protected]>
>> Signed-off-by: Andrzej Hajda <[email protected]>
>> ---
>> drivers/gpu/drm/i915/Kconfig.debug | 9 +
>> drivers/gpu/drm/i915/Makefile | 4 +
>> drivers/gpu/drm/i915/intel_runtime_pm.c | 244 +++------------------------
>> drivers/gpu/drm/i915/intel_runtime_pm.h | 10 +-
>> drivers/gpu/drm/i915/intel_wakeref.h | 6 +-
>> drivers/gpu/drm/i915/intel_wakeref_tracker.c | 234 +++++++++++++++++++++++++
>> drivers/gpu/drm/i915/intel_wakeref_tracker.h | 76 +++++++++
>> 7 files changed, 355 insertions(+), 228 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
>> index 93dfb7ed970547..5fde52107e3b44 100644
>> --- a/drivers/gpu/drm/i915/Kconfig.debug
>> +++ b/drivers/gpu/drm/i915/Kconfig.debug
>> @@ -25,6 +25,7 @@ config DRM_I915_DEBUG
>> select PREEMPT_COUNT
>> select I2C_CHARDEV
>> select STACKDEPOT
>> + select STACKTRACE
>> select DRM_DP_AUX_CHARDEV
>> select X86_MSR # used by igt/pm_rpm
>> select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
>> @@ -37,6 +38,7 @@ config DRM_I915_DEBUG
>> select DRM_I915_DEBUG_GEM
>> select DRM_I915_DEBUG_GEM_ONCE
>> select DRM_I915_DEBUG_MMIO
>> + select DRM_I915_TRACK_WAKEREF
>> select DRM_I915_DEBUG_RUNTIME_PM
>> select DRM_I915_SW_FENCE_DEBUG_OBJECTS
>> select DRM_I915_SELFTEST
>> @@ -227,11 +229,18 @@ config DRM_I915_DEBUG_VBLANK_EVADE
>>
>> If in doubt, say "N".
>>
>> +config DRM_I915_TRACK_WAKEREF
>> + depends on STACKDEPOT
>> + depends on STACKTRACE
>> + bool
>> +
>> config DRM_I915_DEBUG_RUNTIME_PM
>> bool "Enable extra state checking for runtime PM"
>> depends on DRM_I915
>> default n
>> select STACKDEPOT
>> + select STACKTRACE
>> + select DRM_I915_TRACK_WAKEREF
>> help
>> Choose this option to turn on extra state checking for the
>> runtime PM functionality. This may introduce overhead during
>> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
>> index b2f91a1f826858..42daff6d575a82 100644
>> --- a/drivers/gpu/drm/i915/Makefile
>> +++ b/drivers/gpu/drm/i915/Makefile
>> @@ -81,6 +81,10 @@ i915-$(CONFIG_DEBUG_FS) += \
>> i915_debugfs_params.o \
>> display/intel_display_debugfs.o \
>> display/intel_pipe_crc.o
>> +
>> +i915-$(CONFIG_DRM_I915_TRACK_WAKEREF) += \
>> + intel_wakeref_tracker.o
>> +
>> i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
>>
>> # "Graphics Technology" (aka we talk to the gpu)
>> diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
>> index 129746713d072f..72887e2bb03c21 100644
>> --- a/drivers/gpu/drm/i915/intel_runtime_pm.c
>> +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
>> @@ -52,182 +52,37 @@
>>
>> #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
>>
>> -#include <linux/sort.h>
>> -
>> -#define STACKDEPTH 8
>> -
>> -static noinline depot_stack_handle_t __save_depot_stack(void)
>> -{
>> - unsigned long entries[STACKDEPTH];
>> - unsigned int n;
>> -
>> - n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
>> - return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
>> -}
>> -
>> static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
>> {
>> - spin_lock_init(&rpm->debug.lock);
>> - stack_depot_init();
>> + intel_wakeref_tracker_init(&rpm->debug);
>> }
>>
>> -static noinline depot_stack_handle_t
>> +static intel_wakeref_t
>> track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
>> {
>> - depot_stack_handle_t stack, *stacks;
>> - unsigned long flags;
>> -
>> - if (rpm->no_wakeref_tracking)
>> - return -1;
>> -
>> - stack = __save_depot_stack();
>> - if (!stack)
>> + if (!rpm->available)
>> return -1;
>>
>> - spin_lock_irqsave(&rpm->debug.lock, flags);
>> -
>> - if (!rpm->debug.count)
>> - rpm->debug.last_acquire = stack;
>> -
>> - stacks = krealloc(rpm->debug.owners,
>> - (rpm->debug.count + 1) * sizeof(*stacks),
>> - GFP_NOWAIT | __GFP_NOWARN);
>> - if (stacks) {
>> - stacks[rpm->debug.count++] = stack;
>> - rpm->debug.owners = stacks;
>> - } else {
>> - stack = -1;
>> - }
>> -
>> - spin_unlock_irqrestore(&rpm->debug.lock, flags);
>> -
>> - return stack;
>> + return intel_wakeref_tracker_add(&rpm->debug);
>> }
>>
>> static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
>> - depot_stack_handle_t stack)
>> + intel_wakeref_t wakeref)
>> {
>> - struct drm_i915_private *i915 = container_of(rpm,
>> - struct drm_i915_private,
>> - runtime_pm);
>> - unsigned long flags, n;
>> - bool found = false;
>> -
>> - if (unlikely(stack == -1))
>> - return;
>> -
>> - spin_lock_irqsave(&rpm->debug.lock, flags);
>> - for (n = rpm->debug.count; n--; ) {
>> - if (rpm->debug.owners[n] == stack) {
>> - memmove(rpm->debug.owners + n,
>> - rpm->debug.owners + n + 1,
>> - (--rpm->debug.count - n) * sizeof(stack));
>> - found = true;
>> - break;
>> - }
>> - }
>> - spin_unlock_irqrestore(&rpm->debug.lock, flags);
>> -
>> - if (drm_WARN(&i915->drm, !found,
>> - "Unmatched wakeref (tracking %lu), count %u\n",
>> - rpm->debug.count, atomic_read(&rpm->wakeref_count))) {
>> - char *buf;
>> -
>> - buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
>> - if (!buf)
>> - return;
>> -
>> - stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
>> - DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf);
>> -
>> - stack = READ_ONCE(rpm->debug.last_release);
>> - if (stack) {
>> - stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
>> - DRM_DEBUG_DRIVER("wakeref last released at\n%s", buf);
>> - }
>> -
>> - kfree(buf);
>> - }
>> + intel_wakeref_tracker_remove(&rpm->debug, wakeref);
>> }
>>
>> -static int cmphandle(const void *_a, const void *_b)
>> +static void untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm)
>> {
>> - const depot_stack_handle_t * const a = _a, * const b = _b;
>> + struct drm_printer p = drm_debug_printer("i915");
>>
>> - if (*a < *b)
>> - return -1;
>> - else if (*a > *b)
>> - return 1;
>> - else
>> - return 0;
>> -}
>> -
>> -static void
>> -__print_intel_runtime_pm_wakeref(struct drm_printer *p,
>> - const struct intel_runtime_pm_debug *dbg)
>> -{
>> - unsigned long i;
>> - char *buf;
>> -
>> - buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
>> - if (!buf)
>> - return;
>> -
>> - if (dbg->last_acquire) {
>> - stack_depot_snprint(dbg->last_acquire, buf, PAGE_SIZE, 2);
>> - drm_printf(p, "Wakeref last acquired:\n%s", buf);
>> - }
>> -
>> - if (dbg->last_release) {
>> - stack_depot_snprint(dbg->last_release, buf, PAGE_SIZE, 2);
>> - drm_printf(p, "Wakeref last released:\n%s", buf);
>> - }
>> -
>> - drm_printf(p, "Wakeref count: %lu\n", dbg->count);
>> -
>> - sort(dbg->owners, dbg->count, sizeof(*dbg->owners), cmphandle, NULL);
>> -
>> - for (i = 0; i < dbg->count; i++) {
>> - depot_stack_handle_t stack = dbg->owners[i];
>> - unsigned long rep;
>> -
>> - rep = 1;
>> - while (i + 1 < dbg->count && dbg->owners[i + 1] == stack)
>> - rep++, i++;
>> - stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
>> - drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
>> - }
>> -
>> - kfree(buf);
>> -}
>> -
>> -static noinline void
>> -__untrack_all_wakerefs(struct intel_runtime_pm_debug *debug,
>> - struct intel_runtime_pm_debug *saved)
>> -{
>> - *saved = *debug;
>> -
>> - debug->owners = NULL;
>> - debug->count = 0;
>> - debug->last_release = __save_depot_stack();
>> -}
>> -
>> -static void
>> -dump_and_free_wakeref_tracking(struct intel_runtime_pm_debug *debug)
>> -{
>> - if (debug->count) {
>> - struct drm_printer p = drm_debug_printer("i915");
>> -
>> - __print_intel_runtime_pm_wakeref(&p, debug);
>> - }
>> -
>> - kfree(debug->owners);
>> + intel_wakeref_tracker_reset(&rpm->debug, &p);
>> }
>>
>> static noinline void
>> __intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm)
>> {
>> - struct intel_runtime_pm_debug dbg = {};
>> + struct intel_wakeref_tracker saved;
>> unsigned long flags;
>>
>> if (!atomic_dec_and_lock_irqsave(&rpm->wakeref_count,
>> @@ -235,60 +90,21 @@ __intel_wakeref_dec_and_check_tracking(struct intel_runtime_pm *rpm)
>> flags))
>> return;
>>
>> - __untrack_all_wakerefs(&rpm->debug, &dbg);
>> + saved = __intel_wakeref_tracker_reset(&rpm->debug);
>> spin_unlock_irqrestore(&rpm->debug.lock, flags);
>>
>> - dump_and_free_wakeref_tracking(&dbg);
>> -}
>> -
>> -static noinline void
>> -untrack_all_intel_runtime_pm_wakerefs(struct intel_runtime_pm *rpm)
>> -{
>> - struct intel_runtime_pm_debug dbg = {};
>> - unsigned long flags;
>> -
>> - spin_lock_irqsave(&rpm->debug.lock, flags);
>> - __untrack_all_wakerefs(&rpm->debug, &dbg);
>> - spin_unlock_irqrestore(&rpm->debug.lock, flags);
>> + if (saved.count) {
>> + struct drm_printer p = drm_debug_printer("i915");
>>
>> - dump_and_free_wakeref_tracking(&dbg);
>> + __intel_wakeref_tracker_show(&saved, &p);
>> + intel_wakeref_tracker_fini(&saved);
>> + }
>> }
>>
>> void print_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
>> struct drm_printer *p)
>> {
>> - struct intel_runtime_pm_debug dbg = {};
>> -
>> - do {
>> - unsigned long alloc = dbg.count;
>> - depot_stack_handle_t *s;
>> -
>> - spin_lock_irq(&rpm->debug.lock);
>> - dbg.count = rpm->debug.count;
>> - if (dbg.count <= alloc) {
>> - memcpy(dbg.owners,
>> - rpm->debug.owners,
>> - dbg.count * sizeof(*s));
>> - }
>> - dbg.last_acquire = rpm->debug.last_acquire;
>> - dbg.last_release = rpm->debug.last_release;
>> - spin_unlock_irq(&rpm->debug.lock);
>> - if (dbg.count <= alloc)
>> - break;
>> -
>> - s = krealloc(dbg.owners,
>> - dbg.count * sizeof(*s),
>> - GFP_NOWAIT | __GFP_NOWARN);
>> - if (!s)
>> - goto out;
>> -
>> - dbg.owners = s;
>> - } while (1);
>> -
>> - __print_intel_runtime_pm_wakeref(p, &dbg);
>> -
>> -out:
>> - kfree(dbg.owners);
>> + intel_wakeref_tracker_show(&rpm->debug, p);
>> }
>>
>> #else
>> @@ -297,14 +113,14 @@ static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
>> {
>> }
>>
>> -static depot_stack_handle_t
>> +static intel_wakeref_t
>> track_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
>> {
>> return -1;
>> }
>>
>> static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
>> - intel_wakeref_t wref)
>> + intel_wakeref_t wakeref)
>> {
>> }
>>
>> @@ -349,9 +165,8 @@ intel_runtime_pm_release(struct intel_runtime_pm *rpm, int wakelock)
>> static intel_wakeref_t __intel_runtime_pm_get(struct intel_runtime_pm *rpm,
>> bool wakelock)
>> {
>> - struct drm_i915_private *i915 = container_of(rpm,
>> - struct drm_i915_private,
>> - runtime_pm);
>> + struct drm_i915_private *i915 =
>> + container_of(rpm, struct drm_i915_private, runtime_pm);
>> int ret;
>>
>> ret = pm_runtime_get_sync(rpm->kdev);
>> @@ -556,9 +371,8 @@ void intel_runtime_pm_put(struct intel_runtime_pm *rpm, intel_wakeref_t wref)
>> */
>> void intel_runtime_pm_enable(struct intel_runtime_pm *rpm)
>> {
>> - struct drm_i915_private *i915 = container_of(rpm,
>> - struct drm_i915_private,
>> - runtime_pm);
>> + struct drm_i915_private *i915 =
>> + container_of(rpm, struct drm_i915_private, runtime_pm);
>> struct device *kdev = rpm->kdev;
>>
>> /*
>> @@ -611,9 +425,8 @@ void intel_runtime_pm_enable(struct intel_runtime_pm *rpm)
>>
>> void intel_runtime_pm_disable(struct intel_runtime_pm *rpm)
>> {
>> - struct drm_i915_private *i915 = container_of(rpm,
>> - struct drm_i915_private,
>> - runtime_pm);
>> + struct drm_i915_private *i915 =
>> + container_of(rpm, struct drm_i915_private, runtime_pm);
>> struct device *kdev = rpm->kdev;
>>
>> /* Transfer rpm ownership back to core */
>> @@ -628,9 +441,8 @@ void intel_runtime_pm_disable(struct intel_runtime_pm *rpm)
>>
>> void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm)
>> {
>> - struct drm_i915_private *i915 = container_of(rpm,
>> - struct drm_i915_private,
>> - runtime_pm);
>> + struct drm_i915_private *i915 =
>> + container_of(rpm, struct drm_i915_private, runtime_pm);
>> int count = atomic_read(&rpm->wakeref_count);
>>
>> intel_wakeref_auto_fini(&rpm->userfault_wakeref);
>> @@ -646,7 +458,7 @@ void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm)
>> void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm)
>> {
>> struct drm_i915_private *i915 =
>> - container_of(rpm, struct drm_i915_private, runtime_pm);
>> + container_of(rpm, struct drm_i915_private, runtime_pm);
>
> Lots of unrelated changes above that should be separate patches.


OK


>
>> struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
>> struct device *kdev = &pdev->dev;
>>
>> diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h
>> index e592e8d6499a1f..a8dc2baf79844f 100644
>> --- a/drivers/gpu/drm/i915/intel_runtime_pm.h
>> +++ b/drivers/gpu/drm/i915/intel_runtime_pm.h
>> @@ -83,15 +83,7 @@ struct intel_runtime_pm {
>> * paired rpm_put) we can remove corresponding pairs of and keep
>> * the array trimmed to active wakerefs.
>> */
>> - struct intel_runtime_pm_debug {
>> - spinlock_t lock;
>> -
>> - depot_stack_handle_t last_acquire;
>> - depot_stack_handle_t last_release;
>> -
>> - depot_stack_handle_t *owners;
>> - unsigned long count;
>> - } debug;
>> + struct intel_wakeref_tracker debug;
>
> There's a lot going on in the patch all around. Adding the struct to a
> separate file could maybe be an individual patch to simplify the actual
> changes.

Hmm, the patch abstracts out wakeref_tracker from rpm to separate
'library'. In this context the change above looks quite natural and
atomic for me.

>
> This doesn't include the file that defines struct intel_wakeref_tracker;
> it's included via intel_wakeref.h. But only if
>
>> #endif
>> };
>>
>> diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
>> index 71b8a63f6f104d..20720fbcc28d46 100644
>> --- a/drivers/gpu/drm/i915/intel_wakeref.h
>> +++ b/drivers/gpu/drm/i915/intel_wakeref.h
>> @@ -17,7 +17,9 @@
>> #include <linux/timer.h>
>> #include <linux/workqueue.h>
>>
>> -#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
>> +#include "intel_wakeref_tracker.h"
>> +
>> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_WAKEREF)
>> #define INTEL_WAKEREF_BUG_ON(expr) BUG_ON(expr)
>> #else
>> #define INTEL_WAKEREF_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
>> @@ -26,8 +28,6 @@
>> struct intel_runtime_pm;
>> struct intel_wakeref;
>>
>> -typedef depot_stack_handle_t intel_wakeref_t;
>> -
>> struct intel_wakeref_ops {
>> int (*get)(struct intel_wakeref *wf);
>> int (*put)(struct intel_wakeref *wf);
>> diff --git a/drivers/gpu/drm/i915/intel_wakeref_tracker.c b/drivers/gpu/drm/i915/intel_wakeref_tracker.c
>> new file mode 100644
>> index 00000000000000..a0bcef13a1085a
>> --- /dev/null
>> +++ b/drivers/gpu/drm/i915/intel_wakeref_tracker.c
>> @@ -0,0 +1,234 @@
>> +// SPDX-License-Identifier: MIT
>> +/*
>> + * Copyright © 2021 Intel Corporation
>> + */
>> +
>> +#include <linux/slab.h>
>> +#include <linux/stackdepot.h>
>> +#include <linux/stacktrace.h>
>> +#include <linux/sort.h>
>> +
>> +#include <drm/drm_print.h>
>> +
>> +#include "intel_wakeref.h"
>
> This should really include the corresponding .h
> i.e. intel_wakeref_tracker.h too. Now it gets included via
> intel_wakeref.h but I'm not sure why.

Apparently some leftover, will fix it.

Regards
Andrzej


>
>> +
>> +#define STACKDEPTH 8
>> +
>> +static noinline depot_stack_handle_t __save_depot_stack(void)
>> +{
>> + unsigned long entries[STACKDEPTH];
>> + unsigned int n;
>> +
>> + n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
>> + return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
>> +}
>> +
>> +static void __print_depot_stack(depot_stack_handle_t stack,
>> + char *buf, int sz, int indent)
>> +{
>> + unsigned long *entries;
>> + unsigned int nr_entries;
>> +
>> + nr_entries = stack_depot_fetch(stack, &entries);
>> + stack_trace_snprint(buf, sz, entries, nr_entries, indent);
>> +}
>> +
>> +static int cmphandle(const void *_a, const void *_b)
>> +{
>> + const depot_stack_handle_t * const a = _a, * const b = _b;
>> +
>> + if (*a < *b)
>> + return -1;
>> + else if (*a > *b)
>> + return 1;
>> + else
>> + return 0;
>> +}
>> +
>> +void
>> +__intel_wakeref_tracker_show(const struct intel_wakeref_tracker *w,
>> + struct drm_printer *p)
>> +{
>> + unsigned long i;
>> + char *buf;
>> +
>> + buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
>> + if (!buf)
>> + return;
>> +
>> + if (w->last_acquire) {
>> + __print_depot_stack(w->last_acquire, buf, PAGE_SIZE, 2);
>> + drm_printf(p, "Wakeref last acquired:\n%s", buf);
>> + }
>> +
>> + if (w->last_release) {
>> + __print_depot_stack(w->last_release, buf, PAGE_SIZE, 2);
>> + drm_printf(p, "Wakeref last released:\n%s", buf);
>> + }
>> +
>> + drm_printf(p, "Wakeref count: %lu\n", w->count);
>> +
>> + sort(w->owners, w->count, sizeof(*w->owners), cmphandle, NULL);
>> +
>> + for (i = 0; i < w->count; i++) {
>> + depot_stack_handle_t stack = w->owners[i];
>> + unsigned long rep;
>> +
>> + rep = 1;
>> + while (i + 1 < w->count && w->owners[i + 1] == stack)
>> + rep++, i++;
>> + __print_depot_stack(stack, buf, PAGE_SIZE, 2);
>> + drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
>> + }
>> +
>> + kfree(buf);
>> +}
>> +
>> +void intel_wakeref_tracker_show(struct intel_wakeref_tracker *w,
>> + struct drm_printer *p)
>> +{
>> + struct intel_wakeref_tracker tmp = {};
>> +
>> + do {
>> + unsigned long alloc = tmp.count;
>> + depot_stack_handle_t *s;
>> +
>> + spin_lock_irq(&w->lock);
>> + tmp.count = w->count;
>> + if (tmp.count <= alloc)
>> + memcpy(tmp.owners, w->owners, tmp.count * sizeof(*s));
>> + tmp.last_acquire = w->last_acquire;
>> + tmp.last_release = w->last_release;
>> + spin_unlock_irq(&w->lock);
>> + if (tmp.count <= alloc)
>> + break;
>> +
>> + s = krealloc(tmp.owners,
>> + tmp.count * sizeof(*s),
>> + GFP_NOWAIT | __GFP_NOWARN);
>> + if (!s)
>> + goto out;
>> +
>> + tmp.owners = s;
>> + } while (1);
>> +
>> + __intel_wakeref_tracker_show(&tmp, p);
>> +
>> +out:
>> + intel_wakeref_tracker_fini(&tmp);
>> +}
>> +
>> +intel_wakeref_t intel_wakeref_tracker_add(struct intel_wakeref_tracker *w)
>> +{
>> + depot_stack_handle_t stack, *stacks;
>> + unsigned long flags;
>> +
>> + stack = __save_depot_stack();
>> + if (!stack)
>> + return -1;
>> +
>> + spin_lock_irqsave(&w->lock, flags);
>> +
>> + if (!w->count)
>> + w->last_acquire = stack;
>> +
>> + stacks = krealloc(w->owners,
>> + (w->count + 1) * sizeof(*stacks),
>> + GFP_NOWAIT | __GFP_NOWARN);
>> + if (stacks) {
>> + stacks[w->count++] = stack;
>> + w->owners = stacks;
>> + } else {
>> + stack = -1;
>> + }
>> +
>> + spin_unlock_irqrestore(&w->lock, flags);
>> +
>> + return stack;
>> +}
>> +
>> +void intel_wakeref_tracker_remove(struct intel_wakeref_tracker *w,
>> + intel_wakeref_t stack)
>> +{
>> + unsigned long flags, n;
>> + bool found = false;
>> +
>> + if (unlikely(stack == -1))
>> + return;
>> +
>> + spin_lock_irqsave(&w->lock, flags);
>> + for (n = w->count; n--; ) {
>> + if (w->owners[n] == stack) {
>> + memmove(w->owners + n,
>> + w->owners + n + 1,
>> + (--w->count - n) * sizeof(stack));
>> + found = true;
>> + break;
>> + }
>> + }
>> + spin_unlock_irqrestore(&w->lock, flags);
>> +
>> + if (WARN(!found,
>> + "Unmatched wakeref %x, tracking %lu\n",
>> + stack, w->count)) {
>> + char *buf;
>> +
>> + buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN);
>> + if (!buf)
>> + return;
>> +
>> + __print_depot_stack(stack, buf, PAGE_SIZE, 2);
>> + pr_err("wakeref %x from\n%s", stack, buf);
>> +
>> + stack = READ_ONCE(w->last_release);
>> + if (stack && !w->count) {
>> + __print_depot_stack(stack, buf, PAGE_SIZE, 2);
>> + pr_err("wakeref last released at\n%s", buf);
>> + }
>> +
>> + kfree(buf);
>> + }
>> +}
>> +
>> +struct intel_wakeref_tracker
>> +__intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w)
>> +{
>> + struct intel_wakeref_tracker saved;
>> +
>> + lockdep_assert_held(&w->lock);
>> +
>> + saved = *w;
>> +
>> + w->owners = NULL;
>> + w->count = 0;
>> + w->last_release = __save_depot_stack();
>> +
>> + return saved;
>> +}
>> +
>> +void intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w,
>> + struct drm_printer *p)
>> +{
>> + struct intel_wakeref_tracker tmp;
>> +
>> + spin_lock_irq(&w->lock);
>> + tmp = __intel_wakeref_tracker_reset(w);
>> + spin_unlock_irq(&w->lock);
>> +
>> + if (tmp.count)
>> + __intel_wakeref_tracker_show(&tmp, p);
>> +
>> + intel_wakeref_tracker_fini(&tmp);
>> +}
>> +
>> +void intel_wakeref_tracker_init(struct intel_wakeref_tracker *w)
>> +{
>> + memset(w, 0, sizeof(*w));
>> + spin_lock_init(&w->lock);
>> + stack_depot_init();
>> +}
>> +
>> +void intel_wakeref_tracker_fini(struct intel_wakeref_tracker *w)
>> +{
>> + kfree(w->owners);
>> +}
>> diff --git a/drivers/gpu/drm/i915/intel_wakeref_tracker.h b/drivers/gpu/drm/i915/intel_wakeref_tracker.h
>> new file mode 100644
>> index 00000000000000..61df68e28c0fbf
>> --- /dev/null
>> +++ b/drivers/gpu/drm/i915/intel_wakeref_tracker.h
>> @@ -0,0 +1,76 @@
>> +/* SPDX-License-Identifier: MIT */
>> +/*
>> + * Copyright © 2019 Intel Corporation
>> + */
>> +
>> +#ifndef INTEL_WAKEREF_TRACKER_H
>> +#define INTEL_WAKEREF_TRACKER_H
>> +
>> +#include <linux/kconfig.h>
>> +#include <linux/spinlock.h>
>> +#include <linux/stackdepot.h>
>> +
>> +typedef depot_stack_handle_t intel_wakeref_t;
>> +
>> +struct drm_printer;
>> +
>> +struct intel_wakeref_tracker {
>> + spinlock_t lock;
>> +
>> + depot_stack_handle_t last_acquire;
>> + depot_stack_handle_t last_release;
>> +
>> + depot_stack_handle_t *owners;
>> + unsigned long count;
>> +};
>> +
>> +#if IS_ENABLED(CONFIG_DRM_I915_TRACK_WAKEREF)
>> +
>> +void intel_wakeref_tracker_init(struct intel_wakeref_tracker *w);
>> +void intel_wakeref_tracker_fini(struct intel_wakeref_tracker *w);
>> +
>> +intel_wakeref_t intel_wakeref_tracker_add(struct intel_wakeref_tracker *w);
>> +void intel_wakeref_tracker_remove(struct intel_wakeref_tracker *w,
>> + intel_wakeref_t handle);
>> +
>> +struct intel_wakeref_tracker
>> +__intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w);
>> +void intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w,
>> + struct drm_printer *p);
>> +
>> +void __intel_wakeref_tracker_show(const struct intel_wakeref_tracker *w,
>> + struct drm_printer *p);
>> +void intel_wakeref_tracker_show(struct intel_wakeref_tracker *w,
>> + struct drm_printer *p);
>> +
>> +#else
>> +
>> +static inline void intel_wakeref_tracker_init(struct intel_wakeref_tracker *w) {}
>> +static inline void intel_wakeref_tracker_fini(struct intel_wakeref_tracker *w) {}
>> +
>> +static inline intel_wakeref_t
>> +intel_wakeref_tracker_add(struct intel_wakeref_tracker *w)
>> +{
>> + return -1;
>> +}
>> +
>> +static inline void
>> +intel_wakeref_untrack_remove(struct intel_wakeref_tracker *w, intel_wakeref_t handle) {}
>> +
>> +static inline struct intel_wakeref_tracker
>> +__intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w)
>> +{
>> + return (struct intel_wakeref_tracker){};
>> +}
>> +
>> +static inline void intel_wakeref_tracker_reset(struct intel_wakeref_tracker *w,
>> + struct drm_printer *p)
>> +{
>> +}
>> +
>> +static inline void __intel_wakeref_tracker_show(const struct intel_wakeref_tracker *w, struct drm_printer *p) {}
>> +static inline void intel_wakeref_tracker_show(struct intel_wakeref_tracker *w, struct drm_printer *p) {}
>> +
>> +#endif
>> +
>> +#endif /* INTEL_WAKEREF_TRACKER_H */
>