2017-09-05 08:10:46

by Mikko Perttunen

[permalink] [raw]
Subject: [PATCH v2 0/6] Miscellaneous improvements to Host1x and TegraDRM

New in v2:
- Changes in syncpoint protection and u64_to_user_ptr patches.
See the patches for notes.
- Added patch to support more opcodes in the debug dump
disassembly.
- Added patch to fix an incorrect comment.

Thanks,
Mikko

Patch v1 notes:

Hi all,

here are some new features and improvements.

Patch 1 enables syncpoint protection which prevents channels from
touching syncpoints not belonging to them on Tegra186.

Patch 2 enables the gather filter which prevents userspace command
buffers from using CDMA commands usually reserved for the kernel.
A test is available at git://github.com/cyndis/host1x_test, branch
gather-filter.

Patch 3 greatly improves formatting of debug dumps spewed by host1x
in case of job timeouts. They are now actually readable by humans
without use of additional scripts.

Patch 4 is a simple aesthetical fix to the TegraDRM submit path.

Everything was tested on TX1 and TX2 and should be applied on the
previously posted Tegra186 support series.

Cheers,
Mikko

Mikko Perttunen (6):
gpu: host1x: Enable Tegra186 syncpoint protection
gpu: host1x: Enable gather filter
gpu: host1x: Improve debug disassembly formatting
gpu: host1x: Disassemble more instructions
gpu: host1x: Fix incorrect comment for channel_request
drm/tegra: Use u64_to_user_ptr helper

drivers/gpu/drm/tegra/drm.c | 18 ++---
drivers/gpu/host1x/channel.c | 3 +-
drivers/gpu/host1x/debug.c | 14 +++-
drivers/gpu/host1x/debug.h | 14 ++--
drivers/gpu/host1x/dev.h | 15 +++++
drivers/gpu/host1x/hw/channel_hw.c | 25 +++++++
drivers/gpu/host1x/hw/debug_hw.c | 101 ++++++++++++++++++++++------
drivers/gpu/host1x/hw/debug_hw_1x01.c | 11 +--
drivers/gpu/host1x/hw/debug_hw_1x06.c | 12 ++--
drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 ++++
drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 ++++
drivers/gpu/host1x/hw/syncpt_hw.c | 46 +++++++++++++
drivers/gpu/host1x/syncpt.c | 8 +++
13 files changed, 246 insertions(+), 45 deletions(-)

--
2.14.1


2017-09-05 08:10:43

by Mikko Perttunen

[permalink] [raw]
Subject: [PATCH v2 6/6] drm/tegra: Use u64_to_user_ptr helper

Use the u64_to_user_ptr helper macro to cast IOCTL argument u64 values
to user pointers instead of writing out the cast manually.

Signed-off-by: Mikko Perttunen <[email protected]>
---
drivers/gpu/drm/tegra/drm.c | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index e3331a2bc082..72d5c0021540 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -388,18 +388,21 @@ int tegra_drm_submit(struct tegra_drm_context *context,
unsigned int num_cmdbufs = args->num_cmdbufs;
unsigned int num_relocs = args->num_relocs;
unsigned int num_waitchks = args->num_waitchks;
- struct drm_tegra_cmdbuf __user *cmdbufs =
- (void __user *)(uintptr_t)args->cmdbufs;
- struct drm_tegra_reloc __user *relocs =
- (void __user *)(uintptr_t)args->relocs;
- struct drm_tegra_waitchk __user *waitchks =
- (void __user *)(uintptr_t)args->waitchks;
+ struct drm_tegra_cmdbuf __user *cmdbufs;
+ struct drm_tegra_reloc __user *relocs;
+ struct drm_tegra_waitchk __user *waitchks;
+ struct drm_tegra_syncpt __user *user_syncpt;
struct drm_tegra_syncpt syncpt;
struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
struct host1x_syncpt *sp;
struct host1x_job *job;
int err;

+ cmdbufs = u64_to_user_ptr(args->cmdbufs);
+ relocs = u64_to_user_ptr(args->relocs);
+ waitchks = u64_to_user_ptr(args->waitchks);
+ user_syncpt = u64_to_user_ptr(args->syncpts);
+
/* We don't yet support other than one syncpt_incr struct per submit */
if (args->num_syncpts != 1)
return -EINVAL;
@@ -520,8 +523,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
}
}

- if (copy_from_user(&syncpt, (void __user *)(uintptr_t)args->syncpts,
- sizeof(syncpt))) {
+ if (copy_from_user(&syncpt, user_syncpt, sizeof(syncpt))) {
err = -EFAULT;
goto fail;
}
--
2.14.1

2017-09-05 08:12:40

by Mikko Perttunen

[permalink] [raw]
Subject: [PATCH v2 2/6] gpu: host1x: Enable gather filter

The gather filter is a feature present on Tegra124 and newer where the
hardware prevents GATHERed command buffers from executing commands
normally reserved for the CDMA pushbuffer which is maintained by the
kernel driver.

This commit enables the gather filter on all supporting hardware.

Signed-off-by: Mikko Perttunen <[email protected]>
Reviewed-by: Dmitry Osipenko <[email protected]>
---
drivers/gpu/host1x/hw/channel_hw.c | 22 ++++++++++++++++++++++
drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 ++++++++++++
drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 ++++++++++++
3 files changed, 46 insertions(+)

diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 0161da331702..5c0dc6bb51d1 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -181,10 +181,32 @@ static int channel_submit(struct host1x_job *job)
return err;
}

+static void enable_gather_filter(struct host1x *host,
+ struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+ u32 val;
+
+ if (!host->hv_regs)
+ return;
+
+ val = host1x_hypervisor_readl(
+ host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+ val |= BIT(ch->id % 32);
+ host1x_hypervisor_writel(
+ host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+#elif HOST1X_HW >= 4
+ host1x_ch_writel(ch,
+ HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
+ HOST1X_CHANNEL_CHANNELCTRL);
+#endif
+}
+
static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
unsigned int index)
{
ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE;
+ enable_gather_filter(dev, ch);
return 0;
}

diff --git a/drivers/gpu/host1x/hw/hw_host1x04_channel.h b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
index 95e6f96142b9..2e8b635aa660 100644
--- a/drivers/gpu/host1x/hw/hw_host1x04_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
@@ -117,5 +117,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void)
}
#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_channelctrl_r(void)
+{
+ return 0x98;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL \
+ host1x_channel_channelctrl_r()
+static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
+{
+ return (v & 0x1) << 2;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
+ host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)

#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
index fce6e2c1ff4c..abbbc2641ce6 100644
--- a/drivers/gpu/host1x/hw/hw_host1x05_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
@@ -117,5 +117,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void)
}
#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_channelctrl_r(void)
+{
+ return 0x98;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL \
+ host1x_channel_channelctrl_r()
+static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
+{
+ return (v & 0x1) << 2;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
+ host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)

#endif
--
2.14.1

2017-09-05 08:12:37

by Mikko Perttunen

[permalink] [raw]
Subject: [PATCH v2 5/6] gpu: host1x: Fix incorrect comment for channel_request

This function actually doesn't sleep in the version that was merged.

Signed-off-by: Mikko Perttunen <[email protected]>
---
drivers/gpu/host1x/channel.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index db9b91d1384c..2fb93c27c1d9 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -128,8 +128,7 @@ static struct host1x_channel *acquire_unused_channel(struct host1x *host)
* host1x_channel_request() - Allocate a channel
* @device: Host1x unit this channel will be used to send commands to
*
- * Allocates a new host1x channel for @device. If there are no free channels,
- * this will sleep until one becomes available. May return NULL if CDMA
+ * Allocates a new host1x channel for @device. May return NULL if CDMA
* initialization fails.
*/
struct host1x_channel *host1x_channel_request(struct device *dev)
--
2.14.1

2017-09-05 08:12:35

by Mikko Perttunen

[permalink] [raw]
Subject: [PATCH v2 4/6] gpu: host1x: Disassemble more instructions

The disassembler for debug dumps was missing some newer host1x opcodes.
Add disassembly support for these.

Signed-off-by: Mikko Perttunen <[email protected]>
---
drivers/gpu/host1x/hw/debug_hw.c | 57 ++++++++++++++++++++++++++++++++---
drivers/gpu/host1x/hw/debug_hw_1x01.c | 3 +-
drivers/gpu/host1x/hw/debug_hw_1x06.c | 3 +-
3 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index 1e67667e308c..de2a0ba7a32d 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -30,6 +30,13 @@ enum {
HOST1X_OPCODE_IMM = 0x04,
HOST1X_OPCODE_RESTART = 0x05,
HOST1X_OPCODE_GATHER = 0x06,
+ HOST1X_OPCODE_SETSTRMID = 0x07,
+ HOST1X_OPCODE_SETAPPID = 0x08,
+ HOST1X_OPCODE_SETPYLD = 0x09,
+ HOST1X_OPCODE_INCR_W = 0x0a,
+ HOST1X_OPCODE_NONINCR_W = 0x0b,
+ HOST1X_OPCODE_GATHER_W = 0x0c,
+ HOST1X_OPCODE_RESTART_W = 0x0d,
HOST1X_OPCODE_EXTEND = 0x0e,
};

@@ -38,11 +45,16 @@ enum {
HOST1X_OPCODE_EXTEND_RELEASE_MLOCK = 0x01,
};

-static unsigned int show_channel_command(struct output *o, u32 val)
+#define INVALID_PAYLOAD 0xffffffff
+
+static unsigned int show_channel_command(struct output *o, u32 val,
+ u32 *payload)
{
- unsigned int mask, subop, num;
+ unsigned int mask, subop, num, opcode;
+
+ opcode = val >> 28;

- switch (val >> 28) {
+ switch (opcode) {
case HOST1X_OPCODE_SETCLASS:
mask = val & 0x3f;
if (mask) {
@@ -97,6 +109,42 @@ static unsigned int show_channel_command(struct output *o, u32 val)
val >> 14 & 0x1, val & 0x3fff);
return 1;

+ case HOST1X_OPCODE_SETSTRMID:
+ host1x_debug_cont(o, "SETSTRMID(offset=%06x)\n",
+ val & 0x3fffff);
+ return 0;
+
+ case HOST1X_OPCODE_SETAPPID:
+ host1x_debug_cont(o, "SETAPPID(appid=%02x)\n", val & 0xff);
+ return 0;
+
+ case HOST1X_OPCODE_SETPYLD:
+ *payload = val & 0xffff;
+ host1x_debug_cont(o, "SETPYLD(data=%04x)\n", *payload);
+ return 0;
+
+ case HOST1X_OPCODE_INCR_W:
+ case HOST1X_OPCODE_NONINCR_W:
+ host1x_debug_cont(o, "%s(offset=%06x, ",
+ opcode == HOST1X_OPCODE_INCR_W ?
+ "INCR_W" : "NONINCR_W",
+ val & 0x3fffff);
+ if (*payload == 0) {
+ host1x_debug_cont(o, "[])\n");
+ return 0;
+ } else if (*payload == INVALID_PAYLOAD) {
+ host1x_debug_cont(o, "unknown)\n");
+ return 0;
+ } else {
+ host1x_debug_cont(o, "[");
+ return *payload;
+ }
+
+ case HOST1X_OPCODE_GATHER_W:
+ host1x_debug_cont(o, "GATHER_W(count=%04x, addr=[",
+ val & 0x3fff);
+ return 2;
+
case HOST1X_OPCODE_EXTEND:
subop = val >> 24 & 0xf;
if (subop == HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK)
@@ -122,6 +170,7 @@ static void show_gather(struct output *o, phys_addr_t phys_addr,
/* Map dmaget cursor to corresponding mem handle */
u32 offset = phys_addr - pin_addr;
unsigned int data_count = 0, i;
+ u32 payload = INVALID_PAYLOAD;

/*
* Sometimes we're given different hardware address to the same
@@ -139,7 +188,7 @@ static void show_gather(struct output *o, phys_addr_t phys_addr,

if (!data_count) {
host1x_debug_output(o, "%08x: %08x: ", addr, val);
- data_count = show_channel_command(o, val);
+ data_count = show_channel_command(o, val, &payload);
} else {
host1x_debug_cont(o, "%08x%s", val,
data_count > 1 ? ", " : "])\n");
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c b/drivers/gpu/host1x/hw/debug_hw_1x01.c
index 09e1aa7bb5dd..7d1401c6c193 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x01.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c
@@ -78,6 +78,7 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
struct output *o)
{
u32 val, rd_ptr, wr_ptr, start, end;
+ u32 payload = INVALID_PAYLOAD;
unsigned int data_count = 0;

host1x_debug_output(o, "%u: fifo:\n", ch->id);
@@ -112,7 +113,7 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,

if (!data_count) {
host1x_debug_output(o, "%08x: ", val);
- data_count = show_channel_command(o, val);
+ data_count = show_channel_command(o, val, &payload);
} else {
host1x_debug_cont(o, "%08x%s", val,
data_count > 1 ? ", " : "])\n");
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c b/drivers/gpu/host1x/hw/debug_hw_1x06.c
index bd89da5dc64c..b503c740c022 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x06.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c
@@ -63,6 +63,7 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
struct output *o)
{
u32 val, rd_ptr, wr_ptr, start, end;
+ u32 payload = INVALID_PAYLOAD;
unsigned int data_count = 0;

host1x_debug_output(o, "%u: fifo:\n", ch->id);
@@ -107,7 +108,7 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
if (!data_count) {
host1x_debug_output(o, "%03x 0x%08x: ",
rd_ptr - start, val);
- data_count = show_channel_command(o, val);
+ data_count = show_channel_command(o, val, &payload);
} else {
host1x_debug_cont(o, "%08x%s", val,
data_count > 1 ? ", " : "])\n");
--
2.14.1

2017-09-05 08:12:29

by Mikko Perttunen

[permalink] [raw]
Subject: [PATCH v2 1/6] gpu: host1x: Enable Tegra186 syncpoint protection

Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
specific channels, preventing any other channels from incrementing
them.

Enable this feature where available and assign syncpoints to channels
when submitting a job. Syncpoints are currently never unassigned from
channels since that would require extra work and is unnecessary with
the current channel allocation model.

Signed-off-by: Mikko Perttunen <[email protected]>
---

Notes:
v2:
- Changed from set_protection(bool) to enable_protection
- Added some comments
- Added missing check for hv_regs being NULL in
enable_protection

drivers/gpu/host1x/dev.h | 15 +++++++++++++
drivers/gpu/host1x/hw/channel_hw.c | 3 +++
drivers/gpu/host1x/hw/syncpt_hw.c | 46 ++++++++++++++++++++++++++++++++++++++
drivers/gpu/host1x/syncpt.c | 8 +++++++
4 files changed, 72 insertions(+)

diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index def802c0a6bf..7497cc5ead9e 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
u32 (*load)(struct host1x_syncpt *syncpt);
int (*cpu_incr)(struct host1x_syncpt *syncpt);
int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
+ void (*assign_channel)(struct host1x_syncpt *syncpt,
+ struct host1x_channel *channel);
+ void (*enable_protection)(struct host1x *host);
};

struct host1x_intr_ops {
@@ -186,6 +189,18 @@ static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
return host->syncpt_op->patch_wait(sp, patch_addr);
}

+static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
+ struct host1x_syncpt *sp,
+ struct host1x_channel *ch)
+{
+ return host->syncpt_op->assign_channel(sp, ch);
+}
+
+static inline void host1x_hw_syncpt_enable_protection(struct host1x *host)
+{
+ return host->syncpt_op->enable_protection(host);
+}
+
static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
void (*syncpt_thresh_work)(struct work_struct *))
{
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 8447a56c41ca..0161da331702 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)

syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);

+ /* assign syncpoint to channel */
+ host1x_hw_syncpt_assign_channel(host, sp, ch);
+
job->syncpt_end = syncval;

/* add a setclass for modules that require it */
diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
index 7b0270d60742..dc7a44614fef 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -106,6 +106,50 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
return 0;
}

+/**
+ * syncpt_assign_channel() - Assign syncpoint to channel
+ * @sp: syncpoint
+ * @ch: channel
+ *
+ * On chips with the syncpoint protection feature (Tegra186+), assign @sp to
+ * @ch, preventing other channels from incrementing the syncpoints. If @ch is
+ * NULL, unassigns the syncpoint.
+ *
+ * On older chips, do nothing.
+ */
+static void syncpt_assign_channel(struct host1x_syncpt *sp,
+ struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+ struct host1x *host = sp->host;
+
+ if (!host->hv_regs)
+ return;
+
+ host1x_sync_writel(host,
+ HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
+ HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
+#endif
+}
+
+/**
+ * syncpt_enable_protection() - Enable syncpoint protection
+ * @host: host1x instance
+ *
+ * On chips with the syncpoint protection feature (Tegra186+), enable this
+ * feature. On older chips, do nothing.
+ */
+static void syncpt_enable_protection(struct host1x *host)
+{
+#if HOST1X_HW >= 6
+ if (!host->hv_regs)
+ return;
+
+ host1x_hypervisor_writel(host, HOST1X_HV_SYNCPT_PROT_EN_CH_EN,
+ HOST1X_HV_SYNCPT_PROT_EN);
+#endif
+}
+
static const struct host1x_syncpt_ops host1x_syncpt_ops = {
.restore = syncpt_restore,
.restore_wait_base = syncpt_restore_wait_base,
@@ -113,4 +157,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
.load = syncpt_load,
.cpu_incr = syncpt_cpu_incr,
.patch_wait = syncpt_patch_wait,
+ .assign_channel = syncpt_assign_channel,
+ .enable_protection = syncpt_enable_protection,
};
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 048ac9e344ce..4c7a4c8b2ad2 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -398,6 +398,13 @@ int host1x_syncpt_init(struct host1x *host)
for (i = 0; i < host->info->nb_pts; i++) {
syncpt[i].id = i;
syncpt[i].host = host;
+
+ /*
+ * Unassign syncpt from channels for purposes of Tegra186
+ * syncpoint protection. This prevents any channel from
+ * accessing it until it is reassigned.
+ */
+ host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
}

for (i = 0; i < host->info->nb_bases; i++)
@@ -408,6 +415,7 @@ int host1x_syncpt_init(struct host1x *host)
host->bases = bases;

host1x_syncpt_restore(host);
+ host1x_hw_syncpt_enable_protection(host);

/* Allocate sync point to use for clearing waits for expired fences */
host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
--
2.14.1

2017-09-05 08:12:27

by Mikko Perttunen

[permalink] [raw]
Subject: [PATCH v2 3/6] gpu: host1x: Improve debug disassembly formatting

The host1x driver prints out "disassembly" dumps of the command FIFO
and gather contents on submission timeouts. However, the output has
been quite difficult to read with unnecessary newlines and occasional
missing parentheses.

Fix these problems by using pr_cont to remove unnecessary newlines
and by fixing other small issues.

Signed-off-by: Mikko Perttunen <[email protected]>
Reviewed-by: Dmitry Osipenko <[email protected]>
Tested-by: Dmitry Osipenko <[email protected]>
---
This uses pr_cont, which there are currently talks of being replaced
with something better. I kept using it here for now until there is
some conclusion of what's the best way to replace it.

drivers/gpu/host1x/debug.c | 14 ++++++++++-
drivers/gpu/host1x/debug.h | 14 ++++++++---
drivers/gpu/host1x/hw/debug_hw.c | 46 ++++++++++++++++++++++-------------
drivers/gpu/host1x/hw/debug_hw_1x01.c | 8 +++---
drivers/gpu/host1x/hw/debug_hw_1x06.c | 9 ++++---
5 files changed, 61 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index 2aae0e63214c..dc77ec452ffc 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -40,7 +40,19 @@ void host1x_debug_output(struct output *o, const char *fmt, ...)
len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
va_end(args);

- o->fn(o->ctx, o->buf, len);
+ o->fn(o->ctx, o->buf, len, false);
+}
+
+void host1x_debug_cont(struct output *o, const char *fmt, ...)
+{
+ va_list args;
+ int len;
+
+ va_start(args, fmt);
+ len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
+ va_end(args);
+
+ o->fn(o->ctx, o->buf, len, true);
}

static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
diff --git a/drivers/gpu/host1x/debug.h b/drivers/gpu/host1x/debug.h
index 4595b2e0799f..990cce47e737 100644
--- a/drivers/gpu/host1x/debug.h
+++ b/drivers/gpu/host1x/debug.h
@@ -24,22 +24,28 @@
struct host1x;

struct output {
- void (*fn)(void *ctx, const char *str, size_t len);
+ void (*fn)(void *ctx, const char *str, size_t len, bool cont);
void *ctx;
char buf[256];
};

-static inline void write_to_seqfile(void *ctx, const char *str, size_t len)
+static inline void write_to_seqfile(void *ctx, const char *str, size_t len,
+ bool cont)
{
seq_write((struct seq_file *)ctx, str, len);
}

-static inline void write_to_printk(void *ctx, const char *str, size_t len)
+static inline void write_to_printk(void *ctx, const char *str, size_t len,
+ bool cont)
{
- pr_info("%s", str);
+ if (cont)
+ pr_cont("%s", str);
+ else
+ pr_info("%s", str);
}

void __printf(2, 3) host1x_debug_output(struct output *o, const char *fmt, ...);
+void __printf(2, 3) host1x_debug_cont(struct output *o, const char *fmt, ...);

extern unsigned int host1x_debug_trace_cmdbuf;

diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index 770d92e62d69..1e67667e308c 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -40,48 +40,59 @@ enum {

static unsigned int show_channel_command(struct output *o, u32 val)
{
- unsigned int mask, subop;
+ unsigned int mask, subop, num;

switch (val >> 28) {
case HOST1X_OPCODE_SETCLASS:
mask = val & 0x3f;
if (mask) {
- host1x_debug_output(o, "SETCL(class=%03x, offset=%03x, mask=%02x, [",
+ host1x_debug_cont(o, "SETCL(class=%03x, offset=%03x, mask=%02x, [",
val >> 6 & 0x3ff,
val >> 16 & 0xfff, mask);
return hweight8(mask);
}

- host1x_debug_output(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff);
+ host1x_debug_cont(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff);
return 0;

case HOST1X_OPCODE_INCR:
- host1x_debug_output(o, "INCR(offset=%03x, [",
+ num = val & 0xffff;
+ host1x_debug_cont(o, "INCR(offset=%03x, [",
val >> 16 & 0xfff);
- return val & 0xffff;
+ if (!num)
+ host1x_debug_cont(o, "])\n");
+
+ return num;

case HOST1X_OPCODE_NONINCR:
- host1x_debug_output(o, "NONINCR(offset=%03x, [",
+ num = val & 0xffff;
+ host1x_debug_cont(o, "NONINCR(offset=%03x, [",
val >> 16 & 0xfff);
- return val & 0xffff;
+ if (!num)
+ host1x_debug_cont(o, "])\n");
+
+ return num;

case HOST1X_OPCODE_MASK:
mask = val & 0xffff;
- host1x_debug_output(o, "MASK(offset=%03x, mask=%03x, [",
+ host1x_debug_cont(o, "MASK(offset=%03x, mask=%03x, [",
val >> 16 & 0xfff, mask);
+ if (!mask)
+ host1x_debug_cont(o, "])\n");
+
return hweight16(mask);

case HOST1X_OPCODE_IMM:
- host1x_debug_output(o, "IMM(offset=%03x, data=%03x)\n",
+ host1x_debug_cont(o, "IMM(offset=%03x, data=%03x)\n",
val >> 16 & 0xfff, val & 0xffff);
return 0;

case HOST1X_OPCODE_RESTART:
- host1x_debug_output(o, "RESTART(offset=%08x)\n", val << 4);
+ host1x_debug_cont(o, "RESTART(offset=%08x)\n", val << 4);
return 0;

case HOST1X_OPCODE_GATHER:
- host1x_debug_output(o, "GATHER(offset=%03x, insert=%d, type=%d, count=%04x, addr=[",
+ host1x_debug_cont(o, "GATHER(offset=%03x, insert=%d, type=%d, count=%04x, addr=[",
val >> 16 & 0xfff, val >> 15 & 0x1,
val >> 14 & 0x1, val & 0x3fff);
return 1;
@@ -89,16 +100,17 @@ static unsigned int show_channel_command(struct output *o, u32 val)
case HOST1X_OPCODE_EXTEND:
subop = val >> 24 & 0xf;
if (subop == HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK)
- host1x_debug_output(o, "ACQUIRE_MLOCK(index=%d)\n",
+ host1x_debug_cont(o, "ACQUIRE_MLOCK(index=%d)\n",
val & 0xff);
else if (subop == HOST1X_OPCODE_EXTEND_RELEASE_MLOCK)
- host1x_debug_output(o, "RELEASE_MLOCK(index=%d)\n",
+ host1x_debug_cont(o, "RELEASE_MLOCK(index=%d)\n",
val & 0xff);
else
- host1x_debug_output(o, "EXTEND_UNKNOWN(%08x)\n", val);
+ host1x_debug_cont(o, "EXTEND_UNKNOWN(%08x)\n", val);
return 0;

default:
+ host1x_debug_cont(o, "UNKNOWN\n");
return 0;
}
}
@@ -126,11 +138,11 @@ static void show_gather(struct output *o, phys_addr_t phys_addr,
u32 val = *(map_addr + offset / 4 + i);

if (!data_count) {
- host1x_debug_output(o, "%08x: %08x:", addr, val);
+ host1x_debug_output(o, "%08x: %08x: ", addr, val);
data_count = show_channel_command(o, val);
} else {
- host1x_debug_output(o, "%08x%s", val,
- data_count > 0 ? ", " : "])\n");
+ host1x_debug_cont(o, "%08x%s", val,
+ data_count > 1 ? ", " : "])\n");
data_count--;
}
}
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c b/drivers/gpu/host1x/hw/debug_hw_1x01.c
index 8f243903cc7f..09e1aa7bb5dd 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x01.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c
@@ -111,11 +111,11 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_READ);

if (!data_count) {
- host1x_debug_output(o, "%08x:", val);
+ host1x_debug_output(o, "%08x: ", val);
data_count = show_channel_command(o, val);
} else {
- host1x_debug_output(o, "%08x%s", val,
- data_count > 0 ? ", " : "])\n");
+ host1x_debug_cont(o, "%08x%s", val,
+ data_count > 1 ? ", " : "])\n");
data_count--;
}

@@ -126,7 +126,7 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
} while (rd_ptr != wr_ptr);

if (data_count)
- host1x_debug_output(o, ", ...])\n");
+ host1x_debug_cont(o, ", ...])\n");
host1x_debug_output(o, "\n");

host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c b/drivers/gpu/host1x/hw/debug_hw_1x06.c
index 9cdee657fb46..bd89da5dc64c 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x06.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c
@@ -105,11 +105,12 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
HOST1X_HV_CMDFIFO_PEEK_READ);

if (!data_count) {
- host1x_debug_output(o, "%08x:", val);
+ host1x_debug_output(o, "%03x 0x%08x: ",
+ rd_ptr - start, val);
data_count = show_channel_command(o, val);
} else {
- host1x_debug_output(o, "%08x%s", val,
- data_count > 0 ? ", " : "])\n");
+ host1x_debug_cont(o, "%08x%s", val,
+ data_count > 1 ? ", " : "])\n");
data_count--;
}

@@ -120,7 +121,7 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
} while (rd_ptr != wr_ptr);

if (data_count)
- host1x_debug_output(o, ", ...])\n");
+ host1x_debug_cont(o, ", ...])\n");
host1x_debug_output(o, "\n");

host1x_hypervisor_writel(host, 0x0, HOST1X_HV_CMDFIFO_PEEK_CTRL);
--
2.14.1

2017-09-05 12:42:11

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v2 6/6] drm/tegra: Use u64_to_user_ptr helper

On 05.09.2017 11:10, Mikko Perttunen wrote:
> Use the u64_to_user_ptr helper macro to cast IOCTL argument u64 values
> to user pointers instead of writing out the cast manually.
>
> Signed-off-by: Mikko Perttunen <[email protected]>
> ---

This patch doesn't apply to linux-next, you should probably rebase this series.

> drivers/gpu/drm/tegra/drm.c | 18 ++++++++++--------
> 1 file changed, 10 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
> index e3331a2bc082..72d5c0021540 100644
> --- a/drivers/gpu/drm/tegra/drm.c
> +++ b/drivers/gpu/drm/tegra/drm.c
> @@ -388,18 +388,21 @@ int tegra_drm_submit(struct tegra_drm_context *context,
> unsigned int num_cmdbufs = args->num_cmdbufs;
> unsigned int num_relocs = args->num_relocs;
> unsigned int num_waitchks = args->num_waitchks;
> - struct drm_tegra_cmdbuf __user *cmdbufs =
> - (void __user *)(uintptr_t)args->cmdbufs;
> - struct drm_tegra_reloc __user *relocs =
> - (void __user *)(uintptr_t)args->relocs;
> - struct drm_tegra_waitchk __user *waitchks =
> - (void __user *)(uintptr_t)args->waitchks;
> + struct drm_tegra_cmdbuf __user *cmdbufs;
> + struct drm_tegra_reloc __user *relocs;
> + struct drm_tegra_waitchk __user *waitchks;
> + struct drm_tegra_syncpt __user *user_syncpt;
> struct drm_tegra_syncpt syncpt;
> struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
> struct host1x_syncpt *sp;
> struct host1x_job *job;
> int err;
>
> + cmdbufs = u64_to_user_ptr(args->cmdbufs);
> + relocs = u64_to_user_ptr(args->relocs);
> + waitchks = u64_to_user_ptr(args->waitchks);

What about to prefix these variables with 'user_' for consistency?

> + user_syncpt = u64_to_user_ptr(args->syncpts);
> +
> /* We don't yet support other than one syncpt_incr struct per submit */
> if (args->num_syncpts != 1)
> return -EINVAL;
> @@ -520,8 +523,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
> }
> }
>
> - if (copy_from_user(&syncpt, (void __user *)(uintptr_t)args->syncpts,
> - sizeof(syncpt))) {
> + if (copy_from_user(&syncpt, user_syncpt, sizeof(syncpt))) {
> err = -EFAULT;
> goto fail;
> }
>


--
Dmitry

2017-09-05 13:02:25

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v2 4/6] gpu: host1x: Disassemble more instructions

On 05.09.2017 11:10, Mikko Perttunen wrote:
> The disassembler for debug dumps was missing some newer host1x opcodes.
> Add disassembly support for these.
>
> Signed-off-by: Mikko Perttunen <[email protected]>
> ---
> drivers/gpu/host1x/hw/debug_hw.c | 57 ++++++++++++++++++++++++++++++++---
> drivers/gpu/host1x/hw/debug_hw_1x01.c | 3 +-
> drivers/gpu/host1x/hw/debug_hw_1x06.c | 3 +-
> 3 files changed, 57 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
> index 1e67667e308c..de2a0ba7a32d 100644
> --- a/drivers/gpu/host1x/hw/debug_hw.c
> +++ b/drivers/gpu/host1x/hw/debug_hw.c
> @@ -30,6 +30,13 @@ enum {
> HOST1X_OPCODE_IMM = 0x04,
> HOST1X_OPCODE_RESTART = 0x05,
> HOST1X_OPCODE_GATHER = 0x06,
> + HOST1X_OPCODE_SETSTRMID = 0x07,
> + HOST1X_OPCODE_SETAPPID = 0x08,
> + HOST1X_OPCODE_SETPYLD = 0x09,
> + HOST1X_OPCODE_INCR_W = 0x0a,
> + HOST1X_OPCODE_NONINCR_W = 0x0b,
> + HOST1X_OPCODE_GATHER_W = 0x0c,
> + HOST1X_OPCODE_RESTART_W = 0x0d,
> HOST1X_OPCODE_EXTEND = 0x0e,
> };
>
> @@ -38,11 +45,16 @@ enum {
> HOST1X_OPCODE_EXTEND_RELEASE_MLOCK = 0x01,
> };
>
> -static unsigned int show_channel_command(struct output *o, u32 val)
> +#define INVALID_PAYLOAD 0xffffffff
> +
> +static unsigned int show_channel_command(struct output *o, u32 val,
> + u32 *payload)
> {
> - unsigned int mask, subop, num;
> + unsigned int mask, subop, num, opcode;
> +
> + opcode = val >> 28;
>
> - switch (val >> 28) {
> + switch (opcode) {
> case HOST1X_OPCODE_SETCLASS:
> mask = val & 0x3f;
> if (mask) {
> @@ -97,6 +109,42 @@ static unsigned int show_channel_command(struct output *o, u32 val)
> val >> 14 & 0x1, val & 0x3fff);
> return 1;
>

Opcodes below aren't relevant to older Tegra's, seems "#if HOST1X_HW >= 6"
should be added here.

> + case HOST1X_OPCODE_SETSTRMID:
> + host1x_debug_cont(o, "SETSTRMID(offset=%06x)\n",
> + val & 0x3fffff);
> + return 0;
> +
> + case HOST1X_OPCODE_SETAPPID:
> + host1x_debug_cont(o, "SETAPPID(appid=%02x)\n", val & 0xff);
> + return 0;
> +
> + case HOST1X_OPCODE_SETPYLD:
> + *payload = val & 0xffff;
> + host1x_debug_cont(o, "SETPYLD(data=%04x)\n", *payload);
> + return 0;
> +
> + case HOST1X_OPCODE_INCR_W:
> + case HOST1X_OPCODE_NONINCR_W:
> + host1x_debug_cont(o, "%s(offset=%06x, ",
> + opcode == HOST1X_OPCODE_INCR_W ?
> + "INCR_W" : "NONINCR_W",
> + val & 0x3fffff);
> + if (*payload == 0) {
> + host1x_debug_cont(o, "[])\n");
> + return 0;
> + } else if (*payload == INVALID_PAYLOAD) {
> + host1x_debug_cont(o, "unknown)\n");
> + return 0;
> + } else {
> + host1x_debug_cont(o, "[");
> + return *payload;
> + }
> +
> + case HOST1X_OPCODE_GATHER_W:
> + host1x_debug_cont(o, "GATHER_W(count=%04x, addr=[",
> + val & 0x3fff);
> + return 2;
> +
> case HOST1X_OPCODE_EXTEND:
> subop = val >> 24 & 0xf;
> if (subop == HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK)
> @@ -122,6 +170,7 @@ static void show_gather(struct output *o, phys_addr_t phys_addr,
> /* Map dmaget cursor to corresponding mem handle */
> u32 offset = phys_addr - pin_addr;
> unsigned int data_count = 0, i;
> + u32 payload = INVALID_PAYLOAD;
>
> /*
> * Sometimes we're given different hardware address to the same
> @@ -139,7 +188,7 @@ static void show_gather(struct output *o, phys_addr_t phys_addr,
>
> if (!data_count) {
> host1x_debug_output(o, "%08x: %08x: ", addr, val);
> - data_count = show_channel_command(o, val);
> + data_count = show_channel_command(o, val, &payload);
> } else {
> host1x_debug_cont(o, "%08x%s", val,
> data_count > 1 ? ", " : "])\n");
> diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c b/drivers/gpu/host1x/hw/debug_hw_1x01.c
> index 09e1aa7bb5dd..7d1401c6c193 100644
> --- a/drivers/gpu/host1x/hw/debug_hw_1x01.c
> +++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c
> @@ -78,6 +78,7 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
> struct output *o)
> {
> u32 val, rd_ptr, wr_ptr, start, end;
> + u32 payload = INVALID_PAYLOAD;
> unsigned int data_count = 0;
>
> host1x_debug_output(o, "%u: fifo:\n", ch->id);
> @@ -112,7 +113,7 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
>
> if (!data_count) {
> host1x_debug_output(o, "%08x: ", val);
> - data_count = show_channel_command(o, val);
> + data_count = show_channel_command(o, val, &payload);
> } else {
> host1x_debug_cont(o, "%08x%s", val,
> data_count > 1 ? ", " : "])\n");
> diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c b/drivers/gpu/host1x/hw/debug_hw_1x06.c
> index bd89da5dc64c..b503c740c022 100644
> --- a/drivers/gpu/host1x/hw/debug_hw_1x06.c
> +++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c
> @@ -63,6 +63,7 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
> struct output *o)
> {
> u32 val, rd_ptr, wr_ptr, start, end;
> + u32 payload = INVALID_PAYLOAD;
> unsigned int data_count = 0;
>
> host1x_debug_output(o, "%u: fifo:\n", ch->id);
> @@ -107,7 +108,7 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
> if (!data_count) {
> host1x_debug_output(o, "%03x 0x%08x: ",
> rd_ptr - start, val);
> - data_count = show_channel_command(o, val);
> + data_count = show_channel_command(o, val, &payload);
> } else {
> host1x_debug_cont(o, "%08x%s", val,
> data_count > 1 ? ", " : "])\n");
>


--
Dmitry

2017-09-05 13:05:19

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v2 5/6] gpu: host1x: Fix incorrect comment for channel_request

On 05.09.2017 11:10, Mikko Perttunen wrote:
> This function actually doesn't sleep in the version that was merged.
>
> Signed-off-by: Mikko Perttunen <[email protected]>
> ---
> drivers/gpu/host1x/channel.c | 3 +--
> 1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
> index db9b91d1384c..2fb93c27c1d9 100644
> --- a/drivers/gpu/host1x/channel.c
> +++ b/drivers/gpu/host1x/channel.c
> @@ -128,8 +128,7 @@ static struct host1x_channel *acquire_unused_channel(struct host1x *host)
> * host1x_channel_request() - Allocate a channel
> * @device: Host1x unit this channel will be used to send commands to
> *
> - * Allocates a new host1x channel for @device. If there are no free channels,
> - * this will sleep until one becomes available. May return NULL if CDMA
> + * Allocates a new host1x channel for @device. May return NULL if CDMA
> * initialization fails.
> */
> struct host1x_channel *host1x_channel_request(struct device *dev)
>

Reviewed-by: Dmitry Osipenko <[email protected]>

--
Dmitry

2017-09-05 13:33:18

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v2 1/6] gpu: host1x: Enable Tegra186 syncpoint protection

On 05.09.2017 11:10, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
>
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
>
> Signed-off-by: Mikko Perttunen <[email protected]>
> ---
>
> Notes:
> v2:
> - Changed from set_protection(bool) to enable_protection
> - Added some comments
> - Added missing check for hv_regs being NULL in
> enable_protection
>
> drivers/gpu/host1x/dev.h | 15 +++++++++++++
> drivers/gpu/host1x/hw/channel_hw.c | 3 +++
> drivers/gpu/host1x/hw/syncpt_hw.c | 46 ++++++++++++++++++++++++++++++++++++++
> drivers/gpu/host1x/syncpt.c | 8 +++++++
> 4 files changed, 72 insertions(+)
>
> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index def802c0a6bf..7497cc5ead9e 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
> u32 (*load)(struct host1x_syncpt *syncpt);
> int (*cpu_incr)(struct host1x_syncpt *syncpt);
> int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
> + void (*assign_channel)(struct host1x_syncpt *syncpt,
> + struct host1x_channel *channel);
> + void (*enable_protection)(struct host1x *host);
> };
>
> struct host1x_intr_ops {
> @@ -186,6 +189,18 @@ static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
> return host->syncpt_op->patch_wait(sp, patch_addr);
> }
>
> +static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
> + struct host1x_syncpt *sp,
> + struct host1x_channel *ch)
> +{
> + return host->syncpt_op->assign_channel(sp, ch);
> +}
> +
> +static inline void host1x_hw_syncpt_enable_protection(struct host1x *host)
> +{
> + return host->syncpt_op->enable_protection(host);
> +}
> +
> static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
> void (*syncpt_thresh_work)(struct work_struct *))
> {
> diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
> index 8447a56c41ca..0161da331702 100644
> --- a/drivers/gpu/host1x/hw/channel_hw.c
> +++ b/drivers/gpu/host1x/hw/channel_hw.c
> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>
> syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>
> + /* assign syncpoint to channel */
> + host1x_hw_syncpt_assign_channel(host, sp, ch);

This function could be renamed to host1x_hw_assign_syncpt_to_channel() and then
comment to it won't be needed.

It is not very nice that channel would be re-assigned on each submit. Maybe that
assignment should be done by host1x_syncpt_request() ?

> +
> job->syncpt_end = syncval;
>
> /* add a setclass for modules that require it */
> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
> index 7b0270d60742..dc7a44614fef 100644
> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
> @@ -106,6 +106,50 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
> return 0;
> }
>
> +/**
> + * syncpt_assign_channel() - Assign syncpoint to channel
> + * @sp: syncpoint
> + * @ch: channel
> + *
> + * On chips with the syncpoint protection feature (Tegra186+), assign @sp to
> + * @ch, preventing other channels from incrementing the syncpoints. If @ch is
> + * NULL, unassigns the syncpoint.
> + *
> + * On older chips, do nothing.
> + */
> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
> + struct host1x_channel *ch)
> +{
> +#if HOST1X_HW >= 6
> + struct host1x *host = sp->host;
> +
> + if (!host->hv_regs)
> + return;
> +
> + host1x_sync_writel(host,
> + HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
> + HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
> +#endif
> +}
> +
> +/**
> + * syncpt_enable_protection() - Enable syncpoint protection
> + * @host: host1x instance
> + *
> + * On chips with the syncpoint protection feature (Tegra186+), enable this
> + * feature. On older chips, do nothing.
> + */
> +static void syncpt_enable_protection(struct host1x *host)
> +{
> +#if HOST1X_HW >= 6
> + if (!host->hv_regs)
> + return;
> +
> + host1x_hypervisor_writel(host, HOST1X_HV_SYNCPT_PROT_EN_CH_EN,
> + HOST1X_HV_SYNCPT_PROT_EN);
> +#endif
> +}
> +
> static const struct host1x_syncpt_ops host1x_syncpt_ops = {
> .restore = syncpt_restore,
> .restore_wait_base = syncpt_restore_wait_base,
> @@ -113,4 +157,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
> .load = syncpt_load,
> .cpu_incr = syncpt_cpu_incr,
> .patch_wait = syncpt_patch_wait,
> + .assign_channel = syncpt_assign_channel,
> + .enable_protection = syncpt_enable_protection,
> };
> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> index 048ac9e344ce..4c7a4c8b2ad2 100644
> --- a/drivers/gpu/host1x/syncpt.c
> +++ b/drivers/gpu/host1x/syncpt.c
> @@ -398,6 +398,13 @@ int host1x_syncpt_init(struct host1x *host)
> for (i = 0; i < host->info->nb_pts; i++) {
> syncpt[i].id = i;
> syncpt[i].host = host;
> +
> + /*
> + * Unassign syncpt from channels for purposes of Tegra186
> + * syncpoint protection. This prevents any channel from
> + * accessing it until it is reassigned.
> + */
> + host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
> }
>
> for (i = 0; i < host->info->nb_bases; i++)
> @@ -408,6 +415,7 @@ int host1x_syncpt_init(struct host1x *host)
> host->bases = bases;
>
> host1x_syncpt_restore(host);
> + host1x_hw_syncpt_enable_protection(host);
>
> /* Allocate sync point to use for clearing waits for expired fences */
> host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
>


--
Dmitry

2017-09-22 14:02:33

by Mikko Perttunen

[permalink] [raw]
Subject: Re: [PATCH v2 1/6] gpu: host1x: Enable Tegra186 syncpoint protection

On 09/05/2017 04:33 PM, Dmitry Osipenko wrote:
> On 05.09.2017 11:10, Mikko Perttunen wrote:
>> ... >> diff --git a/drivers/gpu/host1x/hw/channel_hw.c
b/drivers/gpu/host1x/hw/channel_hw.c
>> index 8447a56c41ca..0161da331702 100644
>> --- a/drivers/gpu/host1x/hw/channel_hw.c
>> +++ b/drivers/gpu/host1x/hw/channel_hw.c
>> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>>
>> syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>>
>> + /* assign syncpoint to channel */
>> + host1x_hw_syncpt_assign_channel(host, sp, ch);
>
> This function could be renamed to host1x_hw_assign_syncpt_to_channel() and then
> comment to it won't be needed.

Maybe host1x_hw_syncpt_assign_to_channel? I'd like to keep the current
noun_verb format. Though IMHO even the current name is pretty
descriptive in itself.

>
> It is not very nice that channel would be re-assigned on each submit. Maybe that
> assignment should be done by host1x_syncpt_request() ?

host1x_syncpt_request doesn't know about the channel so we'd have to
thread this information there and through each client driver in
drm/tegra/, so I decided not to do this at this time. I'm planning a new
channel allocation model which will change that side of the code anyway,
so I'd like to revisit this at that point. For our current channel
model, the current implementation doesn't have any functional downsides
anyway.

>
>> +
>> job->syncpt_end = syncval;
>>
>> /* add a setclass for modules that require it */
>> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
>> index 7b0270d60742..dc7a44614fef 100644
>> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
>> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
>> @@ -106,6 +106,50 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
>> return 0;
>> }
>>
>> +/**
>> + * syncpt_assign_channel() - Assign syncpoint to channel
>> + * @sp: syncpoint
>> + * @ch: channel
>> + *
>> + * On chips with the syncpoint protection feature (Tegra186+), assign @sp to
>> + * @ch, preventing other channels from incrementing the syncpoints. If @ch is
>> + * NULL, unassigns the syncpoint.
>> + *
>> + * On older chips, do nothing.
>> + */
>> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
>> + struct host1x_channel *ch)
>> +{
>> +#if HOST1X_HW >= 6
>> + struct host1x *host = sp->host;
>> +
>> + if (!host->hv_regs)
>> + return;
>> +
>> + host1x_sync_writel(host,
>> + HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
>> + HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
>> +#endif
>> +}
>> +
>> +/**
>> + * syncpt_enable_protection() - Enable syncpoint protection
>> + * @host: host1x instance
>> + *
>> + * On chips with the syncpoint protection feature (Tegra186+), enable this
>> + * feature. On older chips, do nothing.
>> + */
>> +static void syncpt_enable_protection(struct host1x *host)
>> +{
>> +#if HOST1X_HW >= 6
>> + if (!host->hv_regs)
>> + return;
>> +
>> + host1x_hypervisor_writel(host, HOST1X_HV_SYNCPT_PROT_EN_CH_EN,
>> + HOST1X_HV_SYNCPT_PROT_EN);
>> +#endif
>> +}
>> +
>> static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>> .restore = syncpt_restore,
>> .restore_wait_base = syncpt_restore_wait_base,
>> @@ -113,4 +157,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>> .load = syncpt_load,
>> .cpu_incr = syncpt_cpu_incr,
>> .patch_wait = syncpt_patch_wait,
>> + .assign_channel = syncpt_assign_channel,
>> + .enable_protection = syncpt_enable_protection,
>> };
>> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
>> index 048ac9e344ce..4c7a4c8b2ad2 100644
>> --- a/drivers/gpu/host1x/syncpt.c
>> +++ b/drivers/gpu/host1x/syncpt.c
>> @@ -398,6 +398,13 @@ int host1x_syncpt_init(struct host1x *host)
>> for (i = 0; i < host->info->nb_pts; i++) {
>> syncpt[i].id = i;
>> syncpt[i].host = host;
>> +
>> + /*
>> + * Unassign syncpt from channels for purposes of Tegra186
>> + * syncpoint protection. This prevents any channel from
>> + * accessing it until it is reassigned.
>> + */
>> + host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
>> }
>>
>> for (i = 0; i < host->info->nb_bases; i++)
>> @@ -408,6 +415,7 @@ int host1x_syncpt_init(struct host1x *host)
>> host->bases = bases;
>>
>> host1x_syncpt_restore(host);
>> + host1x_hw_syncpt_enable_protection(host);
>>
>> /* Allocate sync point to use for clearing waits for expired fences */
>> host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
>>
>
>

2017-09-22 23:18:01

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH v2 1/6] gpu: host1x: Enable Tegra186 syncpoint protection

On 22.09.2017 17:02, Mikko Perttunen wrote:
> On 09/05/2017 04:33 PM, Dmitry Osipenko wrote:
>> On 05.09.2017 11:10, Mikko Perttunen wrote:
>>> ... >> diff --git a/drivers/gpu/host1x/hw/channel_hw.c
> b/drivers/gpu/host1x/hw/channel_hw.c
>>> index 8447a56c41ca..0161da331702 100644
>>> --- a/drivers/gpu/host1x/hw/channel_hw.c
>>> +++ b/drivers/gpu/host1x/hw/channel_hw.c
>>> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>>>         syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>>>   +    /* assign syncpoint to channel */
>>> +    host1x_hw_syncpt_assign_channel(host, sp, ch);
>>
>> This function could be renamed to host1x_hw_assign_syncpt_to_channel() and then
>> comment to it won't be needed.
>
> Maybe host1x_hw_syncpt_assign_to_channel? I'd like to keep the current noun_verb
> format. Though IMHO even the current name is pretty descriptive in itself.
>

That variant sounds good to me as well.

>>
>> It is not very nice that channel would be re-assigned on each submit. Maybe that
>> assignment should be done by host1x_syncpt_request() ?
>
> host1x_syncpt_request doesn't know about the channel so we'd have to thread this
> information there and through each client driver in drm/tegra/, so I decided not
> to do this at this time. I'm planning a new channel allocation model which will
> change that side of the code anyway, so I'd like to revisit this at that point.
> For our current channel model, the current implementation doesn't have any
> functional downsides anyway.
>

Another very minor downside is that it causes an extra dummy invocation on
pre-Tegra186. Of course that could be changed later in a follow-up patch, not a
big deal :)

>>
>>> +
>>>       job->syncpt_end = syncval;
>>>         /* add a setclass for modules that require it */
>>> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c
>>> b/drivers/gpu/host1x/hw/syncpt_hw.c
>>> index 7b0270d60742..dc7a44614fef 100644
>>> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
>>> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
>>> @@ -106,6 +106,50 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp,
>>> void *patch_addr)
>>>       return 0;
>>>   }
>>>   +/**
>>> + * syncpt_assign_channel() - Assign syncpoint to channel
>>> + * @sp: syncpoint
>>> + * @ch: channel
>>> + *
>>> + * On chips with the syncpoint protection feature (Tegra186+), assign @sp to
>>> + * @ch, preventing other channels from incrementing the syncpoints. If @ch is
>>> + * NULL, unassigns the syncpoint.
>>> + *
>>> + * On older chips, do nothing.
>>> + */
>>> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
>>> +                  struct host1x_channel *ch)
>>> +{
>>> +#if HOST1X_HW >= 6
>>> +    struct host1x *host = sp->host;
>>> +
>>> +    if (!host->hv_regs)
>>> +        return;
>>> +
>>> +    host1x_sync_writel(host,
>>> +               HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
>>> +               HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
>>> +#endif
>>> +}
>>> +
>>> +/**
>>> + * syncpt_enable_protection() - Enable syncpoint protection
>>> + * @host: host1x instance
>>> + *
>>> + * On chips with the syncpoint protection feature (Tegra186+), enable this
>>> + * feature. On older chips, do nothing.
>>> + */
>>> +static void syncpt_enable_protection(struct host1x *host)
>>> +{
>>> +#if HOST1X_HW >= 6
>>> +    if (!host->hv_regs)
>>> +        return;
>>> +
>>> +    host1x_hypervisor_writel(host, HOST1X_HV_SYNCPT_PROT_EN_CH_EN,
>>> +                 HOST1X_HV_SYNCPT_PROT_EN);
>>> +#endif
>>> +}
>>> +
>>>   static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>>>       .restore = syncpt_restore,
>>>       .restore_wait_base = syncpt_restore_wait_base,
>>> @@ -113,4 +157,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>>>       .load = syncpt_load,
>>>       .cpu_incr = syncpt_cpu_incr,
>>>       .patch_wait = syncpt_patch_wait,
>>> +    .assign_channel = syncpt_assign_channel,
>>> +    .enable_protection = syncpt_enable_protection,
>>>   };
>>> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
>>> index 048ac9e344ce..4c7a4c8b2ad2 100644
>>> --- a/drivers/gpu/host1x/syncpt.c
>>> +++ b/drivers/gpu/host1x/syncpt.c
>>> @@ -398,6 +398,13 @@ int host1x_syncpt_init(struct host1x *host)
>>>       for (i = 0; i < host->info->nb_pts; i++) {
>>>           syncpt[i].id = i;
>>>           syncpt[i].host = host;
>>> +
>>> +        /*
>>> +         * Unassign syncpt from channels for purposes of Tegra186
>>> +         * syncpoint protection. This prevents any channel from
>>> +         * accessing it until it is reassigned.
>>> +         */
>>> +        host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
>>>       }
>>>         for (i = 0; i < host->info->nb_bases; i++)
>>> @@ -408,6 +415,7 @@ int host1x_syncpt_init(struct host1x *host)
>>>       host->bases = bases;
>>>         host1x_syncpt_restore(host);
>>> +    host1x_hw_syncpt_enable_protection(host);
>>>         /* Allocate sync point to use for clearing waits for expired fences */
>>>       host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
>>>
>>
>>


--
Dmitry