2017-08-18 16:16:40

by Mikko Perttunen

[permalink] [raw]
Subject: [PATCH 0/4] Miscellaneous improvements to Host1x and TegraDRM

Hi all,

here are some new features and improvements.

Patch 1 enables syncpoint protection which prevents channels from
touching syncpoints not belonging to them on Tegra186.

Patch 2 enables the gather filter which prevents userspace command
buffers from using CDMA commands usually reserved for the kernel.
A test is available at git://github.com/cyndis/host1x_test, branch
gather-filter.

Patch 3 greatly improves formatting of debug dumps spewed by host1x
in case of job timeouts. They are now actually readable by humans
without use of additional scripts.

Patch 4 is a simple aesthetical fix to the TegraDRM submit path.

Everything was tested on TX1 and TX2 and should be applied on the
previously posted Tegra186 support series.

Cheers,
Mikko

*** BLURB HERE ***

Mikko Perttunen (4):
gpu: host1x: Enable Tegra186 syncpoint protection
gpu: host1x: Enable gather filter
gpu: host1x: Improve debug disassembly formatting
drm/tegra: Use u64_to_user_ptr helper

drivers/gpu/drm/tegra/drm.c | 9 +++---
drivers/gpu/host1x/debug.c | 14 ++++++++-
drivers/gpu/host1x/debug.h | 14 ++++++---
drivers/gpu/host1x/dev.h | 16 ++++++++++
drivers/gpu/host1x/hw/channel_hw.c | 25 ++++++++++++++++
drivers/gpu/host1x/hw/debug_hw.c | 46 ++++++++++++++++++-----------
drivers/gpu/host1x/hw/debug_hw_1x01.c | 8 ++---
drivers/gpu/host1x/hw/debug_hw_1x06.c | 9 +++---
drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 ++++++++
drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 ++++++++
drivers/gpu/host1x/hw/syncpt_hw.c | 26 ++++++++++++++++
drivers/gpu/host1x/syncpt.c | 3 ++
12 files changed, 159 insertions(+), 35 deletions(-)

--
2.14.1


2017-08-18 16:16:43

by Mikko Perttunen

[permalink] [raw]
Subject: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
specific channels, preventing any other channels from incrementing
them.

Enable this feature where available and assign syncpoints to channels
when submitting a job. Syncpoints are currently never unassigned from
channels since that would require extra work and is unnecessary with
the current channel allocation model.

Signed-off-by: Mikko Perttunen <[email protected]>
---
drivers/gpu/host1x/dev.h | 16 ++++++++++++++++
drivers/gpu/host1x/hw/channel_hw.c | 3 +++
drivers/gpu/host1x/hw/syncpt_hw.c | 26 ++++++++++++++++++++++++++
drivers/gpu/host1x/syncpt.c | 3 +++
4 files changed, 48 insertions(+)

diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index def802c0a6bf..2432a30ff6e2 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
u32 (*load)(struct host1x_syncpt *syncpt);
int (*cpu_incr)(struct host1x_syncpt *syncpt);
int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
+ void (*assign_channel)(struct host1x_syncpt *syncpt,
+ struct host1x_channel *channel);
+ void (*set_protection)(struct host1x *host, bool enabled);
};

struct host1x_intr_ops {
@@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
return host->syncpt_op->patch_wait(sp, patch_addr);
}

+static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
+ struct host1x_syncpt *sp,
+ struct host1x_channel *ch)
+{
+ return host->syncpt_op->assign_channel(sp, ch);
+}
+
+static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
+ bool enabled)
+{
+ return host->syncpt_op->set_protection(host, enabled);
+}
+
static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
void (*syncpt_thresh_work)(struct work_struct *))
{
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 8447a56c41ca..0161da331702 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)

syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);

+ /* assign syncpoint to channel */
+ host1x_hw_syncpt_assign_channel(host, sp, ch);
+
job->syncpt_end = syncval;

/* add a setclass for modules that require it */
diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
index 7b0270d60742..5d117ab1699e 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
return 0;
}

+static void syncpt_assign_channel(struct host1x_syncpt *sp,
+ struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+ struct host1x *host = sp->host;
+
+ if (!host->hv_regs)
+ return;
+
+ host1x_sync_writel(host,
+ HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
+ HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
+#endif
+}
+
+static void syncpt_set_protection(struct host1x *host, bool enabled)
+{
+#if HOST1X_HW >= 6
+ host1x_hypervisor_writel(host,
+ enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
+ HOST1X_HV_SYNCPT_PROT_EN);
+#endif
+}
+
static const struct host1x_syncpt_ops host1x_syncpt_ops = {
.restore = syncpt_restore,
.restore_wait_base = syncpt_restore_wait_base,
@@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
.load = syncpt_load,
.cpu_incr = syncpt_cpu_incr,
.patch_wait = syncpt_patch_wait,
+ .assign_channel = syncpt_assign_channel,
+ .set_protection = syncpt_set_protection,
};
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 048ac9e344ce..fe4d963b3e2a 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
for (i = 0; i < host->info->nb_pts; i++) {
syncpt[i].id = i;
syncpt[i].host = host;
+
+ host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
}

for (i = 0; i < host->info->nb_bases; i++)
@@ -408,6 +410,7 @@ int host1x_syncpt_init(struct host1x *host)
host->bases = bases;

host1x_syncpt_restore(host);
+ host1x_hw_syncpt_set_protection(host, true);

/* Allocate sync point to use for clearing waits for expired fences */
host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
--
2.14.1

2017-08-18 16:16:42

by Mikko Perttunen

[permalink] [raw]
Subject: [PATCH 4/4] drm/tegra: Use u64_to_user_ptr helper

Use the u64_to_user_ptr helper macro to cast IOCTL argument u64 values
to user pointers instead of writing out the cast manually.

Signed-off-by: Mikko Perttunen <[email protected]>
---
drivers/gpu/drm/tegra/drm.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index e3331a2bc082..78c98736b0a5 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -389,11 +389,10 @@ int tegra_drm_submit(struct tegra_drm_context *context,
unsigned int num_relocs = args->num_relocs;
unsigned int num_waitchks = args->num_waitchks;
struct drm_tegra_cmdbuf __user *cmdbufs =
- (void __user *)(uintptr_t)args->cmdbufs;
- struct drm_tegra_reloc __user *relocs =
- (void __user *)(uintptr_t)args->relocs;
+ u64_to_user_ptr(args->cmdbufs);
+ struct drm_tegra_reloc __user *relocs = u64_to_user_ptr(args->relocs);
struct drm_tegra_waitchk __user *waitchks =
- (void __user *)(uintptr_t)args->waitchks;
+ u64_to_user_ptr(args->waitchks);
struct drm_tegra_syncpt syncpt;
struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
struct host1x_syncpt *sp;
@@ -520,7 +519,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
}
}

- if (copy_from_user(&syncpt, (void __user *)(uintptr_t)args->syncpts,
+ if (copy_from_user(&syncpt, u64_to_user_ptr(args->syncpts),
sizeof(syncpt))) {
err = -EFAULT;
goto fail;
--
2.14.1

2017-08-18 16:17:16

by Mikko Perttunen

[permalink] [raw]
Subject: [PATCH 2/4] gpu: host1x: Enable gather filter

The gather filter is a feature present on Tegra124 and newer where the
hardware prevents GATHERed command buffers from executing commands
normally reserved for the CDMA pushbuffer which is maintained by the
kernel driver.

This commit enables the gather filter on all supporting hardware.

Signed-off-by: Mikko Perttunen <[email protected]>
---
drivers/gpu/host1x/hw/channel_hw.c | 22 ++++++++++++++++++++++
drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 ++++++++++++
drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 ++++++++++++
3 files changed, 46 insertions(+)

diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 0161da331702..5c0dc6bb51d1 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -181,10 +181,32 @@ static int channel_submit(struct host1x_job *job)
return err;
}

+static void enable_gather_filter(struct host1x *host,
+ struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+ u32 val;
+
+ if (!host->hv_regs)
+ return;
+
+ val = host1x_hypervisor_readl(
+ host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+ val |= BIT(ch->id % 32);
+ host1x_hypervisor_writel(
+ host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+#elif HOST1X_HW >= 4
+ host1x_ch_writel(ch,
+ HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
+ HOST1X_CHANNEL_CHANNELCTRL);
+#endif
+}
+
static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
unsigned int index)
{
ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE;
+ enable_gather_filter(dev, ch);
return 0;
}

diff --git a/drivers/gpu/host1x/hw/hw_host1x04_channel.h b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
index 95e6f96142b9..2e8b635aa660 100644
--- a/drivers/gpu/host1x/hw/hw_host1x04_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
@@ -117,5 +117,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void)
}
#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_channelctrl_r(void)
+{
+ return 0x98;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL \
+ host1x_channel_channelctrl_r()
+static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
+{
+ return (v & 0x1) << 2;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
+ host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)

#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
index fce6e2c1ff4c..abbbc2641ce6 100644
--- a/drivers/gpu/host1x/hw/hw_host1x05_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
@@ -117,5 +117,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void)
}
#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_channelctrl_r(void)
+{
+ return 0x98;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL \
+ host1x_channel_channelctrl_r()
+static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
+{
+ return (v & 0x1) << 2;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
+ host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)

#endif
--
2.14.1

2017-08-18 16:17:32

by Mikko Perttunen

[permalink] [raw]
Subject: [PATCH 3/4] gpu: host1x: Improve debug disassembly formatting

The host1x driver prints out "disassembly" dumps of the command FIFO
and gather contents on submission timeouts. However, the output has
been quite difficult to read with unnecessary newlines and occasional
missing parentheses.

Fix these problems by using pr_cont to remove unnecessary newlines
and by fixing other small issues.

Signed-off-by: Mikko Perttunen <[email protected]>
---
drivers/gpu/host1x/debug.c | 14 ++++++++++-
drivers/gpu/host1x/debug.h | 14 ++++++++---
drivers/gpu/host1x/hw/debug_hw.c | 46 ++++++++++++++++++++++-------------
drivers/gpu/host1x/hw/debug_hw_1x01.c | 8 +++---
drivers/gpu/host1x/hw/debug_hw_1x06.c | 9 ++++---
5 files changed, 61 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index 2aae0e63214c..dc77ec452ffc 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -40,7 +40,19 @@ void host1x_debug_output(struct output *o, const char *fmt, ...)
len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
va_end(args);

- o->fn(o->ctx, o->buf, len);
+ o->fn(o->ctx, o->buf, len, false);
+}
+
+void host1x_debug_cont(struct output *o, const char *fmt, ...)
+{
+ va_list args;
+ int len;
+
+ va_start(args, fmt);
+ len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
+ va_end(args);
+
+ o->fn(o->ctx, o->buf, len, true);
}

static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
diff --git a/drivers/gpu/host1x/debug.h b/drivers/gpu/host1x/debug.h
index 4595b2e0799f..990cce47e737 100644
--- a/drivers/gpu/host1x/debug.h
+++ b/drivers/gpu/host1x/debug.h
@@ -24,22 +24,28 @@
struct host1x;

struct output {
- void (*fn)(void *ctx, const char *str, size_t len);
+ void (*fn)(void *ctx, const char *str, size_t len, bool cont);
void *ctx;
char buf[256];
};

-static inline void write_to_seqfile(void *ctx, const char *str, size_t len)
+static inline void write_to_seqfile(void *ctx, const char *str, size_t len,
+ bool cont)
{
seq_write((struct seq_file *)ctx, str, len);
}

-static inline void write_to_printk(void *ctx, const char *str, size_t len)
+static inline void write_to_printk(void *ctx, const char *str, size_t len,
+ bool cont)
{
- pr_info("%s", str);
+ if (cont)
+ pr_cont("%s", str);
+ else
+ pr_info("%s", str);
}

void __printf(2, 3) host1x_debug_output(struct output *o, const char *fmt, ...);
+void __printf(2, 3) host1x_debug_cont(struct output *o, const char *fmt, ...);

extern unsigned int host1x_debug_trace_cmdbuf;

diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index 770d92e62d69..1e67667e308c 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -40,48 +40,59 @@ enum {

static unsigned int show_channel_command(struct output *o, u32 val)
{
- unsigned int mask, subop;
+ unsigned int mask, subop, num;

switch (val >> 28) {
case HOST1X_OPCODE_SETCLASS:
mask = val & 0x3f;
if (mask) {
- host1x_debug_output(o, "SETCL(class=%03x, offset=%03x, mask=%02x, [",
+ host1x_debug_cont(o, "SETCL(class=%03x, offset=%03x, mask=%02x, [",
val >> 6 & 0x3ff,
val >> 16 & 0xfff, mask);
return hweight8(mask);
}

- host1x_debug_output(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff);
+ host1x_debug_cont(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff);
return 0;

case HOST1X_OPCODE_INCR:
- host1x_debug_output(o, "INCR(offset=%03x, [",
+ num = val & 0xffff;
+ host1x_debug_cont(o, "INCR(offset=%03x, [",
val >> 16 & 0xfff);
- return val & 0xffff;
+ if (!num)
+ host1x_debug_cont(o, "])\n");
+
+ return num;

case HOST1X_OPCODE_NONINCR:
- host1x_debug_output(o, "NONINCR(offset=%03x, [",
+ num = val & 0xffff;
+ host1x_debug_cont(o, "NONINCR(offset=%03x, [",
val >> 16 & 0xfff);
- return val & 0xffff;
+ if (!num)
+ host1x_debug_cont(o, "])\n");
+
+ return num;

case HOST1X_OPCODE_MASK:
mask = val & 0xffff;
- host1x_debug_output(o, "MASK(offset=%03x, mask=%03x, [",
+ host1x_debug_cont(o, "MASK(offset=%03x, mask=%03x, [",
val >> 16 & 0xfff, mask);
+ if (!mask)
+ host1x_debug_cont(o, "])\n");
+
return hweight16(mask);

case HOST1X_OPCODE_IMM:
- host1x_debug_output(o, "IMM(offset=%03x, data=%03x)\n",
+ host1x_debug_cont(o, "IMM(offset=%03x, data=%03x)\n",
val >> 16 & 0xfff, val & 0xffff);
return 0;

case HOST1X_OPCODE_RESTART:
- host1x_debug_output(o, "RESTART(offset=%08x)\n", val << 4);
+ host1x_debug_cont(o, "RESTART(offset=%08x)\n", val << 4);
return 0;

case HOST1X_OPCODE_GATHER:
- host1x_debug_output(o, "GATHER(offset=%03x, insert=%d, type=%d, count=%04x, addr=[",
+ host1x_debug_cont(o, "GATHER(offset=%03x, insert=%d, type=%d, count=%04x, addr=[",
val >> 16 & 0xfff, val >> 15 & 0x1,
val >> 14 & 0x1, val & 0x3fff);
return 1;
@@ -89,16 +100,17 @@ static unsigned int show_channel_command(struct output *o, u32 val)
case HOST1X_OPCODE_EXTEND:
subop = val >> 24 & 0xf;
if (subop == HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK)
- host1x_debug_output(o, "ACQUIRE_MLOCK(index=%d)\n",
+ host1x_debug_cont(o, "ACQUIRE_MLOCK(index=%d)\n",
val & 0xff);
else if (subop == HOST1X_OPCODE_EXTEND_RELEASE_MLOCK)
- host1x_debug_output(o, "RELEASE_MLOCK(index=%d)\n",
+ host1x_debug_cont(o, "RELEASE_MLOCK(index=%d)\n",
val & 0xff);
else
- host1x_debug_output(o, "EXTEND_UNKNOWN(%08x)\n", val);
+ host1x_debug_cont(o, "EXTEND_UNKNOWN(%08x)\n", val);
return 0;

default:
+ host1x_debug_cont(o, "UNKNOWN\n");
return 0;
}
}
@@ -126,11 +138,11 @@ static void show_gather(struct output *o, phys_addr_t phys_addr,
u32 val = *(map_addr + offset / 4 + i);

if (!data_count) {
- host1x_debug_output(o, "%08x: %08x:", addr, val);
+ host1x_debug_output(o, "%08x: %08x: ", addr, val);
data_count = show_channel_command(o, val);
} else {
- host1x_debug_output(o, "%08x%s", val,
- data_count > 0 ? ", " : "])\n");
+ host1x_debug_cont(o, "%08x%s", val,
+ data_count > 1 ? ", " : "])\n");
data_count--;
}
}
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c b/drivers/gpu/host1x/hw/debug_hw_1x01.c
index 8f243903cc7f..09e1aa7bb5dd 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x01.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c
@@ -111,11 +111,11 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_READ);

if (!data_count) {
- host1x_debug_output(o, "%08x:", val);
+ host1x_debug_output(o, "%08x: ", val);
data_count = show_channel_command(o, val);
} else {
- host1x_debug_output(o, "%08x%s", val,
- data_count > 0 ? ", " : "])\n");
+ host1x_debug_cont(o, "%08x%s", val,
+ data_count > 1 ? ", " : "])\n");
data_count--;
}

@@ -126,7 +126,7 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
} while (rd_ptr != wr_ptr);

if (data_count)
- host1x_debug_output(o, ", ...])\n");
+ host1x_debug_cont(o, ", ...])\n");
host1x_debug_output(o, "\n");

host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c b/drivers/gpu/host1x/hw/debug_hw_1x06.c
index 9cdee657fb46..bd89da5dc64c 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x06.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c
@@ -105,11 +105,12 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
HOST1X_HV_CMDFIFO_PEEK_READ);

if (!data_count) {
- host1x_debug_output(o, "%08x:", val);
+ host1x_debug_output(o, "%03x 0x%08x: ",
+ rd_ptr - start, val);
data_count = show_channel_command(o, val);
} else {
- host1x_debug_output(o, "%08x%s", val,
- data_count > 0 ? ", " : "])\n");
+ host1x_debug_cont(o, "%08x%s", val,
+ data_count > 1 ? ", " : "])\n");
data_count--;
}

@@ -120,7 +121,7 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
} while (rd_ptr != wr_ptr);

if (data_count)
- host1x_debug_output(o, ", ...])\n");
+ host1x_debug_cont(o, ", ...])\n");
host1x_debug_output(o, "\n");

host1x_hypervisor_writel(host, 0x0, HOST1X_HV_CMDFIFO_PEEK_CTRL);
--
2.14.1

2017-08-18 21:54:56

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH 3/4] gpu: host1x: Improve debug disassembly formatting

On 18.08.2017 19:15, Mikko Perttunen wrote:
> The host1x driver prints out "disassembly" dumps of the command FIFO
> and gather contents on submission timeouts. However, the output has
> been quite difficult to read with unnecessary newlines and occasional
> missing parentheses.
>
> Fix these problems by using pr_cont to remove unnecessary newlines
> and by fixing other small issues.
>
> Signed-off-by: Mikko Perttunen <[email protected]>
> ---
It's indeed a bit more readable now.

Reviewed-by: Dmitry Osipenko <[email protected]>
Tested-by: Dmitry Osipenko <[email protected]>

--
Dmitry

2017-08-18 22:05:20

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH 4/4] drm/tegra: Use u64_to_user_ptr helper

On 18.08.2017 19:15, Mikko Perttunen wrote:
> Use the u64_to_user_ptr helper macro to cast IOCTL argument u64 values
> to user pointers instead of writing out the cast manually.
>
> Signed-off-by: Mikko Perttunen <[email protected]>
> ---
> drivers/gpu/drm/tegra/drm.c | 9 ++++-----
> 1 file changed, 4 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
> index e3331a2bc082..78c98736b0a5 100644
> --- a/drivers/gpu/drm/tegra/drm.c
> +++ b/drivers/gpu/drm/tegra/drm.c
> @@ -389,11 +389,10 @@ int tegra_drm_submit(struct tegra_drm_context *context,
> unsigned int num_relocs = args->num_relocs;
> unsigned int num_waitchks = args->num_waitchks;
> struct drm_tegra_cmdbuf __user *cmdbufs =
> - (void __user *)(uintptr_t)args->cmdbufs;
> - struct drm_tegra_reloc __user *relocs =
> - (void __user *)(uintptr_t)args->relocs;
> + u64_to_user_ptr(args->cmdbufs);
> + struct drm_tegra_reloc __user *relocs = u64_to_user_ptr(args->relocs);
> struct drm_tegra_waitchk __user *waitchks =
> - (void __user *)(uintptr_t)args->waitchks;
> + u64_to_user_ptr(args->waitchks);

What about to factor out 'u64_to_user_ptr()' assignments to reduce messiness a
tad? Like this:

struct drm_tegra_waitchk __user *waitchks;
struct drm_tegra_cmdbuf __user *cmdbufs;
struct drm_tegra_reloc __user *relocs;
...
waitchks = u64_to_user_ptr(args->waitchks);
cmdbufs = u64_to_user_ptr(args->cmdbufs);
relocs = u64_to_user_ptr(args->relocs);


> struct drm_tegra_syncpt syncpt;
> struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
> struct host1x_syncpt *sp;
> @@ -520,7 +519,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
> }
> }
>
> - if (copy_from_user(&syncpt, (void __user *)(uintptr_t)args->syncpts,
> + if (copy_from_user(&syncpt, u64_to_user_ptr(args->syncpts),

What about to define and use 'struct drm_tegra_reloc __user *syncpts' for
consistency with other '__user' definitions?

> sizeof(syncpt))) {
> err = -EFAULT;
> goto fail;
>

--
Dmitry

2017-08-18 22:36:10

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

On 18.08.2017 19:15, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
>
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
>
> Signed-off-by: Mikko Perttunen <[email protected]>
> ---
> drivers/gpu/host1x/dev.h | 16 ++++++++++++++++
> drivers/gpu/host1x/hw/channel_hw.c | 3 +++
> drivers/gpu/host1x/hw/syncpt_hw.c | 26 ++++++++++++++++++++++++++
> drivers/gpu/host1x/syncpt.c | 3 +++
> 4 files changed, 48 insertions(+)
>
> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index def802c0a6bf..2432a30ff6e2 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
> u32 (*load)(struct host1x_syncpt *syncpt);
> int (*cpu_incr)(struct host1x_syncpt *syncpt);
> int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
> + void (*assign_channel)(struct host1x_syncpt *syncpt,
> + struct host1x_channel *channel);
> + void (*set_protection)(struct host1x *host, bool enabled);
> };
>
> struct host1x_intr_ops {
> @@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
> return host->syncpt_op->patch_wait(sp, patch_addr);
> }
>
> +static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
> + struct host1x_syncpt *sp,
> + struct host1x_channel *ch)
> +{
> + return host->syncpt_op->assign_channel(sp, ch);
> +}
> +
> +static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
> + bool enabled)
> +{
> + return host->syncpt_op->set_protection(host, enabled);
> +}
> +
> static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
> void (*syncpt_thresh_work)(struct work_struct *))
> {
> diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
> index 8447a56c41ca..0161da331702 100644
> --- a/drivers/gpu/host1x/hw/channel_hw.c
> +++ b/drivers/gpu/host1x/hw/channel_hw.c
> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>
> syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>
> + /* assign syncpoint to channel */
> + host1x_hw_syncpt_assign_channel(host, sp, ch);
> +
> job->syncpt_end = syncval;
>
> /* add a setclass for modules that require it */
> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
> index 7b0270d60742..5d117ab1699e 100644
> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
> @@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
> return 0;
> }
>
> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
> + struct host1x_channel *ch)
> +{
> +#if HOST1X_HW >= 6
> + struct host1x *host = sp->host;
> +
> + if (!host->hv_regs)
> + return;
> +
> + host1x_sync_writel(host,
> + HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
> + HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
> +#endif
> +}
> +
> +static void syncpt_set_protection(struct host1x *host, bool enabled)
> +{
> +#if HOST1X_HW >= 6
> + host1x_hypervisor_writel(host,
> + enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
> + HOST1X_HV_SYNCPT_PROT_EN);
> +#endif
> +}
> +
> static const struct host1x_syncpt_ops host1x_syncpt_ops = {
> .restore = syncpt_restore,
> .restore_wait_base = syncpt_restore_wait_base,
> @@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
> .load = syncpt_load,
> .cpu_incr = syncpt_cpu_incr,
> .patch_wait = syncpt_patch_wait,
> + .assign_channel = syncpt_assign_channel,
> + .set_protection = syncpt_set_protection,
> };
> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> index 048ac9e344ce..fe4d963b3e2a 100644
> --- a/drivers/gpu/host1x/syncpt.c
> +++ b/drivers/gpu/host1x/syncpt.c
> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
> for (i = 0; i < host->info->nb_pts; i++) {
> syncpt[i].id = i;
> syncpt[i].host = host;
> +
> + host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
> }
>
> for (i = 0; i < host->info->nb_bases; i++)
> @@ -408,6 +410,7 @@ int host1x_syncpt_init(struct host1x *host)
> host->bases = bases;
>
> host1x_syncpt_restore(host);
> + host1x_hw_syncpt_set_protection(host, true);

Is it really okay to force the protection? Maybe protection should be enabled
with a respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
avoid software jobs validation for Tegra124+.

>
> /* Allocate sync point to use for clearing waits for expired fences */
> host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
>


--
Dmitry

2017-08-19 08:06:39

by Mikko Perttunen

[permalink] [raw]
Subject: Re: [PATCH 4/4] drm/tegra: Use u64_to_user_ptr helper

On 08/19/2017 01:05 AM, Dmitry Osipenko wrote:
> On 18.08.2017 19:15, Mikko Perttunen wrote:
>> Use the u64_to_user_ptr helper macro to cast IOCTL argument u64 values
>> to user pointers instead of writing out the cast manually.
>>
>> Signed-off-by: Mikko Perttunen <[email protected]>
>> ---
>> drivers/gpu/drm/tegra/drm.c | 9 ++++-----
>> 1 file changed, 4 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
>> index e3331a2bc082..78c98736b0a5 100644
>> --- a/drivers/gpu/drm/tegra/drm.c
>> +++ b/drivers/gpu/drm/tegra/drm.c
>> @@ -389,11 +389,10 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>> unsigned int num_relocs = args->num_relocs;
>> unsigned int num_waitchks = args->num_waitchks;
>> struct drm_tegra_cmdbuf __user *cmdbufs =
>> - (void __user *)(uintptr_t)args->cmdbufs;
>> - struct drm_tegra_reloc __user *relocs =
>> - (void __user *)(uintptr_t)args->relocs;
>> + u64_to_user_ptr(args->cmdbufs);
>> + struct drm_tegra_reloc __user *relocs = u64_to_user_ptr(args->relocs);
>> struct drm_tegra_waitchk __user *waitchks =
>> - (void __user *)(uintptr_t)args->waitchks;
>> + u64_to_user_ptr(args->waitchks);
>
> What about to factor out 'u64_to_user_ptr()' assignments to reduce messiness a
> tad? Like this:
>
> struct drm_tegra_waitchk __user *waitchks;
> struct drm_tegra_cmdbuf __user *cmdbufs;
> struct drm_tegra_reloc __user *relocs;
> ...
> waitchks = u64_to_user_ptr(args->waitchks);
> cmdbufs = u64_to_user_ptr(args->cmdbufs);
> relocs = u64_to_user_ptr(args->relocs);
>
>
>> struct drm_tegra_syncpt syncpt;
>> struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
>> struct host1x_syncpt *sp;
>> @@ -520,7 +519,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
>> }
>> }
>>
>> - if (copy_from_user(&syncpt, (void __user *)(uintptr_t)args->syncpts,
>> + if (copy_from_user(&syncpt, u64_to_user_ptr(args->syncpts),
>
> What about to define and use 'struct drm_tegra_reloc __user *syncpts' for
> consistency with other '__user' definitions?
>
>> sizeof(syncpt))) {
>> err = -EFAULT;
>> goto fail;
>>
>

Yeah, these are good ideas. I'll post a v2 at some point with these
changes. Thanks!

Mikko

2017-08-19 08:10:16

by Mikko Perttunen

[permalink] [raw]
Subject: Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection



On 08/19/2017 01:36 AM, Dmitry Osipenko wrote:
> On 18.08.2017 19:15, Mikko Perttunen wrote:
>> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
>> specific channels, preventing any other channels from incrementing
>> them.
>>
>> Enable this feature where available and assign syncpoints to channels
>> when submitting a job. Syncpoints are currently never unassigned from
>> channels since that would require extra work and is unnecessary with
>> the current channel allocation model.
>>
>> Signed-off-by: Mikko Perttunen <[email protected]>
>> ---
>> drivers/gpu/host1x/dev.h | 16 ++++++++++++++++
>> drivers/gpu/host1x/hw/channel_hw.c | 3 +++
>> drivers/gpu/host1x/hw/syncpt_hw.c | 26 ++++++++++++++++++++++++++
>> drivers/gpu/host1x/syncpt.c | 3 +++
>> 4 files changed, 48 insertions(+)
>>
>> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
>> index def802c0a6bf..2432a30ff6e2 100644
>> --- a/drivers/gpu/host1x/dev.h
>> +++ b/drivers/gpu/host1x/dev.h
>> @@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
>> u32 (*load)(struct host1x_syncpt *syncpt);
>> int (*cpu_incr)(struct host1x_syncpt *syncpt);
>> int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
>> + void (*assign_channel)(struct host1x_syncpt *syncpt,
>> + struct host1x_channel *channel);
>> + void (*set_protection)(struct host1x *host, bool enabled);
>> };
>>
>> struct host1x_intr_ops {
>> @@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
>> return host->syncpt_op->patch_wait(sp, patch_addr);
>> }
>>
>> +static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
>> + struct host1x_syncpt *sp,
>> + struct host1x_channel *ch)
>> +{
>> + return host->syncpt_op->assign_channel(sp, ch);
>> +}
>> +
>> +static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
>> + bool enabled)
>> +{
>> + return host->syncpt_op->set_protection(host, enabled);
>> +}
>> +
>> static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
>> void (*syncpt_thresh_work)(struct work_struct *))
>> {
>> diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
>> index 8447a56c41ca..0161da331702 100644
>> --- a/drivers/gpu/host1x/hw/channel_hw.c
>> +++ b/drivers/gpu/host1x/hw/channel_hw.c
>> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>>
>> syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>>
>> + /* assign syncpoint to channel */
>> + host1x_hw_syncpt_assign_channel(host, sp, ch);
>> +
>> job->syncpt_end = syncval;
>>
>> /* add a setclass for modules that require it */
>> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
>> index 7b0270d60742..5d117ab1699e 100644
>> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
>> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
>> @@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
>> return 0;
>> }
>>
>> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
>> + struct host1x_channel *ch)
>> +{
>> +#if HOST1X_HW >= 6
>> + struct host1x *host = sp->host;
>> +
>> + if (!host->hv_regs)
>> + return;
>> +
>> + host1x_sync_writel(host,
>> + HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
>> + HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
>> +#endif
>> +}
>> +
>> +static void syncpt_set_protection(struct host1x *host, bool enabled)
>> +{
>> +#if HOST1X_HW >= 6
>> + host1x_hypervisor_writel(host,
>> + enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
>> + HOST1X_HV_SYNCPT_PROT_EN);
>> +#endif
>> +}
>> +
>> static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>> .restore = syncpt_restore,
>> .restore_wait_base = syncpt_restore_wait_base,
>> @@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
>> .load = syncpt_load,
>> .cpu_incr = syncpt_cpu_incr,
>> .patch_wait = syncpt_patch_wait,
>> + .assign_channel = syncpt_assign_channel,
>> + .set_protection = syncpt_set_protection,
>> };
>> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
>> index 048ac9e344ce..fe4d963b3e2a 100644
>> --- a/drivers/gpu/host1x/syncpt.c
>> +++ b/drivers/gpu/host1x/syncpt.c
>> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
>> for (i = 0; i < host->info->nb_pts; i++) {
>> syncpt[i].id = i;
>> syncpt[i].host = host;
>> +
>> + host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
>> }
>>
>> for (i = 0; i < host->info->nb_bases; i++)
>> @@ -408,6 +410,7 @@ int host1x_syncpt_init(struct host1x *host)
>> host->bases = bases;
>>
>> host1x_syncpt_restore(host);
>> + host1x_hw_syncpt_set_protection(host, true);
>
> Is it really okay to force the protection? Maybe protection should be enabled
> with a respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
> avoid software jobs validation for Tegra124+.

I don't quite get your comment. The hardware syncpt protection layer
being enabled should never hurt - it doesn't mess with any valid jobs.
It's also only on Tegra186 so I'm not sure where the Tegra124 comes from.

Cheers,
Mikko

>
>>
>> /* Allocate sync point to use for clearing waits for expired fences */
>> host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
>>
>
>

2017-08-19 10:10:05

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

On 19.08.2017 11:10, Mikko Perttunen wrote:
[snip]
>>> + host1x_hw_syncpt_set_protection(host, true);
>>
>> Is it really okay to force the protection? Maybe protection should be enabled
>> with a respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
>> avoid software jobs validation for Tegra124+.
>
> I don't quite get your comment. The hardware syncpt protection layer being
> enabled should never hurt - it doesn't mess with any valid jobs. It's also only
> on Tegra186 so I'm not sure where the Tegra124 comes from.

Right, it's the gather filter on T124+, my bad. This raises several questions.

1) Why we have CONFIG_TEGRA_HOST1X_FIREWALL? Should it be always enforced or we
actually want to be a bit more flexible and allow to disable it. Imagine that
you are making a custom application and want to utilize channels in a different way.

2) Since syncpoint protection is a T186 feature, what about previous
generations? Should we validate syncpoints in software for them? We have
'syncpoint validation' patch staged in grate's kernel
https://github.com/grate-driver/linux/commit/c8b6c82173f2ee9fead23380e8330b8099e7d5e7
(I'll start sending out this and other patches after a bit more thorough
testing.) Improperly used syncpoints potentially could allow one program to
damage others.

3) What exactly does gather filter? Could you list all the commands that it
filters out, please?

4) What about T30/T114 that do not have gather filter? Should we validate those
commands for them in a software firewall?

So maybe we should implement several layers of validation in the SW firewall.
Like all layers for T20 (memory boundaries validation etc), software gather
filter for T30/114 and software syncpoint validation for T30/114/124/210.

--
Dmitry

2017-08-19 10:35:11

by Mikko Perttunen

[permalink] [raw]
Subject: Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

On 08/19/2017 01:09 PM, Dmitry Osipenko wrote:
> On 19.08.2017 11:10, Mikko Perttunen wrote:
> [snip]
>>>> + host1x_hw_syncpt_set_protection(host, true);
>>>
>>> Is it really okay to force the protection? Maybe protection should be enabled
>>> with a respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
>>> avoid software jobs validation for Tegra124+.
>>
>> I don't quite get your comment. The hardware syncpt protection layer being
>> enabled should never hurt - it doesn't mess with any valid jobs. It's also only
>> on Tegra186 so I'm not sure where the Tegra124 comes from.
>
> Right, it's the gather filter on T124+, my bad. This raises several questions.
>
> 1) Why we have CONFIG_TEGRA_HOST1X_FIREWALL? Should it be always enforced or we
> actually want to be a bit more flexible and allow to disable it. Imagine that
> you are making a custom application and want to utilize channels in a different way.

I think it should be up to the user to decide whether they want the
firewall or not. It's clearly the most useful on the older chips -
especially Tegra20 due to lack of IOMMU. The performance penalty is too
great to force it on always.

The programming model should always be considered the same - the rules
of what you are allowed to do are the same whether the firewall, or any
hardware-implemented protection features, are on or not.

>
> 2) Since syncpoint protection is a T186 feature, what about previous
> generations? Should we validate syncpoints in software for them? We have
> 'syncpoint validation' patch staged in grate's kernel
> https://github.com/grate-driver/linux/commit/c8b6c82173f2ee9fead23380e8330b8099e7d5e7
> (I'll start sending out this and other patches after a bit more thorough
> testing.) Improperly used syncpoints potentially could allow one program to
> damage others.

Yes, I think the firewall should have this feature for older
generations. We could disable the check on Tegra186, as you point
towards in question 4.

>
> 3) What exactly does gather filter? Could you list all the commands that it
> filters out, please?

According to the Tegra186 TRM (section 16.8.32), SETCLASS, SETSTRMID and
EXTEND are filtered.

>
> 4) What about T30/T114 that do not have gather filter? Should we validate those
> commands for them in a software firewall?

Yes, the firewall should validate that.

>
> So maybe we should implement several layers of validation in the SW firewall.
> Like all layers for T20 (memory boundaries validation etc), software gather
> filter for T30/114 and software syncpoint validation for T30/114/124/210.
>

That seems like a good idea.

Thanks,
Mikko

2017-08-19 10:42:46

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH 2/4] gpu: host1x: Enable gather filter

On 18.08.2017 19:15, Mikko Perttunen wrote:
> The gather filter is a feature present on Tegra124 and newer where the
> hardware prevents GATHERed command buffers from executing commands
> normally reserved for the CDMA pushbuffer which is maintained by the
> kernel driver.
>
> This commit enables the gather filter on all supporting hardware.
>
> Signed-off-by: Mikko Perttunen <[email protected]>
> ---

TRM says that "Invalid Gbuffer cmd" interrupt would be raised when filtering
happens. Is that interrupt disabled by default or it would cause 'unhandled
interrupt'?

--
Dmitry

2017-08-19 10:46:45

by Mikko Perttunen

[permalink] [raw]
Subject: Re: [PATCH 2/4] gpu: host1x: Enable gather filter

On 08/19/2017 01:42 PM, Dmitry Osipenko wrote:
> On 18.08.2017 19:15, Mikko Perttunen wrote:
>> The gather filter is a feature present on Tegra124 and newer where the
>> hardware prevents GATHERed command buffers from executing commands
>> normally reserved for the CDMA pushbuffer which is maintained by the
>> kernel driver.
>>
>> This commit enables the gather filter on all supporting hardware.
>>
>> Signed-off-by: Mikko Perttunen <[email protected]>
>> ---
>
> TRM says that "Invalid Gbuffer cmd" interrupt would be raised when filtering
> happens. Is that interrupt disabled by default or it would cause 'unhandled
> interrupt'?
>

It's disabled by default. Jobs that are stopped by the filter are then
handled by the usual timeout mechanism.

Mikko

2017-08-19 11:11:15

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

On 19.08.2017 13:35, Mikko Perttunen wrote:
> On 08/19/2017 01:09 PM, Dmitry Osipenko wrote:
>> On 19.08.2017 11:10, Mikko Perttunen wrote:
>> [snip]
>>>>> + host1x_hw_syncpt_set_protection(host, true);
>>>>
>>>> Is it really okay to force the protection? Maybe protection should be enabled
>>>> with a respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
>>>> avoid software jobs validation for Tegra124+.
>>>
>>> I don't quite get your comment. The hardware syncpt protection layer being
>>> enabled should never hurt - it doesn't mess with any valid jobs. It's also only
>>> on Tegra186 so I'm not sure where the Tegra124 comes from.
>>
>> Right, it's the gather filter on T124+, my bad. This raises several questions.
>>
>> 1) Why we have CONFIG_TEGRA_HOST1X_FIREWALL? Should it be always enforced or we
>> actually want to be a bit more flexible and allow to disable it. Imagine that
>> you are making a custom application and want to utilize channels in a
>> different way.
>
> I think it should be up to the user to decide whether they want the firewall or
> not. It's clearly the most useful on the older chips - especially Tegra20 due to
> lack of IOMMU. The performance penalty is too great to force it on always.
>

Of course there is some overhead but is not that great. Usually command buffer
contains just a dozen of commands. It should be an interesting challenge to
optimize its performance though.

> The programming model should always be considered the same - the rules of what
> you are allowed to do are the same whether the firewall, or any
> hardware-implemented protection features, are on or not.
>

Well, okay.

>>
>> 2) Since syncpoint protection is a T186 feature, what about previous
>> generations? Should we validate syncpoints in software for them? We have
>> 'syncpoint validation' patch staged in grate's kernel
>> https://github.com/grate-driver/linux/commit/c8b6c82173f2ee9fead23380e8330b8099e7d5e7
>>
>> (I'll start sending out this and other patches after a bit more thorough
>> testing.) Improperly used syncpoints potentially could allow one program to
>> damage others.
>
> Yes, I think the firewall should have this feature for older generations. We
> could disable the check on Tegra186, as you point towards in question 4.
>
>>
>> 3) What exactly does gather filter? Could you list all the commands that it
>> filters out, please?
>
> According to the Tegra186 TRM (section 16.8.32), SETCLASS, SETSTRMID and EXTEND
> are filtered.
>

Okay, then what about SETSTRMID command, I don't see its disassembly in the
host1x gather debug dump. Is it accidentally missed?

>>
>> 4) What about T30/T114 that do not have gather filter? Should we validate those
>> commands for them in a software firewall?
>
> Yes, the firewall should validate that.
>
>>
>> So maybe we should implement several layers of validation in the SW firewall.
>> Like all layers for T20 (memory boundaries validation etc), software gather
>> filter for T30/114 and software syncpoint validation for T30/114/124/210.
>>
>
> That seems like a good idea.

Alright, factoring out firewall from job.c probably should be the first step.

--
Dmitry

2017-08-19 11:32:33

by Mikko Perttunen

[permalink] [raw]
Subject: Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection



On 08/19/2017 02:11 PM, Dmitry Osipenko wrote:
> On 19.08.2017 13:35, Mikko Perttunen wrote:
>> On 08/19/2017 01:09 PM, Dmitry Osipenko wrote:
>>> On 19.08.2017 11:10, Mikko Perttunen wrote:
>>> [snip]
>>>>>> + host1x_hw_syncpt_set_protection(host, true);
>>>>>
>>>>> Is it really okay to force the protection? Maybe protection should be enabled
>>>>> with a respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
>>>>> avoid software jobs validation for Tegra124+.
>>>>
>>>> I don't quite get your comment. The hardware syncpt protection layer being
>>>> enabled should never hurt - it doesn't mess with any valid jobs. It's also only
>>>> on Tegra186 so I'm not sure where the Tegra124 comes from.
>>>
>>> Right, it's the gather filter on T124+, my bad. This raises several questions.
>>>
>>> 1) Why we have CONFIG_TEGRA_HOST1X_FIREWALL? Should it be always enforced or we
>>> actually want to be a bit more flexible and allow to disable it. Imagine that
>>> you are making a custom application and want to utilize channels in a
>>> different way.
>>
>> I think it should be up to the user to decide whether they want the firewall or
>> not. It's clearly the most useful on the older chips - especially Tegra20 due to
>> lack of IOMMU. The performance penalty is too great to force it on always.
>>
>
> Of course there is some overhead but is not that great. Usually command buffer
> contains just a dozen of commands. It should be an interesting challenge to
> optimize its performance though.
>
>> The programming model should always be considered the same - the rules of what
>> you are allowed to do are the same whether the firewall, or any
>> hardware-implemented protection features, are on or not.
>>
>
> Well, okay.
>
>>>
>>> 2) Since syncpoint protection is a T186 feature, what about previous
>>> generations? Should we validate syncpoints in software for them? We have
>>> 'syncpoint validation' patch staged in grate's kernel
>>> https://github.com/grate-driver/linux/commit/c8b6c82173f2ee9fead23380e8330b8099e7d5e7
>>>
>>> (I'll start sending out this and other patches after a bit more thorough
>>> testing.) Improperly used syncpoints potentially could allow one program to
>>> damage others.
>>
>> Yes, I think the firewall should have this feature for older generations. We
>> could disable the check on Tegra186, as you point towards in question 4.
>>
>>>
>>> 3) What exactly does gather filter? Could you list all the commands that it
>>> filters out, please?
>>
>> According to the Tegra186 TRM (section 16.8.32), SETCLASS, SETSTRMID and EXTEND
>> are filtered.
>>
>
> Okay, then what about SETSTRMID command, I don't see its disassembly in the
> host1x gather debug dump. Is it accidentally missed?
>

True, it's a new command in Tegra186 and I missed adding it to the
disassembler. It's probably fine to add it in another patch since it's
only intended for kernel use and it's useless without IOMMU support
anyway (which we don't have currently on Tegra186).

>>>
>>> 4) What about T30/T114 that do not have gather filter? Should we validate those
>>> commands for them in a software firewall?
>>
>> Yes, the firewall should validate that.
>>
>>>
>>> So maybe we should implement several layers of validation in the SW firewall.
>>> Like all layers for T20 (memory boundaries validation etc), software gather
>>> filter for T30/114 and software syncpoint validation for T30/114/124/210.
>>>
>>
>> That seems like a good idea.
>
> Alright, factoring out firewall from job.c probably should be the first step.
>

2017-08-19 11:51:43

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

On 19.08.2017 14:32, Mikko Perttunen wrote:
>
>
> On 08/19/2017 02:11 PM, Dmitry Osipenko wrote:
>> On 19.08.2017 13:35, Mikko Perttunen wrote:
>>> On 08/19/2017 01:09 PM, Dmitry Osipenko wrote:
>>>> On 19.08.2017 11:10, Mikko Perttunen wrote:
>>>> [snip]
>>>>>>> + host1x_hw_syncpt_set_protection(host, true);
>>>>>>
>>>>>> Is it really okay to force the protection? Maybe protection should be enabled
>>>>>> with a respect to CONFIG_TEGRA_HOST1X_FIREWALL? In that case we would have to
>>>>>> avoid software jobs validation for Tegra124+.
>>>>>
>>>>> I don't quite get your comment. The hardware syncpt protection layer being
>>>>> enabled should never hurt - it doesn't mess with any valid jobs. It's also
>>>>> only
>>>>> on Tegra186 so I'm not sure where the Tegra124 comes from.
>>>>
>>>> Right, it's the gather filter on T124+, my bad. This raises several questions.
>>>>
>>>> 1) Why we have CONFIG_TEGRA_HOST1X_FIREWALL? Should it be always enforced or we
>>>> actually want to be a bit more flexible and allow to disable it. Imagine that
>>>> you are making a custom application and want to utilize channels in a
>>>> different way.
>>>
>>> I think it should be up to the user to decide whether they want the firewall or
>>> not. It's clearly the most useful on the older chips - especially Tegra20 due to
>>> lack of IOMMU. The performance penalty is too great to force it on always.
>>>
>>
>> Of course there is some overhead but is not that great. Usually command buffer
>> contains just a dozen of commands. It should be an interesting challenge to
>> optimize its performance though.
>>
>>> The programming model should always be considered the same - the rules of what
>>> you are allowed to do are the same whether the firewall, or any
>>> hardware-implemented protection features, are on or not.
>>>
>>
>> Well, okay.
>>
>>>>
>>>> 2) Since syncpoint protection is a T186 feature, what about previous
>>>> generations? Should we validate syncpoints in software for them? We have
>>>> 'syncpoint validation' patch staged in grate's kernel
>>>> https://github.com/grate-driver/linux/commit/c8b6c82173f2ee9fead23380e8330b8099e7d5e7
>>>>
>>>>
>>>> (I'll start sending out this and other patches after a bit more thorough
>>>> testing.) Improperly used syncpoints potentially could allow one program to
>>>> damage others.
>>>
>>> Yes, I think the firewall should have this feature for older generations. We
>>> could disable the check on Tegra186, as you point towards in question 4.
>>>
>>>>
>>>> 3) What exactly does gather filter? Could you list all the commands that it
>>>> filters out, please?
>>>
>>> According to the Tegra186 TRM (section 16.8.32), SETCLASS, SETSTRMID and EXTEND
>>> are filtered.
>>>
>>
>> Okay, then what about SETSTRMID command, I don't see its disassembly in the
>> host1x gather debug dump. Is it accidentally missed?
>>
>
> True, it's a new command in Tegra186 and I missed adding it to the disassembler.
> It's probably fine to add it in another patch since it's only intended for
> kernel use and it's useless without IOMMU support anyway (which we don't have
> currently on Tegra186).
>

Yeah, but it probably would be more preferable that this patch would predate the
"gather filter" enabling.

>>>>
>>>> 4) What about T30/T114 that do not have gather filter? Should we validate those
>>>> commands for them in a software firewall?
>>>
>>> Yes, the firewall should validate that.
>>>
>>>>
>>>> So maybe we should implement several layers of validation in the SW firewall.
>>>> Like all layers for T20 (memory boundaries validation etc), software gather
>>>> filter for T30/114 and software syncpoint validation for T30/114/124/210.
>>>>
>>>
>>> That seems like a good idea.
>>
>> Alright, factoring out firewall from job.c probably should be the first step.
>>


--
Dmitry

2017-08-19 12:02:58

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

On 18.08.2017 19:15, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
>
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
>
> Signed-off-by: Mikko Perttunen <[email protected]>
> ---
[snip]
> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> index 048ac9e344ce..fe4d963b3e2a 100644
> --- a/drivers/gpu/host1x/syncpt.c
> +++ b/drivers/gpu/host1x/syncpt.c
> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
> for (i = 0; i < host->info->nb_pts; i++) {
> syncpt[i].id = i;
> syncpt[i].host = host;
> +
> + host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
> }

What about to factor out that assignment and add a comment, something like this:

/* clear syncpoint-channel assignments on Tegra186+ */
for (i = 0; i < host->info->nb_pts; i++)
host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);

And maybe even add an inline function for clarity, like:

static inline void host1x_hw_syncpt_deassign_channel(struct host1x *host,
struct host1x_syncpt *sp)
{
return host->syncpt_op->assign_channel(sp, NULL);
}

--
Dmitry

2017-08-19 12:06:02

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH 2/4] gpu: host1x: Enable gather filter

On 19.08.2017 13:46, Mikko Perttunen wrote:
> On 08/19/2017 01:42 PM, Dmitry Osipenko wrote:
>> On 18.08.2017 19:15, Mikko Perttunen wrote:
>>> The gather filter is a feature present on Tegra124 and newer where the
>>> hardware prevents GATHERed command buffers from executing commands
>>> normally reserved for the CDMA pushbuffer which is maintained by the
>>> kernel driver.
>>>
>>> This commit enables the gather filter on all supporting hardware.
>>>
>>> Signed-off-by: Mikko Perttunen <[email protected]>
>>> ---
>>
>> TRM says that "Invalid Gbuffer cmd" interrupt would be raised when filtering
>> happens. Is that interrupt disabled by default or it would cause 'unhandled
>> interrupt'?
>>
>
> It's disabled by default. Jobs that are stopped by the filter are then handled
> by the usual timeout mechanism.
>

Alright, then it looks good to me.

Reviewed-by: Dmitry Osipenko <[email protected]>

--
Dmitry

2017-08-20 16:18:28

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

On 18.08.2017 19:15, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
>
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
>
> Signed-off-by: Mikko Perttunen <[email protected]>
> ---
> drivers/gpu/host1x/dev.h | 16 ++++++++++++++++
> drivers/gpu/host1x/hw/channel_hw.c | 3 +++
> drivers/gpu/host1x/hw/syncpt_hw.c | 26 ++++++++++++++++++++++++++
> drivers/gpu/host1x/syncpt.c | 3 +++
> 4 files changed, 48 insertions(+)
>
> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index def802c0a6bf..2432a30ff6e2 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
> u32 (*load)(struct host1x_syncpt *syncpt);
> int (*cpu_incr)(struct host1x_syncpt *syncpt);
> int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
> + void (*assign_channel)(struct host1x_syncpt *syncpt,
> + struct host1x_channel *channel);
> + void (*set_protection)(struct host1x *host, bool enabled);
> };
>
> struct host1x_intr_ops {
> @@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
> return host->syncpt_op->patch_wait(sp, patch_addr);
> }
>
> +static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
> + struct host1x_syncpt *sp,
> + struct host1x_channel *ch)
> +{
> + return host->syncpt_op->assign_channel(sp, ch);
> +}
> +
> +static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
> + bool enabled)
> +{
> + return host->syncpt_op->set_protection(host, enabled);
> +}
> +
> static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
> void (*syncpt_thresh_work)(struct work_struct *))
> {
> diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
> index 8447a56c41ca..0161da331702 100644
> --- a/drivers/gpu/host1x/hw/channel_hw.c
> +++ b/drivers/gpu/host1x/hw/channel_hw.c
> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>
> syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>
> + /* assign syncpoint to channel */
> + host1x_hw_syncpt_assign_channel(host, sp, ch);
> +
> job->syncpt_end = syncval;
>
> /* add a setclass for modules that require it */
> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
> index 7b0270d60742..5d117ab1699e 100644
> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
> @@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
> return 0;
> }
>
> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
> + struct host1x_channel *ch)
> +{
> +#if HOST1X_HW >= 6
> + struct host1x *host = sp->host;
> +
> + if (!host->hv_regs)
> + return;
> +
> + host1x_sync_writel(host,
> + HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
> + HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
> +#endif
> +}
> +
> +static void syncpt_set_protection(struct host1x *host, bool enabled)
> +{
> +#if HOST1X_HW >= 6
> + host1x_hypervisor_writel(host,
> + enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
> + HOST1X_HV_SYNCPT_PROT_EN);
> +#endif
> +}
> +
> static const struct host1x_syncpt_ops host1x_syncpt_ops = {
> .restore = syncpt_restore,
> .restore_wait_base = syncpt_restore_wait_base,
> @@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
> .load = syncpt_load,
> .cpu_incr = syncpt_cpu_incr,
> .patch_wait = syncpt_patch_wait,
> + .assign_channel = syncpt_assign_channel,
> + .set_protection = syncpt_set_protection,
> };
> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> index 048ac9e344ce..fe4d963b3e2a 100644
> --- a/drivers/gpu/host1x/syncpt.c
> +++ b/drivers/gpu/host1x/syncpt.c
> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
> for (i = 0; i < host->info->nb_pts; i++) {
> syncpt[i].id = i;
> syncpt[i].host = host;
> +
> + host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
> }
>
> for (i = 0; i < host->info->nb_bases; i++)
> @@ -408,6 +410,7 @@ int host1x_syncpt_init(struct host1x *host)
> host->bases = bases;
>
> host1x_syncpt_restore(host);
> + host1x_hw_syncpt_set_protection(host, true);

Since protection is never disabled maybe something like
host1x_hw_syncpt_enable_protection() would fit a bit better.

>
> /* Allocate sync point to use for clearing waits for expired fences */
> host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
>


--
Dmitry

2017-08-20 16:25:04

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH 2/4] gpu: host1x: Enable gather filter

On 18.08.2017 19:15, Mikko Perttunen wrote:
> The gather filter is a feature present on Tegra124 and newer where the
> hardware prevents GATHERed command buffers from executing commands
> normally reserved for the CDMA pushbuffer which is maintained by the
> kernel driver.
>
> This commit enables the gather filter on all supporting hardware.
>
> Signed-off-by: Mikko Perttunen <[email protected]>
> ---
> drivers/gpu/host1x/hw/channel_hw.c | 22 ++++++++++++++++++++++
> drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 ++++++++++++
> drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 ++++++++++++
> 3 files changed, 46 insertions(+)
>
> diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
> index 0161da331702..5c0dc6bb51d1 100644
> --- a/drivers/gpu/host1x/hw/channel_hw.c
> +++ b/drivers/gpu/host1x/hw/channel_hw.c
> @@ -181,10 +181,32 @@ static int channel_submit(struct host1x_job *job)
> return err;
> }
>
> +static void enable_gather_filter(struct host1x *host,
> + struct host1x_channel *ch)
> +{
> +#if HOST1X_HW >= 6
> + u32 val;
> +
> + if (!host->hv_regs)
> + return;

Is it really possible that gather filter could be not present on HW without
hypervisor? Maybe there is other way to enable it in that case?

Is possible at all that hypervisor could be missed?

> +
> + val = host1x_hypervisor_readl(
> + host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
> + val |= BIT(ch->id % 32);
> + host1x_hypervisor_writel(
> + host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
> +#elif HOST1X_HW >= 4
> + host1x_ch_writel(ch,
> + HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
> + HOST1X_CHANNEL_CHANNELCTRL);
> +#endif
> +}
> +
> static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
> unsigned int index)
> {
> ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE;
> + enable_gather_filter(dev, ch);
> return 0;
> }
>
> diff --git a/drivers/gpu/host1x/hw/hw_host1x04_channel.h b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
> index 95e6f96142b9..2e8b635aa660 100644
> --- a/drivers/gpu/host1x/hw/hw_host1x04_channel.h
> +++ b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
> @@ -117,5 +117,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void)
> }
> #define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
> host1x_channel_dmactrl_dmainitget()
> +static inline u32 host1x_channel_channelctrl_r(void)
> +{
> + return 0x98;
> +}
> +#define HOST1X_CHANNEL_CHANNELCTRL \
> + host1x_channel_channelctrl_r()
> +static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
> +{
> + return (v & 0x1) << 2;
> +}
> +#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
> + host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)
>
> #endif
> diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
> index fce6e2c1ff4c..abbbc2641ce6 100644
> --- a/drivers/gpu/host1x/hw/hw_host1x05_channel.h
> +++ b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
> @@ -117,5 +117,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void)
> }
> #define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
> host1x_channel_dmactrl_dmainitget()
> +static inline u32 host1x_channel_channelctrl_r(void)
> +{
> + return 0x98;
> +}
> +#define HOST1X_CHANNEL_CHANNELCTRL \
> + host1x_channel_channelctrl_r()
> +static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
> +{
> + return (v & 0x1) << 2;
> +}
> +#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
> + host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)
>
> #endif
>


--
Dmitry

2017-08-20 16:44:49

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH 2/4] gpu: host1x: Enable gather filter

On 20.08.2017 19:24, Dmitry Osipenko wrote:
> On 18.08.2017 19:15, Mikko Perttunen wrote:
>> The gather filter is a feature present on Tegra124 and newer where the
>> hardware prevents GATHERed command buffers from executing commands
>> normally reserved for the CDMA pushbuffer which is maintained by the
>> kernel driver.
>>
>> This commit enables the gather filter on all supporting hardware.
>>
>> Signed-off-by: Mikko Perttunen <[email protected]>
>> ---
>> drivers/gpu/host1x/hw/channel_hw.c | 22 ++++++++++++++++++++++
>> drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 ++++++++++++
>> drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 ++++++++++++
>> 3 files changed, 46 insertions(+)
>>
>> diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
>> index 0161da331702..5c0dc6bb51d1 100644
>> --- a/drivers/gpu/host1x/hw/channel_hw.c
>> +++ b/drivers/gpu/host1x/hw/channel_hw.c
>> @@ -181,10 +181,32 @@ static int channel_submit(struct host1x_job *job)
>> return err;
>> }
>>
>> +static void enable_gather_filter(struct host1x *host,
>> + struct host1x_channel *ch)
>> +{
>> +#if HOST1X_HW >= 6
>> + u32 val;
>> +
>> + if (!host->hv_regs)
>> + return;
>
> Is it really possible that gather filter could be not present on HW without
> hypervisor? Maybe there is other way to enable it in that case?
>
> Is possible at all that hypervisor could be missed?

BTW, this is also incoherent with the 'syncpoint protection' patch which doesn't
check for hypervisor presence.

--
Dmitry

2017-08-20 16:59:22

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

On 18.08.2017 19:15, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
>
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
>
> Signed-off-by: Mikko Perttunen <[email protected]>
> ---
> drivers/gpu/host1x/dev.h | 16 ++++++++++++++++
> drivers/gpu/host1x/hw/channel_hw.c | 3 +++
> drivers/gpu/host1x/hw/syncpt_hw.c | 26 ++++++++++++++++++++++++++
> drivers/gpu/host1x/syncpt.c | 3 +++
> 4 files changed, 48 insertions(+)
>
> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index def802c0a6bf..2432a30ff6e2 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
> u32 (*load)(struct host1x_syncpt *syncpt);
> int (*cpu_incr)(struct host1x_syncpt *syncpt);
> int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
> + void (*assign_channel)(struct host1x_syncpt *syncpt,
> + struct host1x_channel *channel);
> + void (*set_protection)(struct host1x *host, bool enabled);
> };
>
> struct host1x_intr_ops {
> @@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
> return host->syncpt_op->patch_wait(sp, patch_addr);
> }
>
> +static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
> + struct host1x_syncpt *sp,
> + struct host1x_channel *ch)
> +{
> + return host->syncpt_op->assign_channel(sp, ch);
> +}
> +
> +static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
> + bool enabled)
> +{
> + return host->syncpt_op->set_protection(host, enabled);
> +}
> +
> static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
> void (*syncpt_thresh_work)(struct work_struct *))
> {
> diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
> index 8447a56c41ca..0161da331702 100644
> --- a/drivers/gpu/host1x/hw/channel_hw.c
> +++ b/drivers/gpu/host1x/hw/channel_hw.c
> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>
> syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>
> + /* assign syncpoint to channel */
> + host1x_hw_syncpt_assign_channel(host, sp, ch);
> +
> job->syncpt_end = syncval;
>
> /* add a setclass for modules that require it */
> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
> index 7b0270d60742..5d117ab1699e 100644
> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
> @@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
> return 0;
> }
>
> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
> + struct host1x_channel *ch)
> +{
> +#if HOST1X_HW >= 6
> + struct host1x *host = sp->host;
> +
> + if (!host->hv_regs)
> + return;

This check should be placed in syncpt_set_protection().

> +
> + host1x_sync_writel(host,
> + HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
> + HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
> +#endif
> +}
> +
> +static void syncpt_set_protection(struct host1x *host, bool enabled)
> +{
> +#if HOST1X_HW >= 6
> + host1x_hypervisor_writel(host,
> + enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
> + HOST1X_HV_SYNCPT_PROT_EN);
> +#endif
> +}
> +
> static const struct host1x_syncpt_ops host1x_syncpt_ops = {
> .restore = syncpt_restore,
> .restore_wait_base = syncpt_restore_wait_base,
> @@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
> .load = syncpt_load,
> .cpu_incr = syncpt_cpu_incr,
> .patch_wait = syncpt_patch_wait,
> + .assign_channel = syncpt_assign_channel,
> + .set_protection = syncpt_set_protection,
> };
> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> index 048ac9e344ce..fe4d963b3e2a 100644
> --- a/drivers/gpu/host1x/syncpt.c
> +++ b/drivers/gpu/host1x/syncpt.c
> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
> for (i = 0; i < host->info->nb_pts; i++) {
> syncpt[i].id = i;
> syncpt[i].host = host;
> +
> + host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
> }
>
> for (i = 0; i < host->info->nb_bases; i++)
> @@ -408,6 +410,7 @@ int host1x_syncpt_init(struct host1x *host)
> host->bases = bases;
>
> host1x_syncpt_restore(host);
> + host1x_hw_syncpt_set_protection(host, true);
>
> /* Allocate sync point to use for clearing waits for expired fences */
> host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
>


--
Dmitry

2017-08-20 16:59:44

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH 2/4] gpu: host1x: Enable gather filter

On 20.08.2017 19:44, Dmitry Osipenko wrote:
> On 20.08.2017 19:24, Dmitry Osipenko wrote:
>> On 18.08.2017 19:15, Mikko Perttunen wrote:
>>> The gather filter is a feature present on Tegra124 and newer where the
>>> hardware prevents GATHERed command buffers from executing commands
>>> normally reserved for the CDMA pushbuffer which is maintained by the
>>> kernel driver.
>>>
>>> This commit enables the gather filter on all supporting hardware.
>>>
>>> Signed-off-by: Mikko Perttunen <[email protected]>
>>> ---
>>> drivers/gpu/host1x/hw/channel_hw.c | 22 ++++++++++++++++++++++
>>> drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 ++++++++++++
>>> drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 ++++++++++++
>>> 3 files changed, 46 insertions(+)
>>>
>>> diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
>>> index 0161da331702..5c0dc6bb51d1 100644
>>> --- a/drivers/gpu/host1x/hw/channel_hw.c
>>> +++ b/drivers/gpu/host1x/hw/channel_hw.c
>>> @@ -181,10 +181,32 @@ static int channel_submit(struct host1x_job *job)
>>> return err;
>>> }
>>>
>>> +static void enable_gather_filter(struct host1x *host,
>>> + struct host1x_channel *ch)
>>> +{
>>> +#if HOST1X_HW >= 6
>>> + u32 val;
>>> +
>>> + if (!host->hv_regs)
>>> + return;
>>
>> Is it really possible that gather filter could be not present on HW without
>> hypervisor? Maybe there is other way to enable it in that case?
>>
>> Is possible at all that hypervisor could be missed?
>
> BTW, this is also incoherent with the 'syncpoint protection' patch which doesn't
> check for hypervisor presence.
>

However, I noticed that check and it's wrongly placed ;) See comment to the
'syncpoint protection' patch.

--
Dmitry

2017-08-20 18:13:05

by Dmitry Osipenko

[permalink] [raw]
Subject: Re: [PATCH 1/4] gpu: host1x: Enable Tegra186 syncpoint protection

On 18.08.2017 19:15, Mikko Perttunen wrote:
> Since Tegra186 the Host1x hardware allows syncpoints to be assigned to
> specific channels, preventing any other channels from incrementing
> them.
>
> Enable this feature where available and assign syncpoints to channels
> when submitting a job. Syncpoints are currently never unassigned from
> channels since that would require extra work and is unnecessary with
> the current channel allocation model.
>
> Signed-off-by: Mikko Perttunen <[email protected]>
> ---
> drivers/gpu/host1x/dev.h | 16 ++++++++++++++++
> drivers/gpu/host1x/hw/channel_hw.c | 3 +++
> drivers/gpu/host1x/hw/syncpt_hw.c | 26 ++++++++++++++++++++++++++
> drivers/gpu/host1x/syncpt.c | 3 +++
> 4 files changed, 48 insertions(+)
>
> diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
> index def802c0a6bf..2432a30ff6e2 100644
> --- a/drivers/gpu/host1x/dev.h
> +++ b/drivers/gpu/host1x/dev.h
> @@ -79,6 +79,9 @@ struct host1x_syncpt_ops {
> u32 (*load)(struct host1x_syncpt *syncpt);
> int (*cpu_incr)(struct host1x_syncpt *syncpt);
> int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
> + void (*assign_channel)(struct host1x_syncpt *syncpt,
> + struct host1x_channel *channel);
> + void (*set_protection)(struct host1x *host, bool enabled);
> };
>
> struct host1x_intr_ops {
> @@ -186,6 +189,19 @@ static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
> return host->syncpt_op->patch_wait(sp, patch_addr);
> }
>
> +static inline void host1x_hw_syncpt_assign_channel(struct host1x *host,
> + struct host1x_syncpt *sp,
> + struct host1x_channel *ch)
> +{
> + return host->syncpt_op->assign_channel(sp, ch);
> +}
> +
> +static inline void host1x_hw_syncpt_set_protection(struct host1x *host,
> + bool enabled)
> +{
> + return host->syncpt_op->set_protection(host, enabled);
> +}
> +
> static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
> void (*syncpt_thresh_work)(struct work_struct *))
> {
> diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
> index 8447a56c41ca..0161da331702 100644
> --- a/drivers/gpu/host1x/hw/channel_hw.c
> +++ b/drivers/gpu/host1x/hw/channel_hw.c
> @@ -147,6 +147,9 @@ static int channel_submit(struct host1x_job *job)
>
> syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
>
> + /* assign syncpoint to channel */
> + host1x_hw_syncpt_assign_channel(host, sp, ch);
> +

Since there is one client per channel, it probably would make sense to assign
client syncpoints on host1x_channel_request().

> job->syncpt_end = syncval;
>
> /* add a setclass for modules that require it */
> diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
> index 7b0270d60742..5d117ab1699e 100644
> --- a/drivers/gpu/host1x/hw/syncpt_hw.c
> +++ b/drivers/gpu/host1x/hw/syncpt_hw.c
> @@ -106,6 +106,30 @@ static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
> return 0;
> }
>
> +static void syncpt_assign_channel(struct host1x_syncpt *sp,
> + struct host1x_channel *ch)
> +{
> +#if HOST1X_HW >= 6
> + struct host1x *host = sp->host;
> +
> + if (!host->hv_regs)
> + return;
> +
> + host1x_sync_writel(host,
> + HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
> + HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
> +#endif
> +}
> +
> +static void syncpt_set_protection(struct host1x *host, bool enabled)
> +{
> +#if HOST1X_HW >= 6
> + host1x_hypervisor_writel(host,
> + enabled ? HOST1X_HV_SYNCPT_PROT_EN_CH_EN : 0,
> + HOST1X_HV_SYNCPT_PROT_EN);
> +#endif
> +}
> +
> static const struct host1x_syncpt_ops host1x_syncpt_ops = {
> .restore = syncpt_restore,
> .restore_wait_base = syncpt_restore_wait_base,
> @@ -113,4 +137,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
> .load = syncpt_load,
> .cpu_incr = syncpt_cpu_incr,
> .patch_wait = syncpt_patch_wait,
> + .assign_channel = syncpt_assign_channel,
> + .set_protection = syncpt_set_protection,
> };
> diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
> index 048ac9e344ce..fe4d963b3e2a 100644
> --- a/drivers/gpu/host1x/syncpt.c
> +++ b/drivers/gpu/host1x/syncpt.c
> @@ -398,6 +398,8 @@ int host1x_syncpt_init(struct host1x *host)
> for (i = 0; i < host->info->nb_pts; i++) {
> syncpt[i].id = i;
> syncpt[i].host = host;
> +
> + host1x_hw_syncpt_assign_channel(host, &syncpt[i], NULL);
> }
>
> for (i = 0; i < host->info->nb_bases; i++)
> @@ -408,6 +410,7 @@ int host1x_syncpt_init(struct host1x *host)
> host->bases = bases;
>
> host1x_syncpt_restore(host);
> + host1x_hw_syncpt_set_protection(host, true);
>
> /* Allocate sync point to use for clearing waits for expired fences */
> host->nop_sp = host1x_syncpt_alloc(host, NULL, 0);
>


--
Dmitry

2017-08-21 17:27:52

by Mikko Perttunen

[permalink] [raw]
Subject: Re: [PATCH 2/4] gpu: host1x: Enable gather filter



On 08/20/2017 07:59 PM, Dmitry Osipenko wrote:
> On 20.08.2017 19:44, Dmitry Osipenko wrote:
>> On 20.08.2017 19:24, Dmitry Osipenko wrote:
>>> On 18.08.2017 19:15, Mikko Perttunen wrote:
>>>> The gather filter is a feature present on Tegra124 and newer where the
>>>> hardware prevents GATHERed command buffers from executing commands
>>>> normally reserved for the CDMA pushbuffer which is maintained by the
>>>> kernel driver.
>>>>
>>>> This commit enables the gather filter on all supporting hardware.
>>>>
>>>> Signed-off-by: Mikko Perttunen <[email protected]>
>>>> ---
>>>> drivers/gpu/host1x/hw/channel_hw.c | 22 ++++++++++++++++++++++
>>>> drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 ++++++++++++
>>>> drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 ++++++++++++
>>>> 3 files changed, 46 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
>>>> index 0161da331702..5c0dc6bb51d1 100644
>>>> --- a/drivers/gpu/host1x/hw/channel_hw.c
>>>> +++ b/drivers/gpu/host1x/hw/channel_hw.c
>>>> @@ -181,10 +181,32 @@ static int channel_submit(struct host1x_job *job)
>>>> return err;
>>>> }
>>>>
>>>> +static void enable_gather_filter(struct host1x *host,
>>>> + struct host1x_channel *ch)
>>>> +{
>>>> +#if HOST1X_HW >= 6
>>>> + u32 val;
>>>> +
>>>> + if (!host->hv_regs)
>>>> + return;
>>>
>>> Is it really possible that gather filter could be not present on HW without
>>> hypervisor? Maybe there is other way to enable it in that case?

The hardware may have the hypervisor but Linux may be running as a
virtual machine without access to the hypervisor, in which case we
cannot access the registers, and it's the responsibility of the other OS
acting as hypervisor to enable the gather filter.

>>>
>>> Is possible at all that hypervisor could be missed?
>>
>> BTW, this is also incoherent with the 'syncpoint protection' patch which doesn't
>> check for hypervisor presence.
>>
>
> However, I noticed that check and it's wrongly placed ;) See comment to the
> 'syncpoint protection' patch.
>

2017-08-21 17:28:19

by Mikko Perttunen

[permalink] [raw]
Subject: Re: [PATCH 2/4] gpu: host1x: Enable gather filter



On 08/21/2017 08:27 PM, Mikko Perttunen wrote:
>
>
> On 08/20/2017 07:59 PM, Dmitry Osipenko wrote:
>> On 20.08.2017 19:44, Dmitry Osipenko wrote:
>>> On 20.08.2017 19:24, Dmitry Osipenko wrote:
>>>> On 18.08.2017 19:15, Mikko Perttunen wrote:
>>>>> The gather filter is a feature present on Tegra124 and newer where the
>>>>> hardware prevents GATHERed command buffers from executing commands
>>>>> normally reserved for the CDMA pushbuffer which is maintained by the
>>>>> kernel driver.
>>>>>
>>>>> This commit enables the gather filter on all supporting hardware.
>>>>>
>>>>> Signed-off-by: Mikko Perttunen <[email protected]>
>>>>> ---
>>>>> drivers/gpu/host1x/hw/channel_hw.c | 22
>>>>> ++++++++++++++++++++++
>>>>> drivers/gpu/host1x/hw/hw_host1x04_channel.h | 12 ++++++++++++
>>>>> drivers/gpu/host1x/hw/hw_host1x05_channel.h | 12 ++++++++++++
>>>>> 3 files changed, 46 insertions(+)
>>>>>
>>>>> diff --git a/drivers/gpu/host1x/hw/channel_hw.c
>>>>> b/drivers/gpu/host1x/hw/channel_hw.c
>>>>> index 0161da331702..5c0dc6bb51d1 100644
>>>>> --- a/drivers/gpu/host1x/hw/channel_hw.c
>>>>> +++ b/drivers/gpu/host1x/hw/channel_hw.c
>>>>> @@ -181,10 +181,32 @@ static int channel_submit(struct host1x_job
>>>>> *job)
>>>>> return err;
>>>>> }
>>>>> +static void enable_gather_filter(struct host1x *host,
>>>>> + struct host1x_channel *ch)
>>>>> +{
>>>>> +#if HOST1X_HW >= 6
>>>>> + u32 val;
>>>>> +
>>>>> + if (!host->hv_regs)
>>>>> + return;
>>>>
>>>> Is it really possible that gather filter could be not present on HW
>>>> without
>>>> hypervisor? Maybe there is other way to enable it in that case?
>
> The hardware may have the hypervisor but Linux may be running as a
> virtual machine without access to the hypervisor, in which case we
> cannot access the registers, and it's the responsibility of the other OS
> acting as hypervisor to enable the gather filter.
>
>>>>
>>>> Is possible at all that hypervisor could be missed?
>>>
>>> BTW, this is also incoherent with the 'syncpoint protection' patch
>>> which doesn't
>>> check for hypervisor presence.
>>>
>>
>> However, I noticed that check and it's wrongly placed ;) See comment
>> to the
>> 'syncpoint protection' patch.

Also - thanks, I added the missing check to that patch :)

>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html