LinuxLists.cc - [PATCH 00/10] Dynamic Host1x channel allocation

2017-11-05 11:03:02

Subject: [PATCH 00/10] Dynamic Host1x channel allocation

Hi all,

this adds support for a new model of hardware channel allocation for
Host1x/TegraDRM. In the current model, one hardware channel is
allocated for each client device at probe time. This is simple but
does not allow for optimal use of hardware resources.

In the new model, we allocate hardware channels dynamically when a
"userspace channel", opened using the channel open IOCTL, has pending
jobs. However, each userspace channel can only have one hardware
channel assigned at a time, ensuring that the current serialization
behavior is kept. As such, there is no change in the programming model
for userspace.

This series adapts VIC to use the new model. GR2D and GR3D are not
modified, as the older Tegra chips they are found on do not have
a large number of hardware channels, and it is therefore not clear
whether the new model is beneficial there (and I don't have access to
those chips to test it out).

Tested using the host1x_test test suite, and also by running
the performance test of host1x_test in parallel.

Thanks,
Mikko

Mikko Perttunen (10):
gpu: host1x: Parameterize channel aperture size
gpu: host1x: Print MLOCK state in debug dumps on T186
gpu: host1x: Add lock around channel allocation
gpu: host1x: Lock classes during job submission
gpu: host1x: Add job done callback
drm/tegra: Deliver job completion callback to client
drm/tegra: Make syncpoints be per-context
drm/tegra: Implement dynamic channel allocation model
drm/tegra: Boot VIC in runtime resume
gpu: host1x: Optionally block when acquiring channel

drivers/gpu/drm/tegra/drm.c | 82 +++++++++++++++--
drivers/gpu/drm/tegra/drm.h | 12 ++-
drivers/gpu/drm/tegra/gr2d.c | 8 +-
drivers/gpu/drm/tegra/gr3d.c | 8 +-
drivers/gpu/drm/tegra/vic.c | 120 ++++++++++++------------
drivers/gpu/host1x/cdma.c | 45 ++++++---
drivers/gpu/host1x/cdma.h | 1 +
drivers/gpu/host1x/channel.c | 47 ++++++++--
drivers/gpu/host1x/channel.h | 3 +
drivers/gpu/host1x/hw/cdma_hw.c | 122 +++++++++++++++++++++++++
drivers/gpu/host1x/hw/channel_hw.c | 74 +++++++++++----
drivers/gpu/host1x/hw/debug_hw_1x06.c | 18 +++-
drivers/gpu/host1x/hw/host1x01_hardware.h | 10 ++
drivers/gpu/host1x/hw/host1x02_hardware.h | 10 ++
drivers/gpu/host1x/hw/host1x04_hardware.h | 10 ++
drivers/gpu/host1x/hw/host1x05_hardware.h | 10 ++
drivers/gpu/host1x/hw/host1x06_hardware.h | 10 ++
drivers/gpu/host1x/hw/hw_host1x01_channel.h | 2 +
drivers/gpu/host1x/hw/hw_host1x01_sync.h | 6 ++
drivers/gpu/host1x/hw/hw_host1x02_channel.h | 2 +
drivers/gpu/host1x/hw/hw_host1x02_sync.h | 6 ++
drivers/gpu/host1x/hw/hw_host1x04_channel.h | 2 +
drivers/gpu/host1x/hw/hw_host1x04_sync.h | 6 ++
drivers/gpu/host1x/hw/hw_host1x05_channel.h | 2 +
drivers/gpu/host1x/hw/hw_host1x05_sync.h | 6 ++
drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h | 5 +
drivers/gpu/host1x/hw/hw_host1x06_vm.h | 2 +
include/linux/host1x.h | 6 +-
28 files changed, 517 insertions(+), 118 deletions(-)

--
2.14.2

From 1583491662371641186@xxx Wed Nov 08 10:02:08 +0000 2017
X-GM-THRID: 1583491662371641186
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread

2017-11-05 11:03:24

by Mikko Perttunen

[permalink] [raw]

Subject: [PATCH 02/10] gpu: host1x: Print MLOCK state in debug dumps on T186

Add support for dumping the current MLOCK state in debug dumps on T186
as well, now that MLOCKs are used by the driver.

Signed-off-by: Mikko Perttunen <[email protected]>
---
drivers/gpu/host1x/hw/debug_hw_1x06.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c b/drivers/gpu/host1x/hw/debug_hw_1x06.c
index b503c740c022..659dd6042ccc 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x06.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c
@@ -131,5 +131,21 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,

static void host1x_debug_show_mlocks(struct host1x *host, struct output *o)
{
- /* TODO */
+ unsigned int i;
+
+ if (!host->hv_regs)
+ return;
+
+ host1x_debug_output(o, "---- mlocks ----\n");
+
+ for (i = 0; i < host1x_syncpt_nb_mlocks(host); i++) {
+ u32 val = host1x_hypervisor_readl(host, HOST1X_HV_MLOCK(i));
+ if (HOST1X_HV_MLOCK_LOCKED_V(val))
+ host1x_debug_output(o, "%u: locked by channel %u\n",
+ i, HOST1X_HV_MLOCK_CH_V(val));
+ else
+ host1x_debug_output(o, "%u: unlocked\n", i);
+ }
+
+ host1x_debug_output(o, "\n");
}
--
2.14.2

From 1583309083513296043@xxx Mon Nov 06 09:40:07 +0000 2017
X-GM-THRID: 1583309083513296043
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread

2017-11-05 11:05:09

by Mikko Perttunen

[permalink] [raw]

Subject: [PATCH 07/10] drm/tegra: Make syncpoints be per-context

As a preparation for each context potentially being able to have a
separate hardware channel, and thus requiring a separate syncpoint,
move syncpoints to be stored inside each context instead of global
client data.

Signed-off-by: Mikko Perttunen <[email protected]>
---
drivers/gpu/drm/tegra/drm.c | 8 ++++----
drivers/gpu/drm/tegra/drm.h | 1 +
drivers/gpu/drm/tegra/gr2d.c | 2 ++
drivers/gpu/drm/tegra/gr3d.c | 2 ++
drivers/gpu/drm/tegra/vic.c | 2 ++
5 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 3e2a4a19412e..b964e18e3058 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -783,12 +783,12 @@ static int tegra_get_syncpt(struct drm_device *drm, void *data,
goto unlock;
}

- if (args->index >= context->client->base.num_syncpts) {
+ if (args->index >= 1) {
err = -EINVAL;
goto unlock;
}

- syncpt = context->client->base.syncpts[args->index];
+ syncpt = context->syncpt;
args->id = host1x_syncpt_id(syncpt);

unlock:
@@ -837,12 +837,12 @@ static int tegra_get_syncpt_base(struct drm_device *drm, void *data,
goto unlock;
}

- if (args->syncpt >= context->client->base.num_syncpts) {
+ if (args->syncpt >= 1) {
err = -EINVAL;
goto unlock;
}

- syncpt = context->client->base.syncpts[args->syncpt];
+ syncpt = context->syncpt;

base = host1x_syncpt_get_base(syncpt);
if (!base) {
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 079aebb3fb38..11d690846fd0 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -79,6 +79,7 @@ struct tegra_drm_context {

struct tegra_drm_client *client;
struct host1x_channel *channel;
+ struct host1x_syncpt *syncpt;
unsigned int id;
};

diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 6ea070da7718..3db3bcac48b9 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -76,6 +76,8 @@ static int gr2d_open_channel(struct tegra_drm_client *client,
if (!context->channel)
return -ENOMEM;

+ context->syncpt = client->base.syncpts[0];
+
return 0;
}

diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index cee2ab645cde..279438342c8c 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -86,6 +86,8 @@ static int gr3d_open_channel(struct tegra_drm_client *client,
if (!context->channel)
return -ENOMEM;

+ context->syncpt = client->base.syncpts[0];
+
return 0;
}

diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index 6697a21a250d..efe5f3af933e 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -240,6 +240,8 @@ static int vic_open_channel(struct tegra_drm_client *client,
return -ENOMEM;
}

+ context->syncpt = client->base.syncpts[0];
+
return 0;
}

--
2.14.2

From 1583210830922009922@xxx Sun Nov 05 07:38:26 +0000 2017
X-GM-THRID: 1582946571328910726
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread

2017-11-05 11:05:51

by Mikko Perttunen

[permalink] [raw]

Subject: [PATCH 01/10] gpu: host1x: Parameterize channel aperture size

The size of a single channel's aperture is different on Tegra186 vs.
previous chips. Parameterize the value using a new define in the
register definition headers.

Signed-off-by: Mikko Perttunen <[email protected]>
---
drivers/gpu/host1x/hw/channel_hw.c | 3 +--
drivers/gpu/host1x/hw/hw_host1x01_channel.h | 2 ++
drivers/gpu/host1x/hw/hw_host1x02_channel.h | 2 ++
drivers/gpu/host1x/hw/hw_host1x04_channel.h | 2 ++
drivers/gpu/host1x/hw/hw_host1x05_channel.h | 2 ++
drivers/gpu/host1x/hw/hw_host1x06_vm.h | 2 ++
6 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 5c0dc6bb51d1..246b78c41281 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -26,7 +26,6 @@
#include "../intr.h"
#include "../job.h"

-#define HOST1X_CHANNEL_SIZE 16384
#define TRACE_MAX_LENGTH 128U

static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
@@ -205,7 +204,7 @@ static void enable_gather_filter(struct host1x *host,
static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
unsigned int index)
{
- ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE;
+ ch->regs = dev->regs + HOST1X_CHANNEL_BASE(index);
enable_gather_filter(dev, ch);
return 0;
}
diff --git a/drivers/gpu/host1x/hw/hw_host1x01_channel.h b/drivers/gpu/host1x/hw/hw_host1x01_channel.h
index b4bc7ca4e051..be56a3a506de 100644
--- a/drivers/gpu/host1x/hw/hw_host1x01_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x01_channel.h
@@ -51,6 +51,8 @@
#ifndef __hw_host1x_channel_host1x_h__
#define __hw_host1x_channel_host1x_h__

+#define HOST1X_CHANNEL_BASE(x) ((x) * 0x4000)
+
static inline u32 host1x_channel_fifostat_r(void)
{
return 0x0;
diff --git a/drivers/gpu/host1x/hw/hw_host1x02_channel.h b/drivers/gpu/host1x/hw/hw_host1x02_channel.h
index e490bcde33fe..a142576a2c6e 100644
--- a/drivers/gpu/host1x/hw/hw_host1x02_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x02_channel.h
@@ -51,6 +51,8 @@
#ifndef HOST1X_HW_HOST1X02_CHANNEL_H
#define HOST1X_HW_HOST1X02_CHANNEL_H

+#define HOST1X_CHANNEL_BASE(x) ((x) * 0x4000)
+
static inline u32 host1x_channel_fifostat_r(void)
{
return 0x0;
diff --git a/drivers/gpu/host1x/hw/hw_host1x04_channel.h b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
index 2e8b635aa660..645483c07fc2 100644
--- a/drivers/gpu/host1x/hw/hw_host1x04_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
@@ -51,6 +51,8 @@
#ifndef HOST1X_HW_HOST1X04_CHANNEL_H
#define HOST1X_HW_HOST1X04_CHANNEL_H

+#define HOST1X_CHANNEL_BASE(x) ((x) * 0x4000)
+
static inline u32 host1x_channel_fifostat_r(void)
{
return 0x0;
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
index abbbc2641ce6..6aef6bc1c96d 100644
--- a/drivers/gpu/host1x/hw/hw_host1x05_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
@@ -51,6 +51,8 @@
#ifndef HOST1X_HW_HOST1X05_CHANNEL_H
#define HOST1X_HW_HOST1X05_CHANNEL_H

+#define HOST1X_CHANNEL_BASE(x) ((x) * 0x4000)
+
static inline u32 host1x_channel_fifostat_r(void)
{
return 0x0;
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_vm.h b/drivers/gpu/host1x/hw/hw_host1x06_vm.h
index e54b33902332..0750aea78a30 100644
--- a/drivers/gpu/host1x/hw/hw_host1x06_vm.h
+++ b/drivers/gpu/host1x/hw/hw_host1x06_vm.h
@@ -15,6 +15,8 @@
*
*/

+#define HOST1X_CHANNEL_BASE(x) ((x) * 0x100)
+
#define HOST1X_CHANNEL_DMASTART 0x0000
#define HOST1X_CHANNEL_DMASTART_HI 0x0004
#define HOST1X_CHANNEL_DMAPUT 0x0008
--
2.14.2

From 1583168237305480445@xxx Sat Nov 04 20:21:26 +0000 2017
X-GM-THRID: 1583168237305480445
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread

2017-11-05 11:05:11

by Mikko Perttunen

[permalink] [raw]

Subject: [PATCH 08/10] drm/tegra: Implement dynamic channel allocation model

In the traditional channel allocation model, a single hardware channel
was allocated for each client. This is simple from an implementation
perspective but prevents use of hardware scheduling.

This patch implements a channel allocation model in which a hardware
channel is allocated for a context when a user submits a job for it.
The same channel is kept for as long as there are incomplete jobs for
that context. This way we can use hardware scheduling and channel
isolation between userspace processes, while also preventing idle
contexts from taking up hardware resources.

For now, this patch only adapts VIC to the new model.

Signed-off-by: Mikko Perttunen <[email protected]>
---
drivers/gpu/drm/tegra/drm.c | 46 ++++++++++++++++++++++++++
drivers/gpu/drm/tegra/drm.h | 7 +++-
drivers/gpu/drm/tegra/vic.c | 79 +++++++++++++++++++++++----------------------
3 files changed, 92 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index b964e18e3058..658bc8814f38 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -382,6 +382,51 @@ static int host1x_waitchk_copy_from_user(struct host1x_waitchk *dest,
return 0;
}

+/**
+ * tegra_drm_context_get_channel() - Get a channel for submissions
+ * @context: Context for which to get a channel for
+ *
+ * Request a free hardware host1x channel for this user context, or if the
+ * context already has one, bump its refcount.
+ *
+ * Returns 0 on success, or -EBUSY if there were no free hardware channels.
+ */
+int tegra_drm_context_get_channel(struct tegra_drm_context *context)
+{
+ struct host1x_client *client = &context->client->base;
+
+ mutex_lock(&context->lock);
+
+ if (context->pending_jobs == 0) {
+ context->channel = host1x_channel_request(client->dev);
+ if (!context->channel) {
+ mutex_unlock(&context->lock);
+ return -EBUSY;
+ }
+ }
+
+ context->pending_jobs++;
+
+ mutex_unlock(&context->lock);
+
+ return 0;
+}
+
+/**
+ * tegra_drm_context_put_channel() - Put a previously gotten channel
+ * @context: Context which channel is no longer needed
+ *
+ * Decrease the refcount of the channel associated with this context,
+ * freeing it if the refcount drops to zero.
+ */
+void tegra_drm_context_put_channel(struct tegra_drm_context *context)
+{
+ mutex_lock(&context->lock);
+ if (--context->pending_jobs == 0)
+ host1x_channel_put(context->channel);
+ mutex_unlock(&context->lock);
+}
+
static void tegra_drm_job_done(struct host1x_job *job)
{
struct tegra_drm_context *context = job->callback_data;
@@ -737,6 +782,7 @@ static int tegra_open_channel(struct drm_device *drm, void *data,
kfree(context);

kref_init(&context->ref);
+ mutex_init(&context->lock);

mutex_unlock(&fpriv->lock);
return err;
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 11d690846fd0..d0c3f1f779f6 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -78,9 +78,12 @@ struct tegra_drm_context {
struct kref ref;

struct tegra_drm_client *client;
+ unsigned int id;
+
+ struct mutex lock;
struct host1x_channel *channel;
struct host1x_syncpt *syncpt;
- unsigned int id;
+ unsigned int pending_jobs;
};

struct tegra_drm_client_ops {
@@ -95,6 +98,8 @@ struct tegra_drm_client_ops {
void (*submit_done)(struct tegra_drm_context *context);
};

+int tegra_drm_context_get_channel(struct tegra_drm_context *context);
+void tegra_drm_context_put_channel(struct tegra_drm_context *context);
int tegra_drm_submit(struct tegra_drm_context *context,
struct drm_tegra_submit *args, struct drm_device *drm,
struct drm_file *file);
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index efe5f3af933e..0cacf023a890 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -33,7 +33,6 @@ struct vic {

void __iomem *regs;
struct tegra_drm_client client;
- struct host1x_channel *channel;
struct iommu_domain *domain;
struct device *dev;
struct clk *clk;
@@ -161,28 +160,12 @@ static int vic_init(struct host1x_client *client)
goto detach_device;
}

- vic->channel = host1x_channel_request(client->dev);
- if (!vic->channel) {
- err = -ENOMEM;
- goto detach_device;
- }
-
- client->syncpts[0] = host1x_syncpt_request(client->dev, 0);
- if (!client->syncpts[0]) {
- err = -ENOMEM;
- goto free_channel;
- }
-
err = tegra_drm_register_client(tegra, drm);
if (err < 0)
- goto free_syncpt;
+ goto detach_device;

return 0;

-free_syncpt:
- host1x_syncpt_free(client->syncpts[0]);
-free_channel:
- host1x_channel_put(vic->channel);
detach_device:
if (tegra->domain)
iommu_detach_device(tegra->domain, vic->dev);
@@ -202,9 +185,6 @@ static int vic_exit(struct host1x_client *client)
if (err < 0)
return err;

- host1x_syncpt_free(client->syncpts[0]);
- host1x_channel_put(vic->channel);
-
if (vic->domain) {
iommu_detach_device(vic->domain, vic->dev);
vic->domain = NULL;
@@ -221,7 +201,24 @@ static const struct host1x_client_ops vic_client_ops = {
static int vic_open_channel(struct tegra_drm_client *client,
struct tegra_drm_context *context)
{
- struct vic *vic = to_vic(client);
+ context->syncpt = host1x_syncpt_request(client->base.dev, 0);
+ if (!context->syncpt)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void vic_close_channel(struct tegra_drm_context *context)
+{
+ host1x_syncpt_free(context->syncpt);
+}
+
+static int vic_submit(struct tegra_drm_context *context,
+ struct drm_tegra_submit *args, struct drm_device *drm,
+ struct drm_file *file)
+{
+ struct host1x_client *client = &context->client->base;
+ struct vic *vic = dev_get_drvdata(client->dev);
int err;

err = pm_runtime_get_sync(vic->dev);
@@ -229,35 +226,41 @@ static int vic_open_channel(struct tegra_drm_client *client,
return err;

err = vic_boot(vic);
- if (err < 0) {
- pm_runtime_put(vic->dev);
- return err;
- }
+ if (err < 0)
+ goto put_vic;

- context->channel = host1x_channel_get(vic->channel);
- if (!context->channel) {
- pm_runtime_put(vic->dev);
- return -ENOMEM;
- }
+ err = tegra_drm_context_get_channel(context);
+ if (err < 0)
+ goto put_vic;

- context->syncpt = client->base.syncpts[0];
+ err = tegra_drm_submit(context, args, drm, file);
+ if (err)
+ goto put_channel;

return 0;
+
+put_channel:
+ tegra_drm_context_put_channel(context);
+put_vic:
+ pm_runtime_put(vic->dev);
+
+ return err;
}

-static void vic_close_channel(struct tegra_drm_context *context)
+static void vic_submit_done(struct tegra_drm_context *context)
{
- struct vic *vic = to_vic(context->client);
-
- host1x_channel_put(context->channel);
+ struct host1x_client *client = &context->client->base;
+ struct vic *vic = dev_get_drvdata(client->dev);

+ tegra_drm_context_put_channel(context);
pm_runtime_put(vic->dev);
}

static const struct tegra_drm_client_ops vic_ops = {
.open_channel = vic_open_channel,
.close_channel = vic_close_channel,
- .submit = tegra_drm_submit,
+ .submit = vic_submit,
+ .submit_done = vic_submit_done,
};

#define NVIDIA_TEGRA_124_VIC_FIRMWARE "nvidia/tegra124/vic03_ucode.bin"
@@ -340,8 +343,6 @@ static int vic_probe(struct platform_device *pdev)
vic->client.base.ops = &vic_client_ops;
vic->client.base.dev = dev;
vic->client.base.class = HOST1X_CLASS_VIC;
- vic->client.base.syncpts = syncpts;
- vic->client.base.num_syncpts = 1;
vic->dev = dev;
vic->config = vic_config;

--
2.14.2

From 1583091321058385441@xxx Fri Nov 03 23:58:53 +0000 2017
X-GM-THRID: 1582933530219462368
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread

2017-11-05 11:06:18

by Mikko Perttunen

[permalink] [raw]

Subject: [PATCH 05/10] gpu: host1x: Add job done callback

Allow job submitters to set a callback to be called when the job has
completed. Finished jobs are collected on a list and their callbacks
are called outside the CDMA lock region, so that the callbacks can
perform operations that take the CDMA lock, such as freeing channels.

Signed-off-by: Mikko Perttunen <[email protected]>
---
drivers/gpu/host1x/cdma.c | 44 +++++++++++++++++++++++++++++++++-----------
include/linux/host1x.h | 4 ++++
2 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index f787cfe69c11..57221d199d33 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -251,17 +251,24 @@ static void stop_cdma_timer_locked(struct host1x_cdma *cdma)
cdma->timeout.client = 0;
}

-/*
- * For all sync queue entries that have already finished according to the
- * current sync point registers:
- * - unpin & unref their mems
- * - pop their push buffer slots
- * - remove them from the sync queue
+/**
+ * update_cdma_locked() - Update CDMA sync queue
+ * @cdma: CDMA instance to update
+ * @done_jobs: List that finished jobs will be added to
+ *
+ * Go through the CDMA's sync queue, and for each job that has been finished,
+ * - unpin it
+ * - pop its push buffer slots
+ * - remove it from the sync queue
+ * - add it to the done_jobs list.
+ *
* This is normally called from the host code's worker thread, but can be
* called manually if necessary.
- * Must be called with the cdma lock held.
+ *
+ * Must be called with the CDMA lock held.
*/
-static void update_cdma_locked(struct host1x_cdma *cdma)
+static void update_cdma_locked(struct host1x_cdma *cdma,
+ struct list_head *done_jobs)
{
bool signal = false;
struct host1x *host1x = cdma_to_host1x(cdma);
@@ -305,8 +312,7 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
signal = true;
}

- list_del(&job->list);
- host1x_job_put(job);
+ list_move_tail(&job->list, done_jobs);
}

if (cdma->event == CDMA_EVENT_SYNC_QUEUE_EMPTY &&
@@ -542,7 +548,23 @@ void host1x_cdma_end(struct host1x_cdma *cdma,
*/
void host1x_cdma_update(struct host1x_cdma *cdma)
{
+ struct host1x_job *job, *tmp;
+ LIST_HEAD(done_jobs);
+
mutex_lock(&cdma->lock);
- update_cdma_locked(cdma);
+ update_cdma_locked(cdma, &done_jobs);
mutex_unlock(&cdma->lock);
+
+ /*
+ * The done callback may want to free the channel, which requires
+ * taking the CDMA lock, so we need to do it outside the above lock
+ * region.
+ */
+ list_for_each_entry_safe(job, tmp, &done_jobs, list) {
+ if (job->done)
+ job->done(job);
+
+ list_del(&job->list);
+ host1x_job_put(job);
+ }
}
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 630b1a98ab58..f931d28a68ff 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -253,6 +253,10 @@ struct host1x_job {
/* Check if class belongs to the unit */
int (*is_valid_class)(u32 class);

+ /* Job done callback */
+ void (*done)(struct host1x_job *job);
+ void *callback_data;
+
/* Request a SETCLASS to this class */
u32 class;

--
2.14.2

From 1583088089894098346@xxx Fri Nov 03 23:07:31 +0000 2017
X-GM-THRID: 1583088089894098346
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread

2017-11-05 11:03:04

by Mikko Perttunen

[permalink] [raw]

Subject: [PATCH 03/10] gpu: host1x: Add lock around channel allocation

With the new channel allocation model, multiple threads can be
allocating channels simultaneously. Therefore we need to add a lock
around the channel allocation code.

Signed-off-by: Mikko Perttunen <[email protected]>
---
drivers/gpu/host1x/channel.c | 7 +++++++
drivers/gpu/host1x/channel.h | 2 ++
2 files changed, 9 insertions(+)

diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 2fb93c27c1d9..9d8cad12f9d8 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -42,6 +42,8 @@ int host1x_channel_list_init(struct host1x_channel_list *chlist,

bitmap_zero(chlist->allocated_channels, num_channels);

+ mutex_init(&chlist->lock);
+
return 0;
}

@@ -111,8 +113,11 @@ static struct host1x_channel *acquire_unused_channel(struct host1x *host)
unsigned int max_channels = host->info->nb_channels;
unsigned int index;

+ mutex_lock(&chlist->lock);
+
index = find_first_zero_bit(chlist->allocated_channels, max_channels);
if (index >= max_channels) {
+ mutex_unlock(&chlist->lock);
dev_err(host->dev, "failed to find free channel\n");
return NULL;
}
@@ -121,6 +126,8 @@ static struct host1x_channel *acquire_unused_channel(struct host1x *host)

set_bit(index, chlist->allocated_channels);

+ mutex_unlock(&chlist->lock);
+
return &chlist->channels[index];
}

diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h
index 7068e42d42df..e68a8ae9a670 100644
--- a/drivers/gpu/host1x/channel.h
+++ b/drivers/gpu/host1x/channel.h
@@ -29,6 +29,8 @@ struct host1x_channel;

struct host1x_channel_list {
struct host1x_channel *channels;
+
+ struct mutex lock;
unsigned long *allocated_channels;
};

--
2.14.2

From 1583364830247513862@xxx Tue Nov 07 00:26:12 +0000 2017
X-GM-THRID: 1583364830247513862
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread

2017-11-05 11:04:30

by Mikko Perttunen

[permalink] [raw]

Subject: [PATCH 09/10] drm/tegra: Boot VIC in runtime resume

This avoids concurrency issues when multiple users are trying to use
VIC at the same time, and also simplifies the code slightly.

Signed-off-by: Mikko Perttunen <[email protected]>
---
drivers/gpu/drm/tegra/vic.c | 47 +++++++++++++++++++--------------------------
1 file changed, 20 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index 0cacf023a890..3de20f287112 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -29,7 +29,6 @@ struct vic_config {

struct vic {
struct falcon falcon;
- bool booted;

void __iomem *regs;
struct tegra_drm_client client;
@@ -51,33 +50,12 @@ static void vic_writel(struct vic *vic, u32 value, unsigned int offset)
writel(value, vic->regs + offset);
}

-static int vic_runtime_resume(struct device *dev)
-{
- struct vic *vic = dev_get_drvdata(dev);
-
- return clk_prepare_enable(vic->clk);
-}
-
-static int vic_runtime_suspend(struct device *dev)
-{
- struct vic *vic = dev_get_drvdata(dev);
-
- clk_disable_unprepare(vic->clk);
-
- vic->booted = false;
-
- return 0;
-}
-
static int vic_boot(struct vic *vic)
{
u32 fce_ucode_size, fce_bin_data_offset;
void *hdr;
int err = 0;

- if (vic->booted)
- return 0;
-
/* setup clockgating registers */
vic_writel(vic, CG_IDLE_CG_DLY_CNT(4) |
CG_IDLE_CG_EN |
@@ -108,7 +86,26 @@ static int vic_boot(struct vic *vic)
return err;
}

- vic->booted = true;
+ return 0;
+}
+
+static int vic_runtime_resume(struct device *dev)
+{
+ struct vic *vic = dev_get_drvdata(dev);
+ int err;
+
+ err = clk_prepare_enable(vic->clk);
+ if (err < 0)
+ return err;
+
+ return vic_boot(vic);
+}
+
+static int vic_runtime_suspend(struct device *dev)
+{
+ struct vic *vic = dev_get_drvdata(dev);
+
+ clk_disable_unprepare(vic->clk);

return 0;
}
@@ -225,10 +222,6 @@ static int vic_submit(struct tegra_drm_context *context,
if (err < 0)
return err;

- err = vic_boot(vic);
- if (err < 0)
- goto put_vic;
-
err = tegra_drm_context_get_channel(context);
if (err < 0)
goto put_vic;
--
2.14.2

From 1583039895533110115@xxx Fri Nov 03 10:21:30 +0000 2017
X-GM-THRID: 1583039895533110115
X-Gmail-Labels: Inbox,Category Forums,HistoricalUnread