This patch set implements the MSM_WAIT_IOVA ioctl which lets
userspace sleep until the value at a given iova meets a certain
condition. This is needed in turnip to implement the
VK_QUERY_RESULT_WAIT_BIT flag for vkGetQueryPoolResults.
First, we add a GPU-wide wait queue that is signaled on all IRQs.
We can then wait on this wait queue inside MSM_WAIT_IOVA until the
condition is met.
The corresponding merge request in mesa can be found at:
https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3279
Brian Ho (2):
drm/msm: Add a GPU-wide wait queue
drm/msm: Add MSM_WAIT_IOVA ioctl
drivers/gpu/drm/msm/msm_drv.c | 63 +++++++++++++++++++++++++++++++++--
drivers/gpu/drm/msm/msm_gpu.c | 4 +++
drivers/gpu/drm/msm/msm_gpu.h | 3 ++
include/uapi/drm/msm_drm.h | 13 ++++++++
4 files changed, 81 insertions(+), 2 deletions(-)
--
2.25.0.rc1.283.g88dfdc4193-goog
This wait queue is signaled on all IRQs for a given GPU and will be
used as part of the new MSM_WAIT_IOVA ioctl so userspace can sleep
until the value at a given iova reaches a certain condition.
Signed-off-by: Brian Ho <[email protected]>
---
drivers/gpu/drm/msm/msm_gpu.c | 4 ++++
drivers/gpu/drm/msm/msm_gpu.h | 3 +++
2 files changed, 7 insertions(+)
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index a052364a5d74..d7310c1336e5 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -779,6 +779,8 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
static irqreturn_t irq_handler(int irq, void *data)
{
struct msm_gpu *gpu = data;
+ wake_up_all(&gpu->event);
+
return gpu->funcs->irq(gpu);
}
@@ -871,6 +873,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
spin_lock_init(&gpu->perf_lock);
+ init_waitqueue_head(&gpu->event);
+
/* Map registers: */
gpu->mmio = msm_ioremap(pdev, config->ioname, name);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index ab8f0f9c9dc8..60562f065dbc 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -104,6 +104,9 @@ struct msm_gpu {
struct msm_gem_address_space *aspace;
+ /* GPU-wide wait queue that is signaled on all IRQs */
+ wait_queue_head_t event;
+
/* Power Control: */
struct regulator *gpu_reg, *gpu_cx;
struct clk_bulk_data *grp_clks;
--
2.25.0.rc1.283.g88dfdc4193-goog
Implements an ioctl to wait until a value at a given iova is greater
than or equal to a supplied value.
This will initially be used by turnip (open-source Vulkan driver for
QC in mesa) for occlusion queries where the userspace driver can
block on a query becoming available before continuing via
vkGetQueryPoolResults.
Signed-off-by: Brian Ho <[email protected]>
---
drivers/gpu/drm/msm/msm_drv.c | 63 +++++++++++++++++++++++++++++++++--
include/uapi/drm/msm_drm.h | 13 ++++++++
2 files changed, 74 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index c84f0a8b3f2c..dcc46874a5a2 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -36,10 +36,11 @@
* MSM_GEM_INFO ioctl.
* - 1.4.0 - softpin, MSM_RELOC_BO_DUMP, and GEM_INFO support to set/get
* GEM object's debug name
- * - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
+ * - 1.5.0 - Add SUBMITQUEUE_QUERY ioctl
+ * - 1.6.0 - Add WAIT_IOVA ioctl
*/
#define MSM_VERSION_MAJOR 1
-#define MSM_VERSION_MINOR 5
+#define MSM_VERSION_MINOR 6
#define MSM_VERSION_PATCHLEVEL 0
static const struct drm_mode_config_funcs mode_config_funcs = {
@@ -952,6 +953,63 @@ static int msm_ioctl_submitqueue_close(struct drm_device *dev, void *data,
return msm_submitqueue_remove(file->driver_priv, id);
}
+static int msm_ioctl_wait_iova(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct msm_drm_private *priv = dev->dev_private;
+ struct drm_gem_object *obj;
+ struct drm_msm_wait_iova *args = data;
+ ktime_t timeout = to_ktime(args->timeout);
+ unsigned long remaining_jiffies = timeout_to_jiffies(&timeout);
+ struct msm_gpu *gpu = priv->gpu;
+ void *base_vaddr;
+ uint64_t *vaddr;
+ int ret;
+
+ if (args->pad)
+ return -EINVAL;
+
+ if (!gpu)
+ return 0;
+
+ obj = drm_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
+
+ base_vaddr = msm_gem_get_vaddr(obj);
+ if (IS_ERR(base_vaddr)) {
+ ret = PTR_ERR(base_vaddr);
+ goto err_put_gem_object;
+ }
+ if (args->offset + sizeof(*vaddr) > obj->size) {
+ ret = -EINVAL;
+ goto err_put_vaddr;
+ }
+
+ vaddr = base_vaddr + args->offset;
+
+ /* Assumes WC mapping */
+ ret = wait_event_interruptible_timeout(
+ gpu->event, *vaddr >= args->value, remaining_jiffies);
+
+ if (ret == 0) {
+ ret = -ETIMEDOUT;
+ goto err_put_vaddr;
+ } else if (ret == -ERESTARTSYS) {
+ goto err_put_vaddr;
+ }
+
+ msm_gem_put_vaddr(obj);
+ drm_gem_object_put_unlocked(obj);
+ return 0;
+
+err_put_vaddr:
+ msm_gem_put_vaddr(obj);
+err_put_gem_object:
+ drm_gem_object_put_unlocked(obj);
+ return ret;
+}
+
static const struct drm_ioctl_desc msm_ioctls[] = {
DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new, DRM_RENDER_ALLOW),
@@ -964,6 +1022,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW, msm_ioctl_submitqueue_new, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE, msm_ioctl_submitqueue_close, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(MSM_WAIT_IOVA, msm_ioctl_wait_iova, DRM_RENDER_ALLOW),
};
static const struct vm_operations_struct vm_ops = {
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 0b85ed6a3710..8477f28a4ee1 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -298,6 +298,17 @@ struct drm_msm_submitqueue_query {
__u32 pad;
};
+/* This ioctl blocks until the u64 value at bo + offset is greater than or
+ * equal to the reference value.
+ */
+struct drm_msm_wait_iova {
+ __u32 handle; /* in, GEM handle */
+ __u32 pad;
+ struct drm_msm_timespec timeout; /* in */
+ __u64 offset; /* offset into bo */
+ __u64 value; /* reference value */
+};
+
#define DRM_MSM_GET_PARAM 0x00
/* placeholder:
#define DRM_MSM_SET_PARAM 0x01
@@ -315,6 +326,7 @@ struct drm_msm_submitqueue_query {
#define DRM_MSM_SUBMITQUEUE_NEW 0x0A
#define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B
#define DRM_MSM_SUBMITQUEUE_QUERY 0x0C
+#define DRM_MSM_WAIT_IOVA 0x0D
#define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param)
#define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
@@ -327,6 +339,7 @@ struct drm_msm_submitqueue_query {
#define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue)
#define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32)
#define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_QUERY, struct drm_msm_submitqueue_query)
+#define DRM_IOCTL_MSM_WAIT_IOVA DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_WAIT_IOVA, struct drm_msm_wait_iova)
#if defined(__cplusplus)
}
--
2.25.0.rc1.283.g88dfdc4193-goog
On Mon, Jan 13, 2020 at 7:37 AM Brian Ho <[email protected]> wrote:
>
> Implements an ioctl to wait until a value at a given iova is greater
> than or equal to a supplied value.
>
> This will initially be used by turnip (open-source Vulkan driver for
> QC in mesa) for occlusion queries where the userspace driver can
> block on a query becoming available before continuing via
> vkGetQueryPoolResults.
>
> Signed-off-by: Brian Ho <[email protected]>
> ---
> drivers/gpu/drm/msm/msm_drv.c | 63 +++++++++++++++++++++++++++++++++--
> include/uapi/drm/msm_drm.h | 13 ++++++++
> 2 files changed, 74 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index c84f0a8b3f2c..dcc46874a5a2 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -36,10 +36,11 @@
> * MSM_GEM_INFO ioctl.
> * - 1.4.0 - softpin, MSM_RELOC_BO_DUMP, and GEM_INFO support to set/get
> * GEM object's debug name
> - * - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
> + * - 1.5.0 - Add SUBMITQUEUE_QUERY ioctl
> + * - 1.6.0 - Add WAIT_IOVA ioctl
> */
> #define MSM_VERSION_MAJOR 1
> -#define MSM_VERSION_MINOR 5
> +#define MSM_VERSION_MINOR 6
> #define MSM_VERSION_PATCHLEVEL 0
>
> static const struct drm_mode_config_funcs mode_config_funcs = {
> @@ -952,6 +953,63 @@ static int msm_ioctl_submitqueue_close(struct drm_device *dev, void *data,
> return msm_submitqueue_remove(file->driver_priv, id);
> }
>
> +static int msm_ioctl_wait_iova(struct drm_device *dev, void *data,
> + struct drm_file *file)
> +{
> + struct msm_drm_private *priv = dev->dev_private;
> + struct drm_gem_object *obj;
> + struct drm_msm_wait_iova *args = data;
> + ktime_t timeout = to_ktime(args->timeout);
> + unsigned long remaining_jiffies = timeout_to_jiffies(&timeout);
> + struct msm_gpu *gpu = priv->gpu;
> + void *base_vaddr;
> + uint64_t *vaddr;
> + int ret;
> +
> + if (args->pad)
> + return -EINVAL;
> +
> + if (!gpu)
> + return 0;
hmm, I'm not sure we should return zero in this case.. maybe -ENODEV?
> +
> + obj = drm_gem_object_lookup(file, args->handle);
> + if (!obj)
> + return -ENOENT;
> +
> + base_vaddr = msm_gem_get_vaddr(obj);
> + if (IS_ERR(base_vaddr)) {
> + ret = PTR_ERR(base_vaddr);
> + goto err_put_gem_object;
> + }
> + if (args->offset + sizeof(*vaddr) > obj->size) {
> + ret = -EINVAL;
> + goto err_put_vaddr;
> + }
> +
> + vaddr = base_vaddr + args->offset;
> +
> + /* Assumes WC mapping */
> + ret = wait_event_interruptible_timeout(
> + gpu->event, *vaddr >= args->value, remaining_jiffies);
> +
> + if (ret == 0) {
> + ret = -ETIMEDOUT;
> + goto err_put_vaddr;
> + } else if (ret == -ERESTARTSYS) {
> + goto err_put_vaddr;
> + }
maybe:
} else {
ret = 0;
}
and then drop the next three lines?
> +
> + msm_gem_put_vaddr(obj);
> + drm_gem_object_put_unlocked(obj);
> + return 0;
> +
> +err_put_vaddr:
> + msm_gem_put_vaddr(obj);
> +err_put_gem_object:
> + drm_gem_object_put_unlocked(obj);
> + return ret;
> +}
> +
> static const struct drm_ioctl_desc msm_ioctls[] = {
> DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param, DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new, DRM_RENDER_ALLOW),
> @@ -964,6 +1022,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
> DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW, msm_ioctl_submitqueue_new, DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE, msm_ioctl_submitqueue_close, DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(MSM_WAIT_IOVA, msm_ioctl_wait_iova, DRM_RENDER_ALLOW),
> };
>
> static const struct vm_operations_struct vm_ops = {
> diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
> index 0b85ed6a3710..8477f28a4ee1 100644
> --- a/include/uapi/drm/msm_drm.h
> +++ b/include/uapi/drm/msm_drm.h
> @@ -298,6 +298,17 @@ struct drm_msm_submitqueue_query {
> __u32 pad;
> };
>
> +/* This ioctl blocks until the u64 value at bo + offset is greater than or
> + * equal to the reference value.
> + */
> +struct drm_msm_wait_iova {
> + __u32 handle; /* in, GEM handle */
> + __u32 pad;
> + struct drm_msm_timespec timeout; /* in */
> + __u64 offset; /* offset into bo */
> + __u64 value; /* reference value */
Maybe we should go ahead and add a __u64 mask;
that would let us wait for 32b values as well, and wait for bits in a bitmask
Other than those minor comments, it looks pretty good to me
BR,
-R
> +};
> +
> #define DRM_MSM_GET_PARAM 0x00
> /* placeholder:
> #define DRM_MSM_SET_PARAM 0x01
> @@ -315,6 +326,7 @@ struct drm_msm_submitqueue_query {
> #define DRM_MSM_SUBMITQUEUE_NEW 0x0A
> #define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B
> #define DRM_MSM_SUBMITQUEUE_QUERY 0x0C
> +#define DRM_MSM_WAIT_IOVA 0x0D
>
> #define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param)
> #define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
> @@ -327,6 +339,7 @@ struct drm_msm_submitqueue_query {
> #define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue)
> #define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32)
> #define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_QUERY, struct drm_msm_submitqueue_query)
> +#define DRM_IOCTL_MSM_WAIT_IOVA DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_WAIT_IOVA, struct drm_msm_wait_iova)
>
> #if defined(__cplusplus)
> }
> --
> 2.25.0.rc1.283.g88dfdc4193-goog
>
On Mon, Jan 13, 2020 at 10:36:05AM -0500, Brian Ho wrote:
> Implements an ioctl to wait until a value at a given iova is greater
> than or equal to a supplied value.
>
> This will initially be used by turnip (open-source Vulkan driver for
> QC in mesa) for occlusion queries where the userspace driver can
> block on a query becoming available before continuing via
> vkGetQueryPoolResults.
>
> Signed-off-by: Brian Ho <[email protected]>
> ---
> drivers/gpu/drm/msm/msm_drv.c | 63 +++++++++++++++++++++++++++++++++--
> include/uapi/drm/msm_drm.h | 13 ++++++++
> 2 files changed, 74 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index c84f0a8b3f2c..dcc46874a5a2 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -36,10 +36,11 @@
> * MSM_GEM_INFO ioctl.
> * - 1.4.0 - softpin, MSM_RELOC_BO_DUMP, and GEM_INFO support to set/get
> * GEM object's debug name
> - * - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
> + * - 1.5.0 - Add SUBMITQUEUE_QUERY ioctl
> + * - 1.6.0 - Add WAIT_IOVA ioctl
> */
> #define MSM_VERSION_MAJOR 1
> -#define MSM_VERSION_MINOR 5
> +#define MSM_VERSION_MINOR 6
> #define MSM_VERSION_PATCHLEVEL 0
>
> static const struct drm_mode_config_funcs mode_config_funcs = {
> @@ -952,6 +953,63 @@ static int msm_ioctl_submitqueue_close(struct drm_device *dev, void *data,
> return msm_submitqueue_remove(file->driver_priv, id);
> }
>
> +static int msm_ioctl_wait_iova(struct drm_device *dev, void *data,
> + struct drm_file *file)
> +{
> + struct msm_drm_private *priv = dev->dev_private;
> + struct drm_gem_object *obj;
> + struct drm_msm_wait_iova *args = data;
> + ktime_t timeout = to_ktime(args->timeout);
> + unsigned long remaining_jiffies = timeout_to_jiffies(&timeout);
> + struct msm_gpu *gpu = priv->gpu;
> + void *base_vaddr;
> + uint64_t *vaddr;
> + int ret;
> +
> + if (args->pad)
> + return -EINVAL;
> +
> + if (!gpu)
> + return 0;
If the GPU isn't up, it should be an error since this ioctl is specifically
designed for just the GPU (though, I suppose the display *COULD* use it to watch
a memory mapped register or something).
> +
> + obj = drm_gem_object_lookup(file, args->handle);
> + if (!obj)
> + return -ENOENT;
> +
> + base_vaddr = msm_gem_get_vaddr(obj);
> + if (IS_ERR(base_vaddr)) {
> + ret = PTR_ERR(base_vaddr);
> + goto err_put_gem_object;
> + }
> + if (args->offset + sizeof(*vaddr) > obj->size) {
There is a chance to trigger a u64 overflow here resulting in an arbitrary (ish)
vaddr two lines below.
> + ret = -EINVAL;
> + goto err_put_vaddr;
> + }
You can check this before getting the vaddr which would save you a clean up
step.
> +
> + vaddr = base_vaddr + args->offset;
> +
> + /* Assumes WC mapping */
> + ret = wait_event_interruptible_timeout(
> + gpu->event, *vaddr >= args->value, remaining_jiffies);
I feel like a barrier might be needed before checking *vaddr just in case you
get the interrupt and wake up the queue before the write posts from the
hardware.
> +
> + if (ret == 0) {
> + ret = -ETIMEDOUT;
> + goto err_put_vaddr;
> + } else if (ret == -ERESTARTSYS) {
> + goto err_put_vaddr;
> + }
You don't need either goto here because both paths execute the following cleanup
steps. I'm also not sure you need to worry about explicitly checking the
ERESTARTSYS value; I think that this would be sufficient:
if (ret == 0)
ret = -ETIMEDOUT;
else if (ret > 0)
ret = 0;
> +
Put your err_put_vaddr: label here, but looking up, if you move the bounds check
before the msm_gem_get_vaddr, I don't think you need a label.
> + msm_gem_put_vaddr(obj);
Put the err_put_gem_object: label here.
> + drm_gem_object_put_unlocked(obj);
> + return 0;
return ret;
> +
> +err_put_vaddr:
> + msm_gem_put_vaddr(obj);
> +err_put_gem_object:
> + drm_gem_object_put_unlocked(obj);
> + return ret;
> +}
And then these guys aren't needed.
> +
> static const struct drm_ioctl_desc msm_ioctls[] = {
> DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param, DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new, DRM_RENDER_ALLOW),
> @@ -964,6 +1022,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
> DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW, msm_ioctl_submitqueue_new, DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE, msm_ioctl_submitqueue_close, DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(MSM_WAIT_IOVA, msm_ioctl_wait_iova, DRM_RENDER_ALLOW),
> };
>
> static const struct vm_operations_struct vm_ops = {
> diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
> index 0b85ed6a3710..8477f28a4ee1 100644
> --- a/include/uapi/drm/msm_drm.h
> +++ b/include/uapi/drm/msm_drm.h
> @@ -298,6 +298,17 @@ struct drm_msm_submitqueue_query {
> __u32 pad;
> };
>
> +/* This ioctl blocks until the u64 value at bo + offset is greater than or
> + * equal to the reference value.
> + */
> +struct drm_msm_wait_iova {
> + __u32 handle; /* in, GEM handle */
> + __u32 pad;
> + struct drm_msm_timespec timeout; /* in */
> + __u64 offset; /* offset into bo */
> + __u64 value; /* reference value */
Any specific reason why we wouldn't just put the offset and value first and save
ourselves the padding?
> +};
> +
> #define DRM_MSM_GET_PARAM 0x00
> /* placeholder:
> #define DRM_MSM_SET_PARAM 0x01
> @@ -315,6 +326,7 @@ struct drm_msm_submitqueue_query {
> #define DRM_MSM_SUBMITQUEUE_NEW 0x0A
> #define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B
> #define DRM_MSM_SUBMITQUEUE_QUERY 0x0C
> +#define DRM_MSM_WAIT_IOVA 0x0D
>
> #define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param)
> #define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
> @@ -327,6 +339,7 @@ struct drm_msm_submitqueue_query {
> #define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue)
> #define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32)
> #define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_QUERY, struct drm_msm_submitqueue_query)
> +#define DRM_IOCTL_MSM_WAIT_IOVA DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_WAIT_IOVA, struct drm_msm_wait_iova)
>
> #if defined(__cplusplus)
> }
> --
> 2.25.0.rc1.283.g88dfdc4193-goog
>
> _______________________________________________
> Freedreno mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/freedreno
--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
On Mon, Jan 13, 2020 at 10:36:04AM -0500, Brian Ho wrote:
> This wait queue is signaled on all IRQs for a given GPU and will be
> used as part of the new MSM_WAIT_IOVA ioctl so userspace can sleep
> until the value at a given iova reaches a certain condition.
>
> Signed-off-by: Brian Ho <[email protected]>
> ---
> drivers/gpu/drm/msm/msm_gpu.c | 4 ++++
> drivers/gpu/drm/msm/msm_gpu.h | 3 +++
> 2 files changed, 7 insertions(+)
>
> diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
> index a052364a5d74..d7310c1336e5 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.c
> +++ b/drivers/gpu/drm/msm/msm_gpu.c
> @@ -779,6 +779,8 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
> static irqreturn_t irq_handler(int irq, void *data)
> {
> struct msm_gpu *gpu = data;
> + wake_up_all(&gpu->event);
> +
I suppose it is intentional to have this happen on *all* interrupts because you
might be using the CP interrupts for fun and profit and you don't want to plumb
in callbacks? I suppose it is okay to do this for all interrupts (including
errors) but if we're spending a lot of time here we might want to only trigger
on certain IRQs.
> return gpu->funcs->irq(gpu);
> }
>
> @@ -871,6 +873,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
>
> spin_lock_init(&gpu->perf_lock);
>
> + init_waitqueue_head(&gpu->event);
> +
>
> /* Map registers: */
> gpu->mmio = msm_ioremap(pdev, config->ioname, name);
> diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> index ab8f0f9c9dc8..60562f065dbc 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.h
> +++ b/drivers/gpu/drm/msm/msm_gpu.h
> @@ -104,6 +104,9 @@ struct msm_gpu {
>
> struct msm_gem_address_space *aspace;
>
> + /* GPU-wide wait queue that is signaled on all IRQs */
> + wait_queue_head_t event;
> +
> /* Power Control: */
> struct regulator *gpu_reg, *gpu_cx;
> struct clk_bulk_data *grp_clks;
> --
> 2.25.0.rc1.283.g88dfdc4193-goog
>
> _______________________________________________
> Freedreno mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/freedreno
--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
On Mon, Jan 13, 2020 at 9:51 AM Jordan Crouse <[email protected]> wrote:
>
> On Mon, Jan 13, 2020 at 10:36:05AM -0500, Brian Ho wrote:
> > Implements an ioctl to wait until a value at a given iova is greater
> > than or equal to a supplied value.
> >
> > This will initially be used by turnip (open-source Vulkan driver for
> > QC in mesa) for occlusion queries where the userspace driver can
> > block on a query becoming available before continuing via
> > vkGetQueryPoolResults.
> >
> > Signed-off-by: Brian Ho <[email protected]>
> > ---
> > drivers/gpu/drm/msm/msm_drv.c | 63 +++++++++++++++++++++++++++++++++--
> > include/uapi/drm/msm_drm.h | 13 ++++++++
> > 2 files changed, 74 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> > index c84f0a8b3f2c..dcc46874a5a2 100644
> > --- a/drivers/gpu/drm/msm/msm_drv.c
> > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > @@ -36,10 +36,11 @@
> > * MSM_GEM_INFO ioctl.
> > * - 1.4.0 - softpin, MSM_RELOC_BO_DUMP, and GEM_INFO support to set/get
> > * GEM object's debug name
> > - * - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
> > + * - 1.5.0 - Add SUBMITQUEUE_QUERY ioctl
> > + * - 1.6.0 - Add WAIT_IOVA ioctl
> > */
> > #define MSM_VERSION_MAJOR 1
> > -#define MSM_VERSION_MINOR 5
> > +#define MSM_VERSION_MINOR 6
> > #define MSM_VERSION_PATCHLEVEL 0
> >
> > static const struct drm_mode_config_funcs mode_config_funcs = {
> > @@ -952,6 +953,63 @@ static int msm_ioctl_submitqueue_close(struct drm_device *dev, void *data,
> > return msm_submitqueue_remove(file->driver_priv, id);
> > }
> >
> > +static int msm_ioctl_wait_iova(struct drm_device *dev, void *data,
> > + struct drm_file *file)
> > +{
> > + struct msm_drm_private *priv = dev->dev_private;
> > + struct drm_gem_object *obj;
> > + struct drm_msm_wait_iova *args = data;
> > + ktime_t timeout = to_ktime(args->timeout);
> > + unsigned long remaining_jiffies = timeout_to_jiffies(&timeout);
> > + struct msm_gpu *gpu = priv->gpu;
> > + void *base_vaddr;
> > + uint64_t *vaddr;
> > + int ret;
> > +
> > + if (args->pad)
> > + return -EINVAL;
> > +
> > + if (!gpu)
> > + return 0;
>
> If the GPU isn't up, it should be an error since this macro is specifically
> designed for just the GPU (though, I suppose the display *COULD* use it to watch
> a memory mapped register or something).
>
> > +
> > + obj = drm_gem_object_lookup(file, args->handle);
> > + if (!obj)
> > + return -ENOENT;
> > +
> > + base_vaddr = msm_gem_get_vaddr(obj);
> > + if (IS_ERR(base_vaddr)) {
> > + ret = PTR_ERR(base_vaddr);
> > + goto err_put_gem_object;
> > + }
> > + if (args->offset + sizeof(*vaddr) > obj->size) {
>
> There is a chance to trigger a u64 overflow here resulting in an arbitrary (ish)
> vaddr two lines below.
>
>
> > + ret = -EINVAL;
> > + goto err_put_vaddr;
> > + }
>
> You can check this before getting the vaddr which would save you a clean up
> step.
>
> > +
> > + vaddr = base_vaddr + args->offset;
> > +
> > + /* Assumes WC mapping */
> > + ret = wait_event_interruptible_timeout(
> > + gpu->event, *vaddr >= args->value, remaining_jiffies);
>
> I feel like a barrier might be needed before checking *vaddr just in case you
> get the interrupt and wake up the queue before the write posts from the
> hardware.
>
> > +
>
> > + if (ret == 0) {
> > + ret = -ETIMEDOUT;
> > + goto err_put_vaddr;
> > + } else if (ret == -ERESTARTSYS) {
> > + goto err_put_vaddr;
> > + }
>
> You don't need either goto here because both paths execute the following cleanup
> steps. I'm also not sure you need to worry about explicitly checking the
> ERESTARTSYS value, I think that this would be sufficient:
>
> if (ret == 0)
> ret = -ETIMEDOUT;
> else if (ret > 0)
> ret = 0;
>
> > +
>
> Put your err_put_vaddr: label here, but looking up, if you move the bounds check
> before the msm_gem_get_vaddr, I don't think you need a label.
>
> > + msm_gem_put_vaddr(obj);
>
> Put the err_put_gem_object: label here.
>
> > + drm_gem_object_put_unlocked(obj);
> > + return 0;
>
> return ret;
>
> > +
> > +err_put_vaddr:
> > + msm_gem_put_vaddr(obj);
> > +err_put_gem_object:
> > + drm_gem_object_put_unlocked(obj);
> > + return ret;
> > +}
>
> And then these guys aren't needed.
>
> > +
> > static const struct drm_ioctl_desc msm_ioctls[] = {
> > DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param, DRM_RENDER_ALLOW),
> > DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new, DRM_RENDER_ALLOW),
> > @@ -964,6 +1022,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
> > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW, msm_ioctl_submitqueue_new, DRM_RENDER_ALLOW),
> > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE, msm_ioctl_submitqueue_close, DRM_RENDER_ALLOW),
> > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW),
> > + DRM_IOCTL_DEF_DRV(MSM_WAIT_IOVA, msm_ioctl_wait_iova, DRM_RENDER_ALLOW),
> > };
> >
> > static const struct vm_operations_struct vm_ops = {
> > diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
> > index 0b85ed6a3710..8477f28a4ee1 100644
> > --- a/include/uapi/drm/msm_drm.h
> > +++ b/include/uapi/drm/msm_drm.h
> > @@ -298,6 +298,17 @@ struct drm_msm_submitqueue_query {
> > __u32 pad;
> > };
> >
> > +/* This ioctl blocks until the u64 value at bo + offset is greater than or
> > + * equal to the reference value.
> > + */
> > +struct drm_msm_wait_iova {
> > + __u32 handle; /* in, GEM handle */
> > + __u32 pad;
> > + struct drm_msm_timespec timeout; /* in */
> > + __u64 offset; /* offset into bo */
> > + __u64 value; /* reference value */
>
> Any specific reason why we wouldn't just put the offset and value first and save
> ourselves the padding?
I'd actually like to have a pad must-be-zero flag to make this easier
to extend in the future, if needed.
(Maybe there is a use-case for a wait_iova that is scoped to a
specific submit-queue, or we want to add other types of tests beyond
GTE, etc)
BR,
-R
> > +};
> > +
> > #define DRM_MSM_GET_PARAM 0x00
> > /* placeholder:
> > #define DRM_MSM_SET_PARAM 0x01
> > @@ -315,6 +326,7 @@ struct drm_msm_submitqueue_query {
> > #define DRM_MSM_SUBMITQUEUE_NEW 0x0A
> > #define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B
> > #define DRM_MSM_SUBMITQUEUE_QUERY 0x0C
> > +#define DRM_MSM_WAIT_IOVA 0x0D
> >
> > #define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param)
> > #define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
> > @@ -327,6 +339,7 @@ struct drm_msm_submitqueue_query {
> > #define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue)
> > #define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32)
> > #define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_QUERY, struct drm_msm_submitqueue_query)
> > +#define DRM_IOCTL_MSM_WAIT_IOVA DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_WAIT_IOVA, struct drm_msm_wait_iova)
> >
> > #if defined(__cplusplus)
> > }
> > --
> > 2.25.0.rc1.283.g88dfdc4193-goog
> >
> > _______________________________________________
> > Freedreno mailing list
> > [email protected]
> > https://lists.freedesktop.org/mailman/listinfo/freedreno
>
> --
> The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
> a Linux Foundation Collaborative Project
On Mon, Jan 13, 2020 at 9:55 AM Jordan Crouse <[email protected]> wrote:
>
> On Mon, Jan 13, 2020 at 10:36:04AM -0500, Brian Ho wrote:
> > This wait queue is signaled on all IRQs for a given GPU and will be
> > used as part of the new MSM_WAIT_IOVA ioctl so userspace can sleep
> > until the value at a given iova reaches a certain condition.
> >
> > Signed-off-by: Brian Ho <[email protected]>
> > ---
> > drivers/gpu/drm/msm/msm_gpu.c | 4 ++++
> > drivers/gpu/drm/msm/msm_gpu.h | 3 +++
> > 2 files changed, 7 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
> > index a052364a5d74..d7310c1336e5 100644
> > --- a/drivers/gpu/drm/msm/msm_gpu.c
> > +++ b/drivers/gpu/drm/msm/msm_gpu.c
> > @@ -779,6 +779,8 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
> > static irqreturn_t irq_handler(int irq, void *data)
> > {
> > struct msm_gpu *gpu = data;
> > + wake_up_all(&gpu->event);
> > +
>
> I suppose it is intentional to have this happen on *all* interrupts because you
> might be using the CP interrupts for fun and profit and you don't want to plumb
> in callbacks? I suppose it is okay to do this for all interrupts (including
> errors) but if we're spending a lot of time here we might want to only trigger
> on certain IRQs.
Was just talking to Kristian about GPU hangs.. and I suspect we might
want the ioctl to return an error if there is a gpu reset (so that
userspace can use the robustness uapi to test if the gpu reset was
something it cares about, etc)
Which is as good a reason as I can think of for the wake_up_all() on all irqs..
BR,
-R
>
> > return gpu->funcs->irq(gpu);
> > }
> >
> > @@ -871,6 +873,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
> >
> > spin_lock_init(&gpu->perf_lock);
> >
> > + init_waitqueue_head(&gpu->event);
> > +
> >
> > /* Map registers: */
> > gpu->mmio = msm_ioremap(pdev, config->ioname, name);
> > diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> > index ab8f0f9c9dc8..60562f065dbc 100644
> > --- a/drivers/gpu/drm/msm/msm_gpu.h
> > +++ b/drivers/gpu/drm/msm/msm_gpu.h
> > @@ -104,6 +104,9 @@ struct msm_gpu {
> >
> > struct msm_gem_address_space *aspace;
> >
> > + /* GPU-wide wait queue that is signaled on all IRQs */
> > + wait_queue_head_t event;
> > +
> > /* Power Control: */
> > struct regulator *gpu_reg, *gpu_cx;
> > struct clk_bulk_data *grp_clks;
> > --
> > 2.25.0.rc1.283.g88dfdc4193-goog
> >
> > _______________________________________________
> > Freedreno mailing list
> > [email protected]
> > https://lists.freedesktop.org/mailman/listinfo/freedreno
>
> --
> The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
> a Linux Foundation Collaborative Project
On Mon, Jan 13, 2020 at 09:57:43AM -0800, Kristian Kristensen wrote:
> On Mon, Jan 13, 2020 at 8:25 AM Rob Clark <[email protected]> wrote:
>
> > On Mon, Jan 13, 2020 at 7:37 AM Brian Ho <[email protected]> wrote:
> > >
> > > Implements an ioctl to wait until a value at a given iova is greater
> > > than or equal to a supplied value.
> > >
> > > This will initially be used by turnip (open-source Vulkan driver for
> > > QC in mesa) for occlusion queries where the userspace driver can
> > > block on a query becoming available before continuing via
> > > vkGetQueryPoolResults.
> > >
> > > Signed-off-by: Brian Ho <[email protected]>
> > > ---
> > > drivers/gpu/drm/msm/msm_drv.c | 63 +++++++++++++++++++++++++++++++++--
> > > include/uapi/drm/msm_drm.h | 13 ++++++++
> > > 2 files changed, 74 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/msm/msm_drv.c
> > b/drivers/gpu/drm/msm/msm_drv.c
> > > index c84f0a8b3f2c..dcc46874a5a2 100644
> > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > @@ -36,10 +36,11 @@
> > > * MSM_GEM_INFO ioctl.
> > > * - 1.4.0 - softpin, MSM_RELOC_BO_DUMP, and GEM_INFO support to set/get
> > > * GEM object's debug name
> > > - * - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
> > > + * - 1.5.0 - Add SUBMITQUEUE_QUERY ioctl
> > > + * - 1.6.0 - Add WAIT_IOVA ioctl
> > > */
> > > #define MSM_VERSION_MAJOR 1
> > > -#define MSM_VERSION_MINOR 5
> > > +#define MSM_VERSION_MINOR 6
> > > #define MSM_VERSION_PATCHLEVEL 0
> > >
> > > static const struct drm_mode_config_funcs mode_config_funcs = {
> > > @@ -952,6 +953,63 @@ static int msm_ioctl_submitqueue_close(struct
> > drm_device *dev, void *data,
> > > return msm_submitqueue_remove(file->driver_priv, id);
> > > }
> > >
> > > +static int msm_ioctl_wait_iova(struct drm_device *dev, void *data,
> > > + struct drm_file *file)
> > > +{
> > > + struct msm_drm_private *priv = dev->dev_private;
> > > + struct drm_gem_object *obj;
> > > + struct drm_msm_wait_iova *args = data;
> > > + ktime_t timeout = to_ktime(args->timeout);
> > > + unsigned long remaining_jiffies = timeout_to_jiffies(&timeout);
> > > + struct msm_gpu *gpu = priv->gpu;
> > > + void *base_vaddr;
> > > + uint64_t *vaddr;
> > > + int ret;
> > > +
> > > + if (args->pad)
> > > + return -EINVAL;
> > > +
> > > + if (!gpu)
> > > + return 0;
> >
> > hmm, I'm not sure we should return zero in this case.. maybe -ENODEV?
> >
> > > +
> > > + obj = drm_gem_object_lookup(file, args->handle);
> > > + if (!obj)
> > > + return -ENOENT;
> > > +
> > > + base_vaddr = msm_gem_get_vaddr(obj);
> > > + if (IS_ERR(base_vaddr)) {
> > > + ret = PTR_ERR(base_vaddr);
> > > + goto err_put_gem_object;
> > > + }
> > > + if (args->offset + sizeof(*vaddr) > obj->size) {
> > > + ret = -EINVAL;
> > > + goto err_put_vaddr;
> > > + }
> > > +
> > > + vaddr = base_vaddr + args->offset;
> > > +
> > > + /* Assumes WC mapping */
> > > + ret = wait_event_interruptible_timeout(
> > > + gpu->event, *vaddr >= args->value,
> > remaining_jiffies);
> >
>
> This needs to do the awkward looking
>
> (int64_t)(*data - value) >= 0
>
> to properly handle the wraparound case.
>
I think this comparison will run into issues if we allow for 64-bit
reference values. For example, if value is ULLONG_MAX, and *data
starts at 0 on the first comparison, we'll immediately return.
It's not too much of an issue in fence_completed (msm_fence.c), but
in this ioctl, *data can grow at an arbitrary rate. Are we concerned
about this?
> > +
> > > + if (ret == 0) {
> > > + ret = -ETIMEDOUT;
> > > + goto err_put_vaddr;
> > > + } else if (ret == -ERESTARTSYS) {
> > > + goto err_put_vaddr;
> > > + }
> >
> > maybe:
> >
> > } else {
> > ret = 0;
> > }
> >
> > and then drop the next three lines?
> >
> > > +
> > > + msm_gem_put_vaddr(obj);
> > > + drm_gem_object_put_unlocked(obj);
> > > + return 0;
> > > +
> > > +err_put_vaddr:
> > > + msm_gem_put_vaddr(obj);
> > > +err_put_gem_object:
> > > + drm_gem_object_put_unlocked(obj);
> > > + return ret;
> > > +}
> > > +
> > > static const struct drm_ioctl_desc msm_ioctls[] = {
> > > DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param,
> > DRM_RENDER_ALLOW),
> > > DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new,
> > DRM_RENDER_ALLOW),
> > > @@ -964,6 +1022,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
> > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW,
> > msm_ioctl_submitqueue_new, DRM_RENDER_ALLOW),
> > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE,
> > msm_ioctl_submitqueue_close, DRM_RENDER_ALLOW),
> > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY,
> > msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW),
> > > + DRM_IOCTL_DEF_DRV(MSM_WAIT_IOVA, msm_ioctl_wait_iova,
> > DRM_RENDER_ALLOW),
> > > };
> > >
> > > static const struct vm_operations_struct vm_ops = {
> > > diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
> > > index 0b85ed6a3710..8477f28a4ee1 100644
> > > --- a/include/uapi/drm/msm_drm.h
> > > +++ b/include/uapi/drm/msm_drm.h
> > > @@ -298,6 +298,17 @@ struct drm_msm_submitqueue_query {
> > > __u32 pad;
> > > };
> > >
> > > +/* This ioctl blocks until the u64 value at bo + offset is greater than
> > or
> > > + * equal to the reference value.
> > > + */
> > > +struct drm_msm_wait_iova {
> > > + __u32 handle; /* in, GEM handle */
> > > + __u32 pad;
> > > + struct drm_msm_timespec timeout; /* in */
> > > + __u64 offset; /* offset into bo */
> > > + __u64 value; /* reference value */
> >
> > Maybe we should go ahead and add a __u64 mask;
> >
> > that would let us wait for 32b values as well, and wait for bits in a
> > bitmask
> >
>
> I think we'd be OK to just default to 32 bit values instead, since most of
> the CP commands that this is intended to work with (CP_EVENT_WRITE,
> CP_WAIT_MEM_GTE etc) operate on 32 bit values. We could move 'value' to the
> slot right after 'handle' but then we'd not have any pad/reserved fields.
> Maybe we keep 'value' 64 bit but restrict it to 32 bits, with an option to
> add a 64 bit flag in 'pad' later on?
>
FWIW, the current usage of this in my mesa MR uses a 64 bit value.
There's no super great reason that the available bit is 64 bits and
not 32 bits (I think it made the addressing math a bit simpler), but
I'm fine with whatever you all decide on here.
> >
> > Other than those minor comments, it looks pretty good to me
> >
> > BR,
> > -R
> >
> > > +};
> > > +
> > > #define DRM_MSM_GET_PARAM 0x00
> > > /* placeholder:
> > > #define DRM_MSM_SET_PARAM 0x01
> > > @@ -315,6 +326,7 @@ struct drm_msm_submitqueue_query {
> > > #define DRM_MSM_SUBMITQUEUE_NEW 0x0A
> > > #define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B
> > > #define DRM_MSM_SUBMITQUEUE_QUERY 0x0C
> > > +#define DRM_MSM_WAIT_IOVA 0x0D
> > >
> > > #define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE +
> > DRM_MSM_GET_PARAM, struct drm_msm_param)
> > > #define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE +
> > DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
> > > @@ -327,6 +339,7 @@ struct drm_msm_submitqueue_query {
> > > #define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE +
> > DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue)
> > > #define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE +
> > DRM_MSM_SUBMITQUEUE_CLOSE, __u32)
> > > #define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY DRM_IOW (DRM_COMMAND_BASE +
> > DRM_MSM_SUBMITQUEUE_QUERY, struct drm_msm_submitqueue_query)
> > > +#define DRM_IOCTL_MSM_WAIT_IOVA DRM_IOW (DRM_COMMAND_BASE +
> > DRM_MSM_WAIT_IOVA, struct drm_msm_wait_iova)
> > >
> > > #if defined(__cplusplus)
> > > }
> > > --
> > > 2.25.0.rc1.283.g88dfdc4193-goog
> > >
> >
On Mon, Jan 13, 2020 at 2:55 PM Brian Ho <[email protected]> wrote:
>
> On Mon, Jan 13, 2020 at 09:57:43AM -0800, Kristian Kristensen wrote:
> > On Mon, Jan 13, 2020 at 8:25 AM Rob Clark <[email protected]> wrote:
> >
> > > On Mon, Jan 13, 2020 at 7:37 AM Brian Ho <[email protected]> wrote:
> > > >
> > > > Implements an ioctl to wait until a value at a given iova is greater
> > > > than or equal to a supplied value.
> > > >
> > > > This will initially be used by turnip (open-source Vulkan driver for
> > > > QC in mesa) for occlusion queries where the userspace driver can
> > > > block on a query becoming available before continuing via
> > > > vkGetQueryPoolResults.
> > > >
> > > > Signed-off-by: Brian Ho <[email protected]>
> > > > ---
> > > > drivers/gpu/drm/msm/msm_drv.c | 63 +++++++++++++++++++++++++++++++++--
> > > > include/uapi/drm/msm_drm.h | 13 ++++++++
> > > > 2 files changed, 74 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c
> > > b/drivers/gpu/drm/msm/msm_drv.c
> > > > index c84f0a8b3f2c..dcc46874a5a2 100644
> > > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > > @@ -36,10 +36,11 @@
> > > > * MSM_GEM_INFO ioctl.
> > > > * - 1.4.0 - softpin, MSM_RELOC_BO_DUMP, and GEM_INFO support to set/get
> > > > * GEM object's debug name
> > > > - * - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
> > > > + * - 1.5.0 - Add SUBMITQUEUE_QUERY ioctl
> > > > + * - 1.6.0 - Add WAIT_IOVA ioctl
> > > > */
> > > > #define MSM_VERSION_MAJOR 1
> > > > -#define MSM_VERSION_MINOR 5
> > > > +#define MSM_VERSION_MINOR 6
> > > > #define MSM_VERSION_PATCHLEVEL 0
> > > >
> > > > static const struct drm_mode_config_funcs mode_config_funcs = {
> > > > @@ -952,6 +953,63 @@ static int msm_ioctl_submitqueue_close(struct
> > > drm_device *dev, void *data,
> > > > return msm_submitqueue_remove(file->driver_priv, id);
> > > > }
> > > >
> > > > +static int msm_ioctl_wait_iova(struct drm_device *dev, void *data,
> > > > + struct drm_file *file)
> > > > +{
> > > > + struct msm_drm_private *priv = dev->dev_private;
> > > > + struct drm_gem_object *obj;
> > > > + struct drm_msm_wait_iova *args = data;
> > > > + ktime_t timeout = to_ktime(args->timeout);
> > > > + unsigned long remaining_jiffies = timeout_to_jiffies(&timeout);
> > > > + struct msm_gpu *gpu = priv->gpu;
> > > > + void *base_vaddr;
> > > > + uint64_t *vaddr;
> > > > + int ret;
> > > > +
> > > > + if (args->pad)
> > > > + return -EINVAL;
> > > > +
> > > > + if (!gpu)
> > > > + return 0;
> > >
> > > hmm, I'm not sure we should return zero in this case.. maybe -ENODEV?
> > >
> > > > +
> > > > + obj = drm_gem_object_lookup(file, args->handle);
> > > > + if (!obj)
> > > > + return -ENOENT;
> > > > +
> > > > + base_vaddr = msm_gem_get_vaddr(obj);
> > > > + if (IS_ERR(base_vaddr)) {
> > > > + ret = PTR_ERR(base_vaddr);
> > > > + goto err_put_gem_object;
> > > > + }
> > > > + if (args->offset + sizeof(*vaddr) > obj->size) {
> > > > + ret = -EINVAL;
> > > > + goto err_put_vaddr;
> > > > + }
> > > > +
> > > > + vaddr = base_vaddr + args->offset;
> > > > +
> > > > + /* Assumes WC mapping */
> > > > + ret = wait_event_interruptible_timeout(
> > > > + gpu->event, *vaddr >= args->value,
> > > remaining_jiffies);
> > >
> >
> > This needs to do the awkward looking
> >
> > (int64_t)(*data - value) >= 0
> >
> > to properly handle the wraparound case.
> >
>
> I think this comparison will run into issues if we allow for 64-bit
> reference values. For example, if value is ULLONG_MAX, and *data
> starts at 0 on the first comparison, we'll immediately return.
>
> It's not too much of an issue in fence_completed (msm_fence.c), but
> in this ioctl, *data can grow at an arbitrary rate. Are we concerned
> about this?
>
> > > +
> > > > + if (ret == 0) {
> > > > + ret = -ETIMEDOUT;
> > > > + goto err_put_vaddr;
> > > > + } else if (ret == -ERESTARTSYS) {
> > > > + goto err_put_vaddr;
> > > > + }
> > >
> > > maybe:
> > >
> > > } else {
> > > ret = 0;
> > > }
> > >
> > > and then drop the next three lines?
> > >
> > > > +
> > > > + msm_gem_put_vaddr(obj);
> > > > + drm_gem_object_put_unlocked(obj);
> > > > + return 0;
> > > > +
> > > > +err_put_vaddr:
> > > > + msm_gem_put_vaddr(obj);
> > > > +err_put_gem_object:
> > > > + drm_gem_object_put_unlocked(obj);
> > > > + return ret;
> > > > +}
> > > > +
> > > > static const struct drm_ioctl_desc msm_ioctls[] = {
> > > > DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param,
> > > DRM_RENDER_ALLOW),
> > > > DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new,
> > > DRM_RENDER_ALLOW),
> > > > @@ -964,6 +1022,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
> > > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW,
> > > msm_ioctl_submitqueue_new, DRM_RENDER_ALLOW),
> > > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE,
> > > msm_ioctl_submitqueue_close, DRM_RENDER_ALLOW),
> > > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY,
> > > msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW),
> > > > + DRM_IOCTL_DEF_DRV(MSM_WAIT_IOVA, msm_ioctl_wait_iova,
> > > DRM_RENDER_ALLOW),
> > > > };
> > > >
> > > > static const struct vm_operations_struct vm_ops = {
> > > > diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
> > > > index 0b85ed6a3710..8477f28a4ee1 100644
> > > > --- a/include/uapi/drm/msm_drm.h
> > > > +++ b/include/uapi/drm/msm_drm.h
> > > > @@ -298,6 +298,17 @@ struct drm_msm_submitqueue_query {
> > > > __u32 pad;
> > > > };
> > > >
> > > > +/* This ioctl blocks until the u64 value at bo + offset is greater than
> > > or
> > > > + * equal to the reference value.
> > > > + */
> > > > +struct drm_msm_wait_iova {
> > > > + __u32 handle; /* in, GEM handle */
> > > > + __u32 pad;
> > > > + struct drm_msm_timespec timeout; /* in */
> > > > + __u64 offset; /* offset into bo */
> > > > + __u64 value; /* reference value */
> > >
> > > Maybe we should go ahead and add a __u64 mask;
> > >
> > > that would let us wait for 32b values as well, and wait for bits in a
> > > bitmask
> > >
> >
> > I think we'd be OK to just default to 32 bit values instead, since most of
> > the CP commands that this is intended to work with (CP_EVENT_WRITE,
> > CP_WAIT_MEM_GTE etc) operate on 32 bit values. We could move 'value' to the
> > slot right after 'handle' but then we'd not have any pad/reserved fields.
> > Maybe we keep 'value' 64 bit but restrict it to 32 bits, with an option to
> > add a 64 bit flag in 'pad' later on?
> >
>
> FWIW, the current usage of this in my mesa MR uses a 64 bit value.
> There's no super great reason that the available bit is 64 bits and
> not 32 bits (I think it made the addressing math a bit simpler), but
> I'm fine with whatever you all decide on here.
>
I assume you are waiting for a fence value written w/ CP_EVENT_WRITE?
Or at least that is what I'd recommend. That would be 32b
BR,
-R
> > >
> > > Other than those minor comments, it looks pretty good to me
> > >
> > > BR,
> > > -R
> > >
> > > > +};
> > > > +
> > > > #define DRM_MSM_GET_PARAM 0x00
> > > > /* placeholder:
> > > > #define DRM_MSM_SET_PARAM 0x01
> > > > @@ -315,6 +326,7 @@ struct drm_msm_submitqueue_query {
> > > > #define DRM_MSM_SUBMITQUEUE_NEW 0x0A
> > > > #define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B
> > > > #define DRM_MSM_SUBMITQUEUE_QUERY 0x0C
> > > > +#define DRM_MSM_WAIT_IOVA 0x0D
> > > >
> > > > #define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE +
> > > DRM_MSM_GET_PARAM, struct drm_msm_param)
> > > > #define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE +
> > > DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
> > > > @@ -327,6 +339,7 @@ struct drm_msm_submitqueue_query {
> > > > #define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE +
> > > DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue)
> > > > #define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE +
> > > DRM_MSM_SUBMITQUEUE_CLOSE, __u32)
> > > > #define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY DRM_IOW (DRM_COMMAND_BASE +
> > > DRM_MSM_SUBMITQUEUE_QUERY, struct drm_msm_submitqueue_query)
> > > > +#define DRM_IOCTL_MSM_WAIT_IOVA DRM_IOW (DRM_COMMAND_BASE +
> > > DRM_MSM_WAIT_IOVA, struct drm_msm_wait_iova)
> > > >
> > > > #if defined(__cplusplus)
> > > > }
> > > > --
> > > > 2.25.0.rc1.283.g88dfdc4193-goog
> > > >
> > >
On Mon, Jan 13, 2020 at 3:17 PM Rob Clark <[email protected]> wrote:
>
> On Mon, Jan 13, 2020 at 2:55 PM Brian Ho <[email protected]> wrote:
> >
> > On Mon, Jan 13, 2020 at 09:57:43AM -0800, Kristian Kristensen wrote:
> > > On Mon, Jan 13, 2020 at 8:25 AM Rob Clark <[email protected]> wrote:
> > >
> > > > On Mon, Jan 13, 2020 at 7:37 AM Brian Ho <[email protected]> wrote:
> > > > >
> > > > > Implements an ioctl to wait until a value at a given iova is greater
> > > > > than or equal to a supplied value.
> > > > >
> > > > > This will initially be used by turnip (open-source Vulkan driver for
> > > > > QC in mesa) for occlusion queries where the userspace driver can
> > > > > block on a query becoming available before continuing via
> > > > > vkGetQueryPoolResults.
> > > > >
> > > > > Signed-off-by: Brian Ho <[email protected]>
> > > > > ---
> > > > > drivers/gpu/drm/msm/msm_drv.c | 63 +++++++++++++++++++++++++++++++++--
> > > > > include/uapi/drm/msm_drm.h | 13 ++++++++
> > > > > 2 files changed, 74 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c
> > > > b/drivers/gpu/drm/msm/msm_drv.c
> > > > > index c84f0a8b3f2c..dcc46874a5a2 100644
> > > > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > > > @@ -36,10 +36,11 @@
> > > > > * MSM_GEM_INFO ioctl.
> > > > > * - 1.4.0 - softpin, MSM_RELOC_BO_DUMP, and GEM_INFO support to set/get
> > > > > * GEM object's debug name
> > > > > - * - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
> > > > > + * - 1.5.0 - Add SUBMITQUEUE_QUERY ioctl
> > > > > + * - 1.6.0 - Add WAIT_IOVA ioctl
> > > > > */
> > > > > #define MSM_VERSION_MAJOR 1
> > > > > -#define MSM_VERSION_MINOR 5
> > > > > +#define MSM_VERSION_MINOR 6
> > > > > #define MSM_VERSION_PATCHLEVEL 0
> > > > >
> > > > > static const struct drm_mode_config_funcs mode_config_funcs = {
> > > > > @@ -952,6 +953,63 @@ static int msm_ioctl_submitqueue_close(struct
> > > > drm_device *dev, void *data,
> > > > > return msm_submitqueue_remove(file->driver_priv, id);
> > > > > }
> > > > >
> > > > > +static int msm_ioctl_wait_iova(struct drm_device *dev, void *data,
> > > > > + struct drm_file *file)
> > > > > +{
> > > > > + struct msm_drm_private *priv = dev->dev_private;
> > > > > + struct drm_gem_object *obj;
> > > > > + struct drm_msm_wait_iova *args = data;
> > > > > + ktime_t timeout = to_ktime(args->timeout);
> > > > > + unsigned long remaining_jiffies = timeout_to_jiffies(&timeout);
> > > > > + struct msm_gpu *gpu = priv->gpu;
> > > > > + void *base_vaddr;
> > > > > + uint64_t *vaddr;
> > > > > + int ret;
> > > > > +
> > > > > + if (args->pad)
> > > > > + return -EINVAL;
> > > > > +
> > > > > + if (!gpu)
> > > > > + return 0;
> > > >
> > > > hmm, I'm not sure we should return zero in this case.. maybe -ENODEV?
> > > >
> > > > > +
> > > > > + obj = drm_gem_object_lookup(file, args->handle);
> > > > > + if (!obj)
> > > > > + return -ENOENT;
> > > > > +
> > > > > + base_vaddr = msm_gem_get_vaddr(obj);
> > > > > + if (IS_ERR(base_vaddr)) {
> > > > > + ret = PTR_ERR(base_vaddr);
> > > > > + goto err_put_gem_object;
> > > > > + }
> > > > > + if (args->offset + sizeof(*vaddr) > obj->size) {
> > > > > + ret = -EINVAL;
> > > > > + goto err_put_vaddr;
> > > > > + }
> > > > > +
> > > > > + vaddr = base_vaddr + args->offset;
> > > > > +
> > > > > + /* Assumes WC mapping */
> > > > > + ret = wait_event_interruptible_timeout(
> > > > > + gpu->event, *vaddr >= args->value,
> > > > remaining_jiffies);
> > > >
> > >
> > > This needs to do the awkward looking
> > >
> > > (int64_t)(*data - value) >= 0
> > >
> > > to properly handle the wraparound case.
> > >
> >
> > I think this comparison will run into issues if we allow for 64-bit
> > reference values. For example, if value is ULLONG_MAX, and *data
> > starts at 0 on the first comparison, we'll immediately return.
The comparison will have to account for the number of bits in the
serial number. The (int64_t) (*data - value) works for 64 bit unsigned
serial numbers, but for 32 bit serials as suggested below, we need to
cast to int32_t. It does work though, in the case where value is
ULLONG_MAX and *data is 0: 0 - ULLONG_MAX is one, which is
correct. The serial numbers wrap around and the distance is computed
modulo 2^64. See
https://en.wikipedia.org/wiki/Serial_number_arithmetic.
> >
> > It's not too much of an issue in fence_completed (msm_fence.c), but
> > in this ioctl, *data can grow at an arbitrary rate. Are we concerned
> > about this?
> >
> > > > +
> > > > > + if (ret == 0) {
> > > > > + ret = -ETIMEDOUT;
> > > > > + goto err_put_vaddr;
> > > > > + } else if (ret == -ERESTARTSYS) {
> > > > > + goto err_put_vaddr;
> > > > > + }
> > > >
> > > > maybe:
> > > >
> > > > } else {
> > > > ret = 0;
> > > > }
> > > >
> > > > and then drop the next three lines?
> > > >
> > > > > +
> > > > > + msm_gem_put_vaddr(obj);
> > > > > + drm_gem_object_put_unlocked(obj);
> > > > > + return 0;
> > > > > +
> > > > > +err_put_vaddr:
> > > > > + msm_gem_put_vaddr(obj);
> > > > > +err_put_gem_object:
> > > > > + drm_gem_object_put_unlocked(obj);
> > > > > + return ret;
> > > > > +}
> > > > > +
> > > > > static const struct drm_ioctl_desc msm_ioctls[] = {
> > > > > DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param,
> > > > DRM_RENDER_ALLOW),
> > > > > DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new,
> > > > DRM_RENDER_ALLOW),
> > > > > @@ -964,6 +1022,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
> > > > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW,
> > > > msm_ioctl_submitqueue_new, DRM_RENDER_ALLOW),
> > > > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE,
> > > > msm_ioctl_submitqueue_close, DRM_RENDER_ALLOW),
> > > > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY,
> > > > msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW),
> > > > > + DRM_IOCTL_DEF_DRV(MSM_WAIT_IOVA, msm_ioctl_wait_iova,
> > > > DRM_RENDER_ALLOW),
> > > > > };
> > > > >
> > > > > static const struct vm_operations_struct vm_ops = {
> > > > > diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
> > > > > index 0b85ed6a3710..8477f28a4ee1 100644
> > > > > --- a/include/uapi/drm/msm_drm.h
> > > > > +++ b/include/uapi/drm/msm_drm.h
> > > > > @@ -298,6 +298,17 @@ struct drm_msm_submitqueue_query {
> > > > > __u32 pad;
> > > > > };
> > > > >
> > > > > +/* This ioctl blocks until the u64 value at bo + offset is greater than
> > > > or
> > > > > + * equal to the reference value.
> > > > > + */
> > > > > +struct drm_msm_wait_iova {
> > > > > + __u32 handle; /* in, GEM handle */
> > > > > + __u32 pad;
> > > > > + struct drm_msm_timespec timeout; /* in */
> > > > > + __u64 offset; /* offset into bo */
> > > > > + __u64 value; /* reference value */
> > > >
> > > > Maybe we should go ahead and add a __u64 mask;
> > > >
> > > > that would let us wait for 32b values as well, and wait for bits in a
> > > > bitmask
> > > >
> > >
> > > I think we'd be OK to just default to 32 bit values instead, since most of
> > > the CP commands that this is intended to work with (CP_EVENT_WRITE,
> > > CP_WAIT_MEM_GTE etc) operate on 32 bit values. We could move 'value' to the
> > > slot right after 'handle' but then we'd not have any pad/reserved fields.
> > > Maybe we keep 'value' 64 bit but restrict it to 32 bits, with an option to
> > > add a 64 bit flag in 'pad' later on?
> > >
> >
> > FWIW, the current usage of this in my mesa MR uses a 64 bit value.
> > There's no super great reason that the available bit is 64 bits and
> > not 32 bits (I think it made the addressing math a bit simpler), but
> > I'm fine with whatever you all decide on here.
> >
>
> I assume you are waiting for a fence value written w/ CP_EVENT_WRITE?
> Or at least that is what I'd recommend. That would be 32b
>
> BR,
> -R
>
> > > >
> > > > Other than those minor comments, it looks pretty good to me
> > > >
> > > > BR,
> > > > -R
> > > >
> > > > > +};
> > > > > +
> > > > > #define DRM_MSM_GET_PARAM 0x00
> > > > > /* placeholder:
> > > > > #define DRM_MSM_SET_PARAM 0x01
> > > > > @@ -315,6 +326,7 @@ struct drm_msm_submitqueue_query {
> > > > > #define DRM_MSM_SUBMITQUEUE_NEW 0x0A
> > > > > #define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B
> > > > > #define DRM_MSM_SUBMITQUEUE_QUERY 0x0C
> > > > > +#define DRM_MSM_WAIT_IOVA 0x0D
> > > > >
> > > > > #define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE +
> > > > DRM_MSM_GET_PARAM, struct drm_msm_param)
> > > > > #define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE +
> > > > DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
> > > > > @@ -327,6 +339,7 @@ struct drm_msm_submitqueue_query {
> > > > > #define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE +
> > > > DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue)
> > > > > #define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE +
> > > > DRM_MSM_SUBMITQUEUE_CLOSE, __u32)
> > > > > #define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY DRM_IOW (DRM_COMMAND_BASE +
> > > > DRM_MSM_SUBMITQUEUE_QUERY, struct drm_msm_submitqueue_query)
> > > > > +#define DRM_IOCTL_MSM_WAIT_IOVA DRM_IOW (DRM_COMMAND_BASE +
> > > > DRM_MSM_WAIT_IOVA, struct drm_msm_wait_iova)
> > > > >
> > > > > #if defined(__cplusplus)
> > > > > }
> > > > > --
> > > > > 2.25.0.rc1.283.g88dfdc4193-goog
> > > > >
> > > >
> _______________________________________________
> Freedreno mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/freedreno
On Mon, Jan 13, 2020 at 03:17:38PM -0800, Rob Clark wrote:
> On Mon, Jan 13, 2020 at 2:55 PM Brian Ho <[email protected]> wrote:
> >
> > On Mon, Jan 13, 2020 at 09:57:43AM -0800, Kristian Kristensen wrote:
> > > On Mon, Jan 13, 2020 at 8:25 AM Rob Clark <[email protected]> wrote:
> > >
> > > > On Mon, Jan 13, 2020 at 7:37 AM Brian Ho <[email protected]> wrote:
> > > > >
> > > > > Implements an ioctl to wait until a value at a given iova is greater
> > > > > than or equal to a supplied value.
> > > > >
> > > > > This will initially be used by turnip (open-source Vulkan driver for
> > > > > QC in mesa) for occlusion queries where the userspace driver can
> > > > > block on a query becoming available before continuing via
> > > > > vkGetQueryPoolResults.
> > > > >
> > > > > Signed-off-by: Brian Ho <[email protected]>
> > > > > ---
> > > > > drivers/gpu/drm/msm/msm_drv.c | 63 +++++++++++++++++++++++++++++++++--
> > > > > include/uapi/drm/msm_drm.h | 13 ++++++++
> > > > > 2 files changed, 74 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c
> > > > b/drivers/gpu/drm/msm/msm_drv.c
> > > > > index c84f0a8b3f2c..dcc46874a5a2 100644
> > > > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > > > @@ -36,10 +36,11 @@
> > > > > * MSM_GEM_INFO ioctl.
> > > > > * - 1.4.0 - softpin, MSM_RELOC_BO_DUMP, and GEM_INFO support to set/get
> > > > > * GEM object's debug name
> > > > > - * - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
> > > > > + * - 1.5.0 - Add SUBMITQUEUE_QUERY ioctl
> > > > > + * - 1.6.0 - Add WAIT_IOVA ioctl
> > > > > */
> > > > > #define MSM_VERSION_MAJOR 1
> > > > > -#define MSM_VERSION_MINOR 5
> > > > > +#define MSM_VERSION_MINOR 6
> > > > > #define MSM_VERSION_PATCHLEVEL 0
> > > > >
> > > > > static const struct drm_mode_config_funcs mode_config_funcs = {
> > > > > @@ -952,6 +953,63 @@ static int msm_ioctl_submitqueue_close(struct
> > > > drm_device *dev, void *data,
> > > > > return msm_submitqueue_remove(file->driver_priv, id);
> > > > > }
> > > > >
> > > > > +static int msm_ioctl_wait_iova(struct drm_device *dev, void *data,
> > > > > + struct drm_file *file)
> > > > > +{
> > > > > + struct msm_drm_private *priv = dev->dev_private;
> > > > > + struct drm_gem_object *obj;
> > > > > + struct drm_msm_wait_iova *args = data;
> > > > > + ktime_t timeout = to_ktime(args->timeout);
> > > > > + unsigned long remaining_jiffies = timeout_to_jiffies(&timeout);
> > > > > + struct msm_gpu *gpu = priv->gpu;
> > > > > + void *base_vaddr;
> > > > > + uint64_t *vaddr;
> > > > > + int ret;
> > > > > +
> > > > > + if (args->pad)
> > > > > + return -EINVAL;
> > > > > +
> > > > > + if (!gpu)
> > > > > + return 0;
> > > >
> > > > hmm, I'm not sure we should return zero in this case.. maybe -ENODEV?
> > > >
> > > > > +
> > > > > + obj = drm_gem_object_lookup(file, args->handle);
> > > > > + if (!obj)
> > > > > + return -ENOENT;
> > > > > +
> > > > > + base_vaddr = msm_gem_get_vaddr(obj);
> > > > > + if (IS_ERR(base_vaddr)) {
> > > > > + ret = PTR_ERR(base_vaddr);
> > > > > + goto err_put_gem_object;
> > > > > + }
> > > > > + if (args->offset + sizeof(*vaddr) > obj->size) {
> > > > > + ret = -EINVAL;
> > > > > + goto err_put_vaddr;
> > > > > + }
> > > > > +
> > > > > + vaddr = base_vaddr + args->offset;
> > > > > +
> > > > > + /* Assumes WC mapping */
> > > > > + ret = wait_event_interruptible_timeout(
> > > > > + gpu->event, *vaddr >= args->value,
> > > > remaining_jiffies);
> > > >
> > >
> > > This needs to do the awkward looking
> > >
> > > (int64_t)(*data - value) >= 0
> > >
> > > to properly handle the wraparound case.
> > >
> >
> > I think this comparison will run into issues if we allow for 64-bit
> > reference values. For example, if value is ULLONG_MAX, and *data
> > starts at 0 on the first comparison, we'll immediately return.
> >
> > It's not too much of an issue in fence_completed (msm_fence.c), but
> > in this ioctl, *data can grow at an arbitrary rate. Are we concerned
> > about this?
> >
> > > > +
> > > > > + if (ret == 0) {
> > > > > + ret = -ETIMEDOUT;
> > > > > + goto err_put_vaddr;
> > > > > + } else if (ret == -ERESTARTSYS) {
> > > > > + goto err_put_vaddr;
> > > > > + }
> > > >
> > > > maybe:
> > > >
> > > > } else {
> > > > ret = 0;
> > > > }
> > > >
> > > > and then drop the next three lines?
> > > >
> > > > > +
> > > > > + msm_gem_put_vaddr(obj);
> > > > > + drm_gem_object_put_unlocked(obj);
> > > > > + return 0;
> > > > > +
> > > > > +err_put_vaddr:
> > > > > + msm_gem_put_vaddr(obj);
> > > > > +err_put_gem_object:
> > > > > + drm_gem_object_put_unlocked(obj);
> > > > > + return ret;
> > > > > +}
> > > > > +
> > > > > static const struct drm_ioctl_desc msm_ioctls[] = {
> > > > > DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param,
> > > > DRM_RENDER_ALLOW),
> > > > > DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new,
> > > > DRM_RENDER_ALLOW),
> > > > > @@ -964,6 +1022,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
> > > > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW,
> > > > msm_ioctl_submitqueue_new, DRM_RENDER_ALLOW),
> > > > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE,
> > > > msm_ioctl_submitqueue_close, DRM_RENDER_ALLOW),
> > > > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY,
> > > > msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW),
> > > > > + DRM_IOCTL_DEF_DRV(MSM_WAIT_IOVA, msm_ioctl_wait_iova,
> > > > DRM_RENDER_ALLOW),
> > > > > };
> > > > >
> > > > > static const struct vm_operations_struct vm_ops = {
> > > > > diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
> > > > > index 0b85ed6a3710..8477f28a4ee1 100644
> > > > > --- a/include/uapi/drm/msm_drm.h
> > > > > +++ b/include/uapi/drm/msm_drm.h
> > > > > @@ -298,6 +298,17 @@ struct drm_msm_submitqueue_query {
> > > > > __u32 pad;
> > > > > };
> > > > >
> > > > > +/* This ioctl blocks until the u64 value at bo + offset is greater than
> > > > or
> > > > > + * equal to the reference value.
> > > > > + */
> > > > > +struct drm_msm_wait_iova {
> > > > > + __u32 handle; /* in, GEM handle */
> > > > > + __u32 pad;
> > > > > + struct drm_msm_timespec timeout; /* in */
> > > > > + __u64 offset; /* offset into bo */
> > > > > + __u64 value; /* reference value */
> > > >
> > > > Maybe we should go ahead and add a __u64 mask;
> > > >
> > > > that would let us wait for 32b values as well, and wait for bits in a
> > > > bitmask
> > > >
> > >
> > > I think we'd be OK to just default to 32 bit values instead, since most of
> > > the CP commands that this is intended to work with (CP_EVENT_WRITE,
> > > CP_WAIT_MEM_GTE etc) operate on 32 bit values. We could move 'value' to the
> > > slot right after 'handle' but then we'd not have any pad/reserved fields.
> > > Maybe we keep 'value' 64 bit but restrict it to 32 bits, with an option to
> > > add a 64 bit flag in 'pad' later on?
> > >
> >
> > FWIW, the current usage of this in my mesa MR uses a 64 bit value.
> > There's no super great reason that the available bit is 64 bits and
> > not 32 bits (I think it made the addressing math a bit simpler), but
> > I'm fine with whatever you all decide on here.
> >
>
> I assume you are waiting for a fence value written w/ CP_EVENT_WRITE?
> Or at least that is what I'd recommend. That would be 32b
>
> BR,
> -R
>
So it's actually a little bit different than that. I allocate a bo
for the occlusion query which has an "availability" bit (0 for
unavailable, 1 for available). When the occlusion query ends, we
write the fragments passed value to the bo via CP_EVENT_WRITE and
then wait for that write to complete before setting the available
bit to 1 via a simple CP_MEM_WRITE [1].
It's that CP_MEM_WRITE that I plan on waiting on with this new iova
ioctl.
[1] https://gitlab.freedesktop.org/mesa/mesa/blob/768106c50a5569796bb6d5e04b5e4d65c1d00ea0/src/freedreno/vulkan/tu_query.c#L529
> > > >
> > > > Other than those minor comments, it looks pretty good to me
> > > >
> > > > BR,
> > > > -R
> > > >
> > > > > +};
> > > > > +
> > > > > #define DRM_MSM_GET_PARAM 0x00
> > > > > /* placeholder:
> > > > > #define DRM_MSM_SET_PARAM 0x01
> > > > > @@ -315,6 +326,7 @@ struct drm_msm_submitqueue_query {
> > > > > #define DRM_MSM_SUBMITQUEUE_NEW 0x0A
> > > > > #define DRM_MSM_SUBMITQUEUE_CLOSE 0x0B
> > > > > #define DRM_MSM_SUBMITQUEUE_QUERY 0x0C
> > > > > +#define DRM_MSM_WAIT_IOVA 0x0D
> > > > >
> > > > > #define DRM_IOCTL_MSM_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE +
> > > > DRM_MSM_GET_PARAM, struct drm_msm_param)
> > > > > #define DRM_IOCTL_MSM_GEM_NEW DRM_IOWR(DRM_COMMAND_BASE +
> > > > DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
> > > > > @@ -327,6 +339,7 @@ struct drm_msm_submitqueue_query {
> > > > > #define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE +
> > > > DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue)
> > > > > #define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW (DRM_COMMAND_BASE +
> > > > DRM_MSM_SUBMITQUEUE_CLOSE, __u32)
> > > > > #define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY DRM_IOW (DRM_COMMAND_BASE +
> > > > DRM_MSM_SUBMITQUEUE_QUERY, struct drm_msm_submitqueue_query)
> > > > > +#define DRM_IOCTL_MSM_WAIT_IOVA DRM_IOW (DRM_COMMAND_BASE +
> > > > DRM_MSM_WAIT_IOVA, struct drm_msm_wait_iova)
> > > > >
> > > > > #if defined(__cplusplus)
> > > > > }
> > > > > --
> > > > > 2.25.0.rc1.283.g88dfdc4193-goog
> > > > >
> > > >
On Tue, Jan 14, 2020 at 8:40 AM Brian Ho <[email protected]> wrote:
>
> On Mon, Jan 13, 2020 at 03:17:38PM -0800, Rob Clark wrote:
> > On Mon, Jan 13, 2020 at 2:55 PM Brian Ho <[email protected]> wrote:
> > >
> > > On Mon, Jan 13, 2020 at 09:57:43AM -0800, Kristian Kristensen wrote:
> > > > On Mon, Jan 13, 2020 at 8:25 AM Rob Clark <[email protected]> wrote:
> > > >
> > > > > On Mon, Jan 13, 2020 at 7:37 AM Brian Ho <[email protected]> wrote:
> > > > > >
> > > > > > Implements an ioctl to wait until a value at a given iova is greater
> > > > > > than or equal to a supplied value.
> > > > > >
> > > > > > This will initially be used by turnip (open-source Vulkan driver for
> > > > > > QC in mesa) for occlusion queries where the userspace driver can
> > > > > > block on a query becoming available before continuing via
> > > > > > vkGetQueryPoolResults.
> > > > > >
> > > > > > Signed-off-by: Brian Ho <[email protected]>
> > > > > > ---
> > > > > > drivers/gpu/drm/msm/msm_drv.c | 63 +++++++++++++++++++++++++++++++++--
> > > > > > include/uapi/drm/msm_drm.h | 13 ++++++++
> > > > > > 2 files changed, 74 insertions(+), 2 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c
> > > > > b/drivers/gpu/drm/msm/msm_drv.c
> > > > > > index c84f0a8b3f2c..dcc46874a5a2 100644
> > > > > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > > > > @@ -36,10 +36,11 @@
> > > > > > * MSM_GEM_INFO ioctl.
> > > > > > * - 1.4.0 - softpin, MSM_RELOC_BO_DUMP, and GEM_INFO support to set/get
> > > > > > * GEM object's debug name
> > > > > > - * - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
> > > > > > + * - 1.5.0 - Add SUBMITQUEUE_QUERY ioctl
> > > > > > + * - 1.6.0 - Add WAIT_IOVA ioctl
> > > > > > */
> > > > > > #define MSM_VERSION_MAJOR 1
> > > > > > -#define MSM_VERSION_MINOR 5
> > > > > > +#define MSM_VERSION_MINOR 6
> > > > > > #define MSM_VERSION_PATCHLEVEL 0
> > > > > >
> > > > > > static const struct drm_mode_config_funcs mode_config_funcs = {
> > > > > > @@ -952,6 +953,63 @@ static int msm_ioctl_submitqueue_close(struct
> > > > > drm_device *dev, void *data,
> > > > > > return msm_submitqueue_remove(file->driver_priv, id);
> > > > > > }
> > > > > >
> > > > > > +static int msm_ioctl_wait_iova(struct drm_device *dev, void *data,
> > > > > > + struct drm_file *file)
> > > > > > +{
> > > > > > + struct msm_drm_private *priv = dev->dev_private;
> > > > > > + struct drm_gem_object *obj;
> > > > > > + struct drm_msm_wait_iova *args = data;
> > > > > > + ktime_t timeout = to_ktime(args->timeout);
> > > > > > + unsigned long remaining_jiffies = timeout_to_jiffies(&timeout);
> > > > > > + struct msm_gpu *gpu = priv->gpu;
> > > > > > + void *base_vaddr;
> > > > > > + uint64_t *vaddr;
> > > > > > + int ret;
> > > > > > +
> > > > > > + if (args->pad)
> > > > > > + return -EINVAL;
> > > > > > +
> > > > > > + if (!gpu)
> > > > > > + return 0;
> > > > >
> > > > > hmm, I'm not sure we should return zero in this case.. maybe -ENODEV?
> > > > >
> > > > > > +
> > > > > > + obj = drm_gem_object_lookup(file, args->handle);
> > > > > > + if (!obj)
> > > > > > + return -ENOENT;
> > > > > > +
> > > > > > + base_vaddr = msm_gem_get_vaddr(obj);
> > > > > > + if (IS_ERR(base_vaddr)) {
> > > > > > + ret = PTR_ERR(base_vaddr);
> > > > > > + goto err_put_gem_object;
> > > > > > + }
> > > > > > + if (args->offset + sizeof(*vaddr) > obj->size) {
> > > > > > + ret = -EINVAL;
> > > > > > + goto err_put_vaddr;
> > > > > > + }
> > > > > > +
> > > > > > + vaddr = base_vaddr + args->offset;
> > > > > > +
> > > > > > + /* Assumes WC mapping */
> > > > > > + ret = wait_event_interruptible_timeout(
> > > > > > + gpu->event, *vaddr >= args->value,
> > > > > remaining_jiffies);
> > > > >
> > > >
> > > > This needs to do the awkward looking
> > > >
> > > > (int64_t)(*data - value) >= 0
> > > >
> > > > to properly handle the wraparound case.
> > > >
> > >
> > > I think this comparison will run into issues if we allow for 64-bit
> > > reference values. For example, if value is ULLONG_MAX, and *data
> > > starts at 0 on the first comparison, we'll immediately return.
> > >
> > > It's not too much of an issue in fence_completed (msm_fence.c), but
> > > in this ioctl, *data can grow at an arbitrary rate. Are we concerned
> > > about this?
> > >
> > > > > +
> > > > > > + if (ret == 0) {
> > > > > > + ret = -ETIMEDOUT;
> > > > > > + goto err_put_vaddr;
> > > > > > + } else if (ret == -ERESTARTSYS) {
> > > > > > + goto err_put_vaddr;
> > > > > > + }
> > > > >
> > > > > maybe:
> > > > >
> > > > > } else {
> > > > > ret = 0;
> > > > > }
> > > > >
> > > > > and then drop the next three lines?
> > > > >
> > > > > > +
> > > > > > + msm_gem_put_vaddr(obj);
> > > > > > + drm_gem_object_put_unlocked(obj);
> > > > > > + return 0;
> > > > > > +
> > > > > > +err_put_vaddr:
> > > > > > + msm_gem_put_vaddr(obj);
> > > > > > +err_put_gem_object:
> > > > > > + drm_gem_object_put_unlocked(obj);
> > > > > > + return ret;
> > > > > > +}
> > > > > > +
> > > > > > static const struct drm_ioctl_desc msm_ioctls[] = {
> > > > > > DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param,
> > > > > DRM_RENDER_ALLOW),
> > > > > > DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new,
> > > > > DRM_RENDER_ALLOW),
> > > > > > @@ -964,6 +1022,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
> > > > > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW,
> > > > > msm_ioctl_submitqueue_new, DRM_RENDER_ALLOW),
> > > > > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE,
> > > > > msm_ioctl_submitqueue_close, DRM_RENDER_ALLOW),
> > > > > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY,
> > > > > msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW),
> > > > > > + DRM_IOCTL_DEF_DRV(MSM_WAIT_IOVA, msm_ioctl_wait_iova,
> > > > > DRM_RENDER_ALLOW),
> > > > > > };
> > > > > >
> > > > > > static const struct vm_operations_struct vm_ops = {
> > > > > > diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
> > > > > > index 0b85ed6a3710..8477f28a4ee1 100644
> > > > > > --- a/include/uapi/drm/msm_drm.h
> > > > > > +++ b/include/uapi/drm/msm_drm.h
> > > > > > @@ -298,6 +298,17 @@ struct drm_msm_submitqueue_query {
> > > > > > __u32 pad;
> > > > > > };
> > > > > >
> > > > > > +/* This ioctl blocks until the u64 value at bo + offset is greater than
> > > > > or
> > > > > > + * equal to the reference value.
> > > > > > + */
> > > > > > +struct drm_msm_wait_iova {
> > > > > > + __u32 handle; /* in, GEM handle */
> > > > > > + __u32 pad;
> > > > > > + struct drm_msm_timespec timeout; /* in */
> > > > > > + __u64 offset; /* offset into bo */
> > > > > > + __u64 value; /* reference value */
> > > > >
> > > > > Maybe we should go ahead and add a __u64 mask;
> > > > >
> > > > > that would let us wait for 32b values as well, and wait for bits in a
> > > > > bitmask
> > > > >
> > > >
> > > > I think we'd be OK to just default to 32 bit values instead, since most of
> > > > the CP commands that this is intended to work with (CP_EVENT_WRITE,
> > > > CP_WAIT_MEM_GTE etc) operate on 32 bit values. We could move 'value' to the
> > > > slot right after 'handle' but then we'd not have any pad/reserved fields.
> > > > Maybe we keep 'value' 64 bit but restrict it to 32 bits, with an option to
> > > > add a 64 bit flag in 'pad' later on?
> > > >
> > >
> > > FWIW, the current usage of this in my mesa MR uses a 64 bit value.
> > > There's no super great reason that the available bit is 64 bits and
> > > not 32 bits (I think it made the addressing math a bit simpler), but
> > > I'm fine with whatever you all decide on here.
> > >
> >
> > I assume you are waiting for a fence value written w/ CP_EVENT_WRITE?
> > Or at least that is what I'd recommend. That would be 32b
> >
> > BR,
> > -R
> >
>
> So it's actually a little bit different than that. I allocate a bo
> for the occlusion query which has an "availability" bit (0 for
> unavailable, 1 for available). When the occlusion query ends, we
> write the fragments passed value to the bo via CP_EVENT_WRITE and
> then wait for that write to complete before setting the available
> bit to 1 via a simple CP_MEM_WRITE [1].
>
> It's that CP_MEM_WRITE that I plan on waiting on with this new iova
> ioctl.
>
> [1] https://gitlab.freedesktop.org/mesa/mesa/blob/768106c50a5569796bb6d5e04b5e4d65c1d00ea0/src/freedreno/vulkan/tu_query.c#L529
>
hmm, interesting.. I had in mind something more like:
https://gitlab.freedesktop.org/drm/msm/blob/msm-next/drivers/gpu/drm/msm/adreno/a6xx_gpu.c#L137
The high bit in the first dword of the packet (which we probably
shouldn't open-code) is the "give me an irq after the value in last
dword is written to memory"..
(I haven't checked yet whether we can use the "gimme an irq" bit from userspace)
BR,
-R
On Mon, Jan 13, 2020 at 9:51 AM Jordan Crouse <[email protected]> wrote:
>
> On Mon, Jan 13, 2020 at 10:36:05AM -0500, Brian Ho wrote:
> > +
> > + vaddr = base_vaddr + args->offset;
> > +
> > + /* Assumes WC mapping */
> > + ret = wait_event_interruptible_timeout(
> > + gpu->event, *vaddr >= args->value, remaining_jiffies);
>
> I feel like a barrier might be needed before checking *vaddr just in case you
> get the interrupt and wake up the queue before the write posts from the
> hardware.
>
if the gpu is doing posted (or cached) writes, I don't think there is
even a CPU side barrier primitive that could wait for that? I think
we rely on the GPU not interrupting the CPU until the write is posted
BR,
-R
On Tue, Jan 14, 2020 at 08:52:43AM -0800, Rob Clark wrote:
> On Mon, Jan 13, 2020 at 9:51 AM Jordan Crouse <[email protected]> wrote:
> >
> > On Mon, Jan 13, 2020 at 10:36:05AM -0500, Brian Ho wrote:
> > > +
> > > + vaddr = base_vaddr + args->offset;
> > > +
> > > + /* Assumes WC mapping */
> > > + ret = wait_event_interruptible_timeout(
> > > + gpu->event, *vaddr >= args->value, remaining_jiffies);
> >
> > I feel like a barrier might be needed before checking *vaddr just in case you
> > get the interrupt and wake up the queue before the write posts from the
> > hardware.
> >
>
> if the gpu is doing posted (or cached) writes, I don't think there is
> even a CPU side barrier primitive that could wait for that? I think
> we rely on the GPU not interrupting the CPU until the write is posted
Once the GPU puts the write on the bus then it is up to the whims of the CPU
architecture. If the writes are being done out of order you run a chance of
firing the interrupt and making it all the way to your handler before the writes
catch up.
Since you are scheduling and doing a bunch of things in between you probably
don't need to worry but if you start missing events and you don't know why then
this could be why. A rmb() would give you peace of mind at the cost of being
Yet Another Barrier (TM).
Jordan
> BR,
> -R
> _______________________________________________
> Freedreno mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/freedreno
--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
On Tue, Jan 14, 2020 at 9:23 AM Jordan Crouse <[email protected]> wrote:
>
> On Tue, Jan 14, 2020 at 08:52:43AM -0800, Rob Clark wrote:
> > On Mon, Jan 13, 2020 at 9:51 AM Jordan Crouse <[email protected]> wrote:
> > >
> > > On Mon, Jan 13, 2020 at 10:36:05AM -0500, Brian Ho wrote:
> > > > +
> > > > + vaddr = base_vaddr + args->offset;
> > > > +
> > > > + /* Assumes WC mapping */
> > > > + ret = wait_event_interruptible_timeout(
> > > > + gpu->event, *vaddr >= args->value, remaining_jiffies);
> > >
> > > I feel like a barrier might be needed before checking *vaddr just in case you
> > > get the interrupt and wake up the queue before the write posts from the
> > > hardware.
> > >
> >
> > if the gpu is doing posted (or cached) writes, I don't think there is
> > even a CPU side barrier primitive that could wait for that? I think
> > we rely on the GPU not interrupting the CPU until the write is posted
>
> Once the GPU puts the write on the bus then it is up to the whims of the CPU
> architecture. If the writes are being done out of order you run a chance of
> firing the interrupt and making it all the way to your handler before the writes
> catch up.
>
> Since you are scheduling and doing a bunch of things in between you probably
> don't need to worry but if you start missing events and you don't know why then
> this could be why. A rmb() would give you peace of mind at the cost of being
> Yet Another Barrier (TM).
Doesn't the fence case do the same thing without a barrier?
> Jordan
>
> > BR,
> > -R
> > _______________________________________________
> > Freedreno mailing list
> > [email protected]
> > https://lists.freedesktop.org/mailman/listinfo/freedreno
>
> --
> The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
> a Linux Foundation Collaborative Project
On Tue, Jan 14, 2020 at 08:48:48AM -0800, Rob Clark wrote:
> On Tue, Jan 14, 2020 at 8:40 AM Brian Ho <[email protected]> wrote:
> >
> > On Mon, Jan 13, 2020 at 03:17:38PM -0800, Rob Clark wrote:
> > > On Mon, Jan 13, 2020 at 2:55 PM Brian Ho <[email protected]> wrote:
> > > >
> > > > On Mon, Jan 13, 2020 at 09:57:43AM -0800, Kristian Kristensen wrote:
> > > > > On Mon, Jan 13, 2020 at 8:25 AM Rob Clark <[email protected]> wrote:
> > > > >
> > > > > > On Mon, Jan 13, 2020 at 7:37 AM Brian Ho <[email protected]> wrote:
> > > > > > >
> > > > > > > Implements an ioctl to wait until a value at a given iova is greater
> > > > > > > than or equal to a supplied value.
> > > > > > >
> > > > > > > This will initially be used by turnip (open-source Vulkan driver for
> > > > > > > QC in mesa) for occlusion queries where the userspace driver can
> > > > > > > block on a query becoming available before continuing via
> > > > > > > vkGetQueryPoolResults.
> > > > > > >
> > > > > > > Signed-off-by: Brian Ho <[email protected]>
> > > > > > > ---
> > > > > > > drivers/gpu/drm/msm/msm_drv.c | 63 +++++++++++++++++++++++++++++++++--
> > > > > > > include/uapi/drm/msm_drm.h | 13 ++++++++
> > > > > > > 2 files changed, 74 insertions(+), 2 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/gpu/drm/msm/msm_drv.c
> > > > > > b/drivers/gpu/drm/msm/msm_drv.c
> > > > > > > index c84f0a8b3f2c..dcc46874a5a2 100644
> > > > > > > --- a/drivers/gpu/drm/msm/msm_drv.c
> > > > > > > +++ b/drivers/gpu/drm/msm/msm_drv.c
> > > > > > > @@ -36,10 +36,11 @@
> > > > > > > * MSM_GEM_INFO ioctl.
> > > > > > > * - 1.4.0 - softpin, MSM_RELOC_BO_DUMP, and GEM_INFO support to set/get
> > > > > > > * GEM object's debug name
> > > > > > > - * - 1.5.0 - Add SUBMITQUERY_QUERY ioctl
> > > > > > > + * - 1.5.0 - Add SUBMITQUEUE_QUERY ioctl
> > > > > > > + * - 1.6.0 - Add WAIT_IOVA ioctl
> > > > > > > */
> > > > > > > #define MSM_VERSION_MAJOR 1
> > > > > > > -#define MSM_VERSION_MINOR 5
> > > > > > > +#define MSM_VERSION_MINOR 6
> > > > > > > #define MSM_VERSION_PATCHLEVEL 0
> > > > > > >
> > > > > > > static const struct drm_mode_config_funcs mode_config_funcs = {
> > > > > > > @@ -952,6 +953,63 @@ static int msm_ioctl_submitqueue_close(struct
> > > > > > drm_device *dev, void *data,
> > > > > > > return msm_submitqueue_remove(file->driver_priv, id);
> > > > > > > }
> > > > > > >
> > > > > > > +static int msm_ioctl_wait_iova(struct drm_device *dev, void *data,
> > > > > > > + struct drm_file *file)
> > > > > > > +{
> > > > > > > + struct msm_drm_private *priv = dev->dev_private;
> > > > > > > + struct drm_gem_object *obj;
> > > > > > > + struct drm_msm_wait_iova *args = data;
> > > > > > > + ktime_t timeout = to_ktime(args->timeout);
> > > > > > > + unsigned long remaining_jiffies = timeout_to_jiffies(&timeout);
> > > > > > > + struct msm_gpu *gpu = priv->gpu;
> > > > > > > + void *base_vaddr;
> > > > > > > + uint64_t *vaddr;
> > > > > > > + int ret;
> > > > > > > +
> > > > > > > + if (args->pad)
> > > > > > > + return -EINVAL;
> > > > > > > +
> > > > > > > + if (!gpu)
> > > > > > > + return 0;
> > > > > >
> > > > > > hmm, I'm not sure we should return zero in this case.. maybe -ENODEV?
> > > > > >
> > > > > > > +
> > > > > > > + obj = drm_gem_object_lookup(file, args->handle);
> > > > > > > + if (!obj)
> > > > > > > + return -ENOENT;
> > > > > > > +
> > > > > > > + base_vaddr = msm_gem_get_vaddr(obj);
> > > > > > > + if (IS_ERR(base_vaddr)) {
> > > > > > > + ret = PTR_ERR(base_vaddr);
> > > > > > > + goto err_put_gem_object;
> > > > > > > + }
> > > > > > > + if (args->offset + sizeof(*vaddr) > obj->size) {
> > > > > > > + ret = -EINVAL;
> > > > > > > + goto err_put_vaddr;
> > > > > > > + }
> > > > > > > +
> > > > > > > + vaddr = base_vaddr + args->offset;
> > > > > > > +
> > > > > > > + /* Assumes WC mapping */
> > > > > > > + ret = wait_event_interruptible_timeout(
> > > > > > > + gpu->event, *vaddr >= args->value,
> > > > > > remaining_jiffies);
> > > > > >
> > > > >
> > > > > This needs to do the awkward looking
> > > > >
> > > > > (int64_t)(*data - value) >= 0
> > > > >
> > > > > to properly handle the wraparound case.
> > > > >
> > > >
> > > > I think this comparison will run into issues if we allow for 64-bit
> > > > reference values. For example, if value is ULLONG_MAX, and *data
> > > > starts at 0 on the first comparison, we'll immediately return.
> > > >
> > > > It's not too much of an issue in fence_completed (msm_fence.c), but
> > > > in this ioctl, *data can grow at an arbitrary rate. Are we concerned
> > > > about this?
> > > >
> > > > > > +
> > > > > > > + if (ret == 0) {
> > > > > > > + ret = -ETIMEDOUT;
> > > > > > > + goto err_put_vaddr;
> > > > > > > + } else if (ret == -ERESTARTSYS) {
> > > > > > > + goto err_put_vaddr;
> > > > > > > + }
> > > > > >
> > > > > > maybe:
> > > > > >
> > > > > > } else {
> > > > > > ret = 0;
> > > > > > }
> > > > > >
> > > > > > and then drop the next three lines?
> > > > > >
> > > > > > > +
> > > > > > > + msm_gem_put_vaddr(obj);
> > > > > > > + drm_gem_object_put_unlocked(obj);
> > > > > > > + return 0;
> > > > > > > +
> > > > > > > +err_put_vaddr:
> > > > > > > + msm_gem_put_vaddr(obj);
> > > > > > > +err_put_gem_object:
> > > > > > > + drm_gem_object_put_unlocked(obj);
> > > > > > > + return ret;
> > > > > > > +}
> > > > > > > +
> > > > > > > static const struct drm_ioctl_desc msm_ioctls[] = {
> > > > > > > DRM_IOCTL_DEF_DRV(MSM_GET_PARAM, msm_ioctl_get_param,
> > > > > > DRM_RENDER_ALLOW),
> > > > > > > DRM_IOCTL_DEF_DRV(MSM_GEM_NEW, msm_ioctl_gem_new,
> > > > > > DRM_RENDER_ALLOW),
> > > > > > > @@ -964,6 +1022,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
> > > > > > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW,
> > > > > > msm_ioctl_submitqueue_new, DRM_RENDER_ALLOW),
> > > > > > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE,
> > > > > > msm_ioctl_submitqueue_close, DRM_RENDER_ALLOW),
> > > > > > > DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY,
> > > > > > msm_ioctl_submitqueue_query, DRM_RENDER_ALLOW),
> > > > > > > + DRM_IOCTL_DEF_DRV(MSM_WAIT_IOVA, msm_ioctl_wait_iova,
> > > > > > DRM_RENDER_ALLOW),
> > > > > > > };
> > > > > > >
> > > > > > > static const struct vm_operations_struct vm_ops = {
> > > > > > > diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
> > > > > > > index 0b85ed6a3710..8477f28a4ee1 100644
> > > > > > > --- a/include/uapi/drm/msm_drm.h
> > > > > > > +++ b/include/uapi/drm/msm_drm.h
> > > > > > > @@ -298,6 +298,17 @@ struct drm_msm_submitqueue_query {
> > > > > > > __u32 pad;
> > > > > > > };
> > > > > > >
> > > > > > > +/* This ioctl blocks until the u64 value at bo + offset is greater than
> > > > > > or
> > > > > > > + * equal to the reference value.
> > > > > > > + */
> > > > > > > +struct drm_msm_wait_iova {
> > > > > > > + __u32 handle; /* in, GEM handle */
> > > > > > > + __u32 pad;
> > > > > > > + struct drm_msm_timespec timeout; /* in */
> > > > > > > + __u64 offset; /* offset into bo */
> > > > > > > + __u64 value; /* reference value */
> > > > > >
> > > > > > Maybe we should go ahead and add a __u64 mask;
> > > > > >
> > > > > > that would let us wait for 32b values as well, and wait for bits in a
> > > > > > bitmask
> > > > > >
> > > > >
> > > > > I think we'd be OK to just default to 32 bit values instead, since most of
> > > > > the CP commands that this is intended to work with (CP_EVENT_WRITE,
> > > > > CP_WAIT_MEM_GTE etc) operate on 32 bit values. We could move 'value' to the
> > > > > slot right after 'handle' but then we'd not have any pad/reserved fields.
> > > > > Maybe we keep 'value' 64 bit but restrict it to 32 bits, with an option to
> > > > > add a 64 bit flag in 'pad' later on?
> > > > >
> > > >
> > > > FWIW, the current usage of this in my mesa MR uses a 64 bit value.
> > > > There's no super great reason that the available bit is 64 bits and
> > > > not 32 bits (I think it made the addressing math a bit simpler), but
> > > > I'm fine with whatever you all decide on here.
> > > >
> > >
> > > I assume you are waiting for a fence value written w/ CP_EVENT_WRITE?
> > > Or at least that is what I'd recommend. That would be 32b
> > >
> > > BR,
> > > -R
> > >
> >
> > So it's actually a little bit different than that. I allocate a bo
> > for the occlusion query which has an "availability" bit (0 for
> > unavailable, 1 for available). When the occlusion query ends, we
> > write the fragments passed value to the bo via CP_EVENT_WRITE and
> > then wait for that write to complete before setting the available
> > bit to 1 via a simple CP_MEM_WRITE [1].
> >
> > It's that CP_MEM_WRITE that I plan on waiting on with this new iova
> > ioctl.
> >
> > [1] https://gitlab.freedesktop.org/mesa/mesa/blob/768106c50a5569796bb6d5e04b5e4d65c1d00ea0/src/freedreno/vulkan/tu_query.c#L529
> >
>
> hmm, interesting.. I had in mind something more like:
>
> https://gitlab.freedesktop.org/drm/msm/blob/msm-next/drivers/gpu/drm/msm/adreno/a6xx_gpu.c#L137
>
> The high bit in the first dword of the packet (which we probably
> shouldn't open-code) is the "give me an irq after the value in last
> dword is written to memory"..
>
> (I haven't checked yet whether we can use the "gimme an irq" bit from userspace)
>
> BR,
> -R
I see. Let's continue discussing this on the mesa MR because there's
more context there.
Regardless of how we end up implementing vkCmdEndQuery, I think it
will be safe to default to 32 bit values for this ioctl and have a
flag to use 64 bit instead like Kristian suggested. If that sounds
good to you, I'll go ahead and make the change for the v2 patch
series.
On Tue, Jan 14, 2020 at 09:30:00AM -0800, Kristian Kristensen wrote:
> On Tue, Jan 14, 2020 at 9:23 AM Jordan Crouse <[email protected]> wrote:
> >
> > On Tue, Jan 14, 2020 at 08:52:43AM -0800, Rob Clark wrote:
> > > On Mon, Jan 13, 2020 at 9:51 AM Jordan Crouse <[email protected]> wrote:
> > > >
> > > > On Mon, Jan 13, 2020 at 10:36:05AM -0500, Brian Ho wrote:
> > > > > +
> > > > > + vaddr = base_vaddr + args->offset;
> > > > > +
> > > > > + /* Assumes WC mapping */
> > > > > + ret = wait_event_interruptible_timeout(
> > > > > + gpu->event, *vaddr >= args->value, remaining_jiffies);
> > > >
> > > > I feel like a barrier might be needed before checking *vaddr just in case you
> > > > get the interrupt and wake up the queue before the write posts from the
> > > > hardware.
> > > >
> > >
> > > if the gpu is doing posted (or cached) writes, I don't think there is
> > > even a CPU side barrier primitive that could wait for that? I think
> > > we rely on the GPU not interrupting the CPU until the write is posted
> >
> > Once the GPU puts the write on the bus then it is up to the whims of the CPU
> > architecture. If the writes are being done out of order you run a chance of
> > firing the interrupt and making it all the way to your handler before the writes
> > catch up.
> >
> > Since you are scheduling and doing a bunch of things in between you probably
> > don't need to worry but if you start missing events and you don't know why then
> > this could be why. A rmb() would give you peace of mind at the cost of being
> > Yet Another Barrier (TM).
>
> Doesn't the fence case do the same thing without a barrier?
We get a free __iormb() and dma_rmb() from every gpu_read() so I think that
is enough to catch everything up when the interrupt handler reads the status
register and in most cases we don't check the fence until after that. I'm not
sure that covers all the possible error cases. Maybe something to look into?
Also that field is marked as volatile in the struct, but I'm not sure that does
anything useful.
Jordan
> > > BR,
> > > -R
> > > _______________________________________________
> > > Freedreno mailing list
> > > [email protected]
> > > https://lists.freedesktop.org/mailman/listinfo/freedreno
> >
> > --
> > The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
> > a Linux Foundation Collaborative Project
> _______________________________________________
> Freedreno mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/freedreno
--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project