2017-07-07 05:49:11

by Dawid Kurek

[permalink] [raw]
Subject: [PATCH] drm/udl: Make page_flip asynchronous

In page_flip vblank is sent with no delay. Driver does not know when the
actual update is present on the display and has no means for getting
this information from a device. It is practically impossible to say
exactly *when* as there is also i.e. a usb delay.

When we are unable to determine when the vblank actually happens we may
assume it will behave accordingly, i.e. it will present frames with
proper timing. In the worst case scenario it should take up to duration
of one frame (we may get new frame in the device just after presenting
current one so we would need to wait for the whole frame).

Because of the asynchronous nature of the delay we need to synchronize:
* read/write vrefresh/page_flip data when changing mode and
preparing/executing vblank
* USB requests to prevent interleaved access to URBs for two different
frame buffers

All those changes are backports from ChromeOS:
1. https://chromium-review.googlesource.com/250622
2. https://chromium-review.googlesource.com/249450
partially, only change in udl_modeset.c for 'udl_flip_queue'
3. https://chromium-review.googlesource.com/321378
4. https://chromium-review.googlesource.com/324119
+ fixes for checkpatch and latest drm changes

Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Dawid Kurek <[email protected]>
---
drivers/gpu/drm/udl/udl_drv.h | 4 +
drivers/gpu/drm/udl/udl_fb.c | 28 ++++---
drivers/gpu/drm/udl/udl_main.c | 3 +
drivers/gpu/drm/udl/udl_modeset.c | 160 ++++++++++++++++++++++++++++++++++----
4 files changed, 171 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h
index 2a75ab8..b93fb8d 100644
--- a/drivers/gpu/drm/udl/udl_drv.h
+++ b/drivers/gpu/drm/udl/udl_drv.h
@@ -47,6 +47,7 @@ struct urb_list {
};

struct udl_fbdev;
+struct udl_flip_queue;

struct udl_device {
struct device *dev;
@@ -66,6 +67,9 @@ struct udl_device {
atomic_t bytes_identical; /* saved effort with backbuffer comparison */
atomic_t bytes_sent; /* to usb, after compression including overhead */
atomic_t cpu_kcycles_used; /* transpired during pixel processing */
+
+ struct udl_flip_queue *flip_queue;
+ struct mutex transfer_lock; /* to serialize transfers */
};

struct udl_gem_object {
diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index 4a65003..6dd9a49 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c
@@ -82,7 +82,7 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y,
{
struct drm_device *dev = fb->base.dev;
struct udl_device *udl = dev->dev_private;
- int i, ret;
+ int i, ret = 0;
char *cmd;
cycles_t start_cycles, end_cycles;
int bytes_sent = 0;
@@ -91,18 +91,22 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y,
int aligned_x;
int bpp = fb->base.format->cpp[0];

+ mutex_lock(&udl->transfer_lock);
+
if (!fb->active_16)
- return 0;
+ goto out;

if (!fb->obj->vmapping) {
ret = udl_gem_vmap(fb->obj);
if (ret == -ENOMEM) {
DRM_ERROR("failed to vmap fb\n");
- return 0;
+ ret = 0;
+ goto out;
}
if (!fb->obj->vmapping) {
DRM_ERROR("failed to vmapping\n");
- return 0;
+ ret = 0;
+ goto out;
}
}

@@ -112,14 +116,18 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y,

if ((width <= 0) ||
(x + width > fb->base.width) ||
- (y + height > fb->base.height))
- return -EINVAL;
+ (y + height > fb->base.height)) {
+ ret = -EINVAL;
+ goto out;
+ }

start_cycles = get_cycles();

urb = udl_get_urb(dev);
- if (!urb)
- return 0;
+ if (!urb) {
+ ret = 0;
+ goto out;
+ }
cmd = urb->transfer_buffer;

for (i = y; i < y + height ; i++) {
@@ -151,7 +159,9 @@ int udl_handle_damage(struct udl_framebuffer *fb, int x, int y,
>> 10)), /* Kcycles */
&udl->cpu_kcycles_used);

- return 0;
+out:
+ mutex_unlock(&udl->transfer_lock);
+ return ret;
}

static int udl_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c
index a9d93b8..d376233 100644
--- a/drivers/gpu/drm/udl/udl_main.c
+++ b/drivers/gpu/drm/udl/udl_main.c
@@ -319,6 +319,8 @@ int udl_driver_load(struct drm_device *dev, unsigned long flags)
if (!udl)
return -ENOMEM;

+ mutex_init(&udl->transfer_lock);
+
udl->udev = udev;
udl->ddev = dev;
dev->dev_private = udl;
@@ -378,5 +380,6 @@ void udl_driver_unload(struct drm_device *dev)

udl_fbdev_cleanup(dev);
udl_modeset_cleanup(dev);
+ mutex_destroy(&udl->transfer_lock);
kfree(udl);
}
diff --git a/drivers/gpu/drm/udl/udl_modeset.c b/drivers/gpu/drm/udl/udl_modeset.c
index 5bcae76..9a0c84d 100644
--- a/drivers/gpu/drm/udl/udl_modeset.c
+++ b/drivers/gpu/drm/udl/udl_modeset.c
@@ -235,15 +235,21 @@ static int udl_crtc_write_mode_to_hw(struct drm_crtc *crtc)
char *buf;
int retval;

+ mutex_lock(&udl->transfer_lock);
+
urb = udl_get_urb(dev);
- if (!urb)
+ if (!urb) {
+ mutex_unlock(&udl->transfer_lock);
return -ENOMEM;
+ }

buf = (char *)urb->transfer_buffer;

memcpy(buf, udl->mode_buf, udl->mode_buf_len);
retval = udl_submit_urb(dev, urb, udl->mode_buf_len);
DRM_INFO("write mode info %d\n", udl->mode_buf_len);
+ mutex_unlock(&udl->transfer_lock);
+
return retval;
}

@@ -257,9 +263,14 @@ static void udl_crtc_dpms(struct drm_crtc *crtc, int mode)
if (mode == DRM_MODE_DPMS_OFF) {
char *buf;
struct urb *urb;
+
+ mutex_lock(&udl->transfer_lock);
+
urb = udl_get_urb(dev);
- if (!urb)
+ if (!urb) {
+ mutex_unlock(&udl->transfer_lock);
return;
+ }

buf = (char *)urb->transfer_buffer;
buf = udl_vidreg_lock(buf);
@@ -269,6 +280,8 @@ static void udl_crtc_dpms(struct drm_crtc *crtc, int mode)
buf = udl_dummy_render(buf);
retval = udl_submit_urb(dev, urb, buf - (char *)
urb->transfer_buffer);
+
+ mutex_unlock(&udl->transfer_lock);
} else {
if (udl->mode_buf_len == 0) {
DRM_ERROR("Trying to enable DPMS with no mode\n");
@@ -295,6 +308,16 @@ udl_pipe_set_base(struct drm_crtc *crtc, int x, int y,
}
#endif

+struct udl_flip_queue {
+ struct mutex lock;
+ struct workqueue_struct *wq;
+ struct delayed_work work;
+ struct drm_crtc *crtc;
+ struct drm_pending_vblank_event *event;
+ u64 flip_time; /*in jiffies */
+ u64 vblank_interval; /*in jiffies */
+};
+
static int udl_crtc_mode_set(struct drm_crtc *crtc,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode,
@@ -305,6 +328,7 @@ static int udl_crtc_mode_set(struct drm_crtc *crtc,
struct drm_device *dev = crtc->dev;
struct udl_framebuffer *ufb = to_udl_fb(crtc->primary->fb);
struct udl_device *udl = dev->dev_private;
+ struct udl_flip_queue *flip_queue = udl->flip_queue;
char *buf;
char *wrptr;
int color_depth = 0;
@@ -336,11 +360,20 @@ static int udl_crtc_mode_set(struct drm_crtc *crtc,

if (old_fb) {
struct udl_framebuffer *uold_fb = to_udl_fb(old_fb);
+
uold_fb->active_16 = false;
}
ufb->active_16 = true;
udl->mode_buf_len = wrptr - buf;

+ if (flip_queue) {
+ int vrefresh = drm_mode_vrefresh(mode);
+
+ mutex_lock(&flip_queue->lock);
+ flip_queue->vblank_interval = HZ / vrefresh;
+ mutex_unlock(&flip_queue->lock);
+ }
+
/* damage all of it */
udl_handle_damage(ufb, 0, 0, ufb->base.width, ufb->base.height);
return 0;
@@ -358,30 +391,91 @@ static void udl_crtc_destroy(struct drm_crtc *crtc)
kfree(crtc);
}

+static void udl_sched_page_flip(struct work_struct *work)
+{
+ struct delayed_work *delayed_work
+ = container_of(work, struct delayed_work, work);
+ struct udl_flip_queue *flip_queue =
+ container_of(delayed_work, struct udl_flip_queue, work);
+ struct drm_crtc *crtc;
+ struct drm_device *dev;
+ struct drm_pending_vblank_event *event;
+ struct drm_framebuffer *fb;
+
+ if (!flip_queue)
+ return;
+
+ mutex_lock(&flip_queue->lock);
+ crtc = flip_queue->crtc;
+ dev = crtc->dev;
+ fb = crtc->primary->fb;
+ event = flip_queue->event;
+ flip_queue->event = NULL;
+ mutex_unlock(&flip_queue->lock);
+
+ if (fb)
+ udl_handle_damage(to_udl_fb(fb), 0, 0, fb->width, fb->height);
+ if (event) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->event_lock, flags);
+ drm_crtc_send_vblank_event(crtc, event);
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+ }
+}
+
static int udl_crtc_page_flip(struct drm_crtc *crtc,
struct drm_framebuffer *fb,
struct drm_pending_vblank_event *event,
uint32_t page_flip_flags,
struct drm_modeset_acquire_ctx *ctx)
{
- struct udl_framebuffer *ufb = to_udl_fb(fb);
struct drm_device *dev = crtc->dev;
- unsigned long flags;
+ struct udl_device *udl = dev->dev_private;
+ struct udl_flip_queue *flip_queue = udl->flip_queue;

- struct drm_framebuffer *old_fb = crtc->primary->fb;
- if (old_fb) {
- struct udl_framebuffer *uold_fb = to_udl_fb(old_fb);
- uold_fb->active_16 = false;
+ if (!flip_queue || !flip_queue->wq) {
+ DRM_ERROR("Uninitialized page flip queue\n");
+ return -ENOMEM;
}
- ufb->active_16 = true;

- udl_handle_damage(ufb, 0, 0, fb->width, fb->height);
+ mutex_lock(&flip_queue->lock);

- spin_lock_irqsave(&dev->event_lock, flags);
- if (event)
- drm_crtc_send_vblank_event(crtc, event);
- spin_unlock_irqrestore(&dev->event_lock, flags);
- crtc->primary->fb = fb;
+ flip_queue->crtc = crtc;
+ if (fb) {
+ struct udl_framebuffer *ufb = to_udl_fb(fb);
+ struct drm_framebuffer *old_fb;
+
+ old_fb = crtc->primary->fb;
+ if (old_fb) {
+ struct udl_framebuffer *uold_fb = to_udl_fb(old_fb);
+
+ uold_fb->active_16 = false;
+ }
+ ufb->active_16 = true;
+ crtc->primary->fb = fb;
+ }
+ if (event) {
+ if (flip_queue->event) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->event_lock, flags);
+ drm_crtc_send_vblank_event(crtc, flip_queue->event);
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+ }
+ flip_queue->event = event;
+ }
+ if (!delayed_work_pending(&flip_queue->work)) {
+ u64 now = jiffies;
+ u64 next_flip =
+ flip_queue->flip_time + flip_queue->vblank_interval;
+
+ flip_queue->flip_time = (next_flip < now) ? now : next_flip;
+ queue_delayed_work(flip_queue->wq, &flip_queue->work,
+ flip_queue->flip_time - now);
+ }
+
+ mutex_unlock(&flip_queue->lock);

return 0;
}
@@ -423,6 +517,39 @@ static int udl_crtc_init(struct drm_device *dev)
return 0;
}

+static void udl_flip_workqueue_init(struct drm_device *dev)
+{
+ struct udl_device *udl = dev->dev_private;
+ struct udl_flip_queue *flip_queue;
+
+ flip_queue = kzalloc(sizeof(*flip_queue), GFP_KERNEL);
+ if (WARN_ON(!flip_queue))
+ return;
+
+ mutex_init(&flip_queue->lock);
+ flip_queue->wq = create_singlethread_workqueue("flip");
+ WARN_ON(!flip_queue->wq);
+ INIT_DELAYED_WORK(&flip_queue->work, udl_sched_page_flip);
+ flip_queue->flip_time = jiffies;
+ flip_queue->vblank_interval = HZ / 60;
+ udl->flip_queue = flip_queue;
+}
+
+static void udl_flip_workqueue_cleanup(struct drm_device *dev)
+{
+ struct udl_device *udl = dev->dev_private;
+ struct udl_flip_queue *flip_queue = udl->flip_queue;
+
+ if (!flip_queue)
+ return;
+ if (flip_queue->wq) {
+ flush_workqueue(flip_queue->wq);
+ destroy_workqueue(flip_queue->wq);
+ }
+ mutex_destroy(&flip_queue->lock);
+ kfree(flip_queue);
+}
+
static const struct drm_mode_config_funcs udl_mode_funcs = {
.fb_create = udl_fb_user_fb_create,
.output_poll_changed = NULL,
@@ -450,6 +577,8 @@ int udl_modeset_init(struct drm_device *dev)

udl_connector_init(dev, encoder);

+ udl_flip_workqueue_init(dev);
+
return 0;
}

@@ -467,5 +596,6 @@ void udl_modeset_restore(struct drm_device *dev)

void udl_modeset_cleanup(struct drm_device *dev)
{
+ udl_flip_workqueue_cleanup(dev);
drm_mode_config_cleanup(dev);
}
--
2.7.4



2017-07-11 06:59:01

by Daniel Vetter

[permalink] [raw]
Subject: Re: [PATCH] drm/udl: Make page_flip asynchronous

On Fri, Jul 7, 2017 at 7:48 AM, Dawid Kurek <[email protected]> wrote:
> In page_flip vblank is sent with no delay. Driver does not know when the
> actual update is present on the display and has no means for getting
> this information from a device. It is practically impossible to say
> exactly *when* as there is also i.e. a usb delay.
>
> When we are unable to determine when the vblank actually happens we may
> assume it will behave accordingly, i.e. it will present frames with
> proper timing. In the worst case scenario it should take up to duration
> of one frame (we may get new frame in the device just after presenting
> current one so we would need to wait for the whole frame).
>
> Because of the asynchronous nature of the delay we need to synchronize:
> * read/write vrefresh/page_flip data when changing mode and
> preparing/executing vblank
> * USB requests to prevent interleaved access to URBs for two different
> frame buffers
>
> All those changes are backports from ChromeOS:
> 1. https://chromium-review.googlesource.com/250622
> 2. https://chromium-review.googlesource.com/249450
> partially, only change in udl_modeset.c for 'udl_flip_queue'
> 3. https://chromium-review.googlesource.com/321378
> 4. https://chromium-review.googlesource.com/324119
> + fixes for checkpatch and latest drm changes
>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Dawid Kurek <[email protected]>

Can't we roll this driver over to the atomic helpers instead? There
you get nonblocking pretty much for free ... I'm not sure extending
the old modeset code has all that much benefit really.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

2017-07-13 16:26:32

by Stéphane Marchesin

[permalink] [raw]
Subject: Re: [PATCH] drm/udl: Make page_flip asynchronous

On Mon, Jul 10, 2017 at 11:58 PM, Daniel Vetter <[email protected]> wrote:
> On Fri, Jul 7, 2017 at 7:48 AM, Dawid Kurek <[email protected]> wrote:
>> In page_flip vblank is sent with no delay. Driver does not know when the
>> actual update is present on the display and has no means for getting
>> this information from a device. It is practically impossible to say
>> exactly *when* as there is also i.e. a usb delay.
>>
>> When we are unable to determine when the vblank actually happens we may
>> assume it will behave accordingly, i.e. it will present frames with
>> proper timing. In the worst case scenario it should take up to duration
>> of one frame (we may get new frame in the device just after presenting
>> current one so we would need to wait for the whole frame).
>>
>> Because of the asynchronous nature of the delay we need to synchronize:
>> * read/write vrefresh/page_flip data when changing mode and
>> preparing/executing vblank
>> * USB requests to prevent interleaved access to URBs for two different
>> frame buffers
>>
>> All those changes are backports from ChromeOS:
>> 1. https://chromium-review.googlesource.com/250622
>> 2. https://chromium-review.googlesource.com/249450
>> partially, only change in udl_modeset.c for 'udl_flip_queue'
>> 3. https://chromium-review.googlesource.com/321378
>> 4. https://chromium-review.googlesource.com/324119
>> + fixes for checkpatch and latest drm changes
>>
>> Cc: [email protected]
>> Cc: [email protected]
>> Cc: [email protected]
>> Cc: [email protected]
>> Signed-off-by: Dawid Kurek <[email protected]>
>
> Can't we roll this driver over to the atomic helpers instead? There
> you get nonblocking pretty much for free ... I'm not sure extending
> the old modeset code has all that much benefit really.

This code certainly has value by itself; it makes the driver more
efficient. I think the best can sometimes be the enemy of the good --
this code is here and written, but I don't think any of us is going to
tackle atomic for udl.

Stéphane


> -Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
> _______________________________________________
> dri-devel mailing list
> [email protected]
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

2017-07-13 17:10:59

by Daniel Vetter

[permalink] [raw]
Subject: Re: [PATCH] drm/udl: Make page_flip asynchronous

On Thu, Jul 13, 2017 at 6:25 PM, Stéphane Marchesin
<[email protected]> wrote:
>> Can't we roll this driver over to the atomic helpers instead? There
>> you get nonblocking pretty much for free ... I'm not sure extending
>> the old modeset code has all that much benefit really.
>
> This code certainly has value by itself; it makes the driver more
> efficient. I think the best can sometimes be the enemy of the good --
> this code is here and written, but I don't think any of us is going to
> tackle atomic for udl.

Sure, I guess I should have clarified this with "If you want me to
review and merge this, then pls look into atomic, since that seems
actually beneficial for my own interest". I'm not paid by intel to
review driver patches at random, but to keep overall drm in nice
shape. Moving drivers to atomic and using more shared infrastructure
is in that interest, reviewing random driver patches isn't. Sorry for
not making clear, I kinda have that as my implicit context.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

2017-07-20 09:01:10

by Michal Lukaszek

[permalink] [raw]
Subject: RE: [PATCH] drm/udl: Make page_flip asynchronous

On Thursday, July 13, 2017 7:11 PM, Daniel Vetter wrote:

>>> Can't we roll this driver over to the atomic helpers instead? There
>>> you get nonblocking pretty much for free ... I'm not sure extending
>>> the old modeset code has all that much benefit really.

>> This code certainly has value by itself; it makes the driver more
>> efficient. I think the best can sometimes be the enemy of the good --
>> this code is here and written, but I don't think any of us is going to
>> tackle atomic for udl.

> Sure, I guess I should have clarified this with "If you want me to
> review and merge this, then pls look into atomic, since that seems
> actually beneficial for my own interest". I'm not paid by intel to
> review driver patches at random, but to keep overall drm in nice
> shape. Moving drivers to atomic and using more shared infrastructure
> is in that interest, reviewing random driver patches isn't.

So, what's the next step here then? This code makes udl work better, without investing a lot of effort to move everything to atomics.

Btw, this is already causing modesetting not to support page flips (while Chrome OS, having this code uses page flips with udl):
https://cgit.freedesktop.org/xorg/xserver/tree/hw/xfree86/drivers/modesetting/driver.c#n1198

Regards,
Michal Lukaszek

2017-07-25 11:46:29

by Chris Wilson

[permalink] [raw]
Subject: Re: [PATCH] drm/udl: Make page_flip asynchronous

Quoting Dawid Kurek (2017-07-07 06:48:49)
> In page_flip vblank is sent with no delay. Driver does not know when the
> actual update is present on the display and has no means for getting
> this information from a device. It is practically impossible to say
> exactly *when* as there is also i.e. a usb delay.
>
> When we are unable to determine when the vblank actually happens we may
> assume it will behave accordingly, i.e. it will present frames with
> proper timing. In the worst case scenario it should take up to duration
> of one frame (we may get new frame in the device just after presenting
> current one so we would need to wait for the whole frame).
>
> Because of the asynchronous nature of the delay we need to synchronize:
> * read/write vrefresh/page_flip data when changing mode and
> preparing/executing vblank
> * USB requests to prevent interleaved access to URBs for two different
> frame buffers
>
> All those changes are backports from ChromeOS:
> 1. https://chromium-review.googlesource.com/250622
> 2. https://chromium-review.googlesource.com/249450
> partially, only change in udl_modeset.c for 'udl_flip_queue'
> 3. https://chromium-review.googlesource.com/321378
> 4. https://chromium-review.googlesource.com/324119
> + fixes for checkpatch and latest drm changes
>
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Cc: [email protected]
> Signed-off-by: Dawid Kurek <[email protected]>

> +struct udl_flip_queue {
> + struct mutex lock;
> + struct workqueue_struct *wq;
> + struct delayed_work work;
> + struct drm_crtc *crtc;
> + struct drm_pending_vblank_event *event;
> + u64 flip_time; /*in jiffies */
> + u64 vblank_interval; /*in jiffies */

jiffies are unsigned long. That avoids the complication of the reader
having to consider whether all the divides are 32bit safe.

mallocing this struct seems a little overkill as opposed to embedding it
tino the udl_device, flip_queue.wq provides the safety check after
initialisation.

I couldn't spot anything drastically wrong, certainly it looks complete,
I would have structured it such that everything went through the same
worker with a hrtimer emulating the vblank. That model is then but a
stone's throw away from atomic.

Reviewed-by: Chris Wilson <[email protected]>
-Chris