[PATCH 6/6] drm/gud: Use the shadow plane helper

Noralf Trønnes noralf at tronnes.org
Wed Nov 23 12:37:38 UTC 2022



Den 23.11.2022 11.38, skrev Thomas Zimmermann:
> Hi
> 
> Am 22.11.22 um 21:58 schrieb Noralf Trønnes via B4 Submission Endpoint:
>> From: Noralf Trønnes <noralf at tronnes.org>
>>
>> Use the shadow plane helper to take care of preparing the framebuffer for
>> CPU access. The synchronous flushing is now done inline without the
>> use of
>> a worker. The async path now uses a shadow buffer to hold framebuffer
>> changes and it doesn't read the framebuffer behind userspace's back
>> anymore.
>>
>> Signed-off-by: Noralf Trønnes <noralf at tronnes.org>
>> ---
>>   drivers/gpu/drm/gud/gud_drv.c      |  1 +
>>   drivers/gpu/drm/gud/gud_internal.h |  1 +
>>   drivers/gpu/drm/gud/gud_pipe.c     | 69
>> ++++++++++++++++++++++++--------------
>>   3 files changed, 46 insertions(+), 25 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/gud/gud_drv.c
>> b/drivers/gpu/drm/gud/gud_drv.c
>> index d57dab104358..5aac7cda0505 100644
>> --- a/drivers/gpu/drm/gud/gud_drv.c
>> +++ b/drivers/gpu/drm/gud/gud_drv.c
>> @@ -365,6 +365,7 @@ static void gud_debugfs_init(struct drm_minor *minor)
>>   static const struct drm_simple_display_pipe_funcs gud_pipe_funcs = {
>>       .check      = gud_pipe_check,
>>       .update        = gud_pipe_update,
>> +    DRM_GEM_SIMPLE_DISPLAY_PIPE_SHADOW_PLANE_FUNCS
>>   };
>>     static const struct drm_mode_config_funcs gud_mode_config_funcs = {
>> diff --git a/drivers/gpu/drm/gud/gud_internal.h
>> b/drivers/gpu/drm/gud/gud_internal.h
>> index e351a1f1420d..0d148a6f27aa 100644
>> --- a/drivers/gpu/drm/gud/gud_internal.h
>> +++ b/drivers/gpu/drm/gud/gud_internal.h
>> @@ -43,6 +43,7 @@ struct gud_device {
>>       struct drm_framebuffer *fb;
>>       struct drm_rect damage;
>>       bool prev_flush_failed;
>> +    void *shadow_buf;
>>   };
>>     static inline struct gud_device *to_gud_device(struct drm_device
>> *drm)
>> diff --git a/drivers/gpu/drm/gud/gud_pipe.c
>> b/drivers/gpu/drm/gud/gud_pipe.c
>> index dfada6eedc58..7686325f7ee7 100644
>> --- a/drivers/gpu/drm/gud/gud_pipe.c
>> +++ b/drivers/gpu/drm/gud/gud_pipe.c
>> @@ -358,10 +358,10 @@ static void gud_flush_damage(struct gud_device
>> *gdrm, struct drm_framebuffer *fb
>>   void gud_flush_work(struct work_struct *work)
>>   {
>>       struct gud_device *gdrm = container_of(work, struct gud_device,
>> work);
>> -    struct iosys_map gem_map = { }, fb_map = { };
>> +    struct iosys_map shadow_map;
>>       struct drm_framebuffer *fb;
>>       struct drm_rect damage;
>> -    int idx, ret;
>> +    int idx;
>>         if (!drm_dev_enter(&gdrm->drm, &idx))
>>           return;
>> @@ -369,6 +369,7 @@ void gud_flush_work(struct work_struct *work)
>>       mutex_lock(&gdrm->damage_lock);
>>       fb = gdrm->fb;
>>       gdrm->fb = NULL;
>> +    iosys_map_set_vaddr(&shadow_map, gdrm->shadow_buf);
>>       damage = gdrm->damage;
>>       gud_clear_damage(gdrm);
>>       mutex_unlock(&gdrm->damage_lock);
>> @@ -376,33 +377,33 @@ void gud_flush_work(struct work_struct *work)
>>       if (!fb)
>>           goto out;
>>   -    ret = drm_gem_fb_vmap(fb, &gem_map, &fb_map);
>> -    if (ret)
>> -        goto fb_put;
>> +    gud_flush_damage(gdrm, fb, &shadow_map, true, &damage);
>>   -    ret = drm_gem_fb_begin_cpu_access(fb, DMA_FROM_DEVICE);
>> -    if (ret)
>> -        goto vunmap;
>> -
>> -    /* Imported buffers are assumed to be WriteCombined with uncached
>> reads */
>> -    gud_flush_damage(gdrm, fb, &fb_map, !fb->obj[0]->import_attach,
>> &damage);
>> -
>> -    drm_gem_fb_end_cpu_access(fb, DMA_FROM_DEVICE);
>> -vunmap:
>> -    drm_gem_fb_vunmap(fb, &gem_map);
>> -fb_put:
>>       drm_framebuffer_put(fb);
>>   out:
>>       drm_dev_exit(idx);
>>   }
>>   -static void gud_fb_queue_damage(struct gud_device *gdrm, struct
>> drm_framebuffer *fb,
>> -                struct drm_rect *damage)
>> +static int gud_fb_queue_damage(struct gud_device *gdrm, struct
>> drm_framebuffer *fb,
>> +                   const struct iosys_map *map, struct drm_rect *damage)
>>   {
>>       struct drm_framebuffer *old_fb = NULL;
>> +    struct iosys_map shadow_map;
>>         mutex_lock(&gdrm->damage_lock);
>>   +    if (!gdrm->shadow_buf) {
>> +        gdrm->shadow_buf = vzalloc(fb->pitches[0] * fb->height);
>> +        if (!gdrm->shadow_buf) {
>> +            mutex_unlock(&gdrm->damage_lock);
>> +            return -ENOMEM;
>> +        }
>> +    }
>> +
>> +    iosys_map_set_vaddr(&shadow_map, gdrm->shadow_buf);
>> +    iosys_map_incr(&shadow_map, drm_fb_clip_offset(fb->pitches[0],
>> fb->format, damage));
>> +    drm_fb_memcpy(&shadow_map, fb->pitches, map, fb, damage);
>> +
> 
> It's all good (sans the begin_cpu_access issue) in terms of code, but
> the memcpy here can have quite an impact on performace.
> 

Yes if the gud display is the only one in the rendering loop it's not
good. This is on an old HP laptop running the async path (RGB565 over USB):

$ modetest -M gud -s 36:1920x1080 -v
setting mode 1920x1080-60.00Hz on connectors 36, crtc 33
freq: 414.56Hz
freq: 439.71Hz

This is the normal path:

$ modetest -M gud -s 36:1920x1080 -v
setting mode 1920x1080-60.00Hz on connectors 36, crtc 33
freq: 18.11Hz
freq: 17.98Hz

> So why not just nuke the async code entirely? It's a userspace problem
> and we don't have many(/any?) other drivers with such a workaround. Udl,
> our other usb-based driver, works well enough without.
> 

I want to do it gradually so I did consider switching the default to
synchronous flushing in this patchset. I think maybe I should just do
that since you say it "works well enough".

I took the async idea from tiny/gm12u320 and Hans explained the need for
the async worker. He's well versed with userspace so I did the same. But
that is 3 years ago and things change. My long term plan is to drop the
async path, but I want to keep it until I know there are no problems.

I've seen some reports about the udl driver having horrible performance,
but I haven't seen any explanations as to why. It could be because GNOME
until recently[1] didn't provide damage info to the kernel.

The GNOME rendering loop issue was solved in 3.38[2]

Noralf.

[1] https://gitlab.gnome.org/GNOME/mutter/-/merge_requests/1879
[2]
https://blogs.gnome.org/shell-dev/2020/07/02/splitting-up-the-frame-clock/

> Best regards
> Thomas
> 
>>       if (fb != gdrm->fb) {
>>           old_fb = gdrm->fb;
>>           drm_framebuffer_get(fb);
>> @@ -420,6 +421,26 @@ static void gud_fb_queue_damage(struct gud_device
>> *gdrm, struct drm_framebuffer
>>         if (old_fb)
>>           drm_framebuffer_put(old_fb);
>> +
>> +    return 0;
>> +}
>> +
>> +static void gud_fb_handle_damage(struct gud_device *gdrm, struct
>> drm_framebuffer *fb,
>> +                 const struct iosys_map *map, struct drm_rect *damage)
>> +{
>> +    int ret;
>> +
>> +    if (gdrm->flags & GUD_DISPLAY_FLAG_FULL_UPDATE)
>> +        drm_rect_init(damage, 0, 0, fb->width, fb->height);
>> +
>> +    if (gud_async_flush) {
>> +        ret = gud_fb_queue_damage(gdrm, fb, map, damage);
>> +        if (ret != -ENOMEM)
>> +            return;
>> +    }
>> +
>> +    /* Imported buffers are assumed to be WriteCombined with uncached
>> reads */
>> +    gud_flush_damage(gdrm, fb, map, !fb->obj[0]->import_attach, damage);
>>   }
>>     int gud_pipe_check(struct drm_simple_display_pipe *pipe,
>> @@ -544,6 +565,7 @@ void gud_pipe_update(struct
>> drm_simple_display_pipe *pipe,
>>       struct drm_device *drm = pipe->crtc.dev;
>>       struct gud_device *gdrm = to_gud_device(drm);
>>       struct drm_plane_state *state = pipe->plane.state;
>> +    struct drm_shadow_plane_state *shadow_plane_state =
>> to_drm_shadow_plane_state(state);
>>       struct drm_framebuffer *fb = state->fb;
>>       struct drm_crtc *crtc = &pipe->crtc;
>>       struct drm_rect damage;
>> @@ -557,6 +579,8 @@ void gud_pipe_update(struct
>> drm_simple_display_pipe *pipe,
>>               gdrm->fb = NULL;
>>           }
>>           gud_clear_damage(gdrm);
>> +        vfree(gdrm->shadow_buf);
>> +        gdrm->shadow_buf = NULL;
>>           mutex_unlock(&gdrm->damage_lock);
>>       }
>>   @@ -572,13 +596,8 @@ void gud_pipe_update(struct
>> drm_simple_display_pipe *pipe,
>>       if (crtc->state->active_changed)
>>           gud_usb_set_u8(gdrm, GUD_REQ_SET_DISPLAY_ENABLE,
>> crtc->state->active);
>>   -    if (drm_atomic_helper_damage_merged(old_state, state, &damage)) {
>> -        if (gdrm->flags & GUD_DISPLAY_FLAG_FULL_UPDATE)
>> -            drm_rect_init(&damage, 0, 0, fb->width, fb->height);
>> -        gud_fb_queue_damage(gdrm, fb, &damage);
>> -        if (!gud_async_flush)
>> -            flush_work(&gdrm->work);
>> -    }
>> +    if (drm_atomic_helper_damage_merged(old_state, state, &damage))
>> +        gud_fb_handle_damage(gdrm, fb, &shadow_plane_state->data[0],
>> &damage);
>>         if (!crtc->state->enable)
>>           gud_usb_set_u8(gdrm, GUD_REQ_SET_CONTROLLER_ENABLE, 0);
>>
> 


More information about the dri-devel mailing list