[Mesa-dev] [PATCH 7/7] i965: Implement EGL_KHR_mutable_render_buffer

Thu Aug 9 09:48:28 UTC 2018

Reviewed-by: Tapani Pälli <tapani.palli at intel.com>

On 07/31/2018 09:18 PM, Chad Versace wrote:
> Tested with a low-latency handwriting application on Android Nougat on
> the Chrome OS Pixelbook (codename Eve) with Kabylake.
> ---
>   src/mesa/drivers/dri/i965/brw_context.c  | 86 +++++++++++++++++++++++-
>   src/mesa/drivers/dri/i965/brw_context.h  | 12 ++++
>   src/mesa/drivers/dri/i965/intel_screen.c | 13 +++-
>   3 files changed, 107 insertions(+), 4 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
> index 968fc1d43d6..9dfd9520555 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -239,6 +239,35 @@ intel_flush_front(struct gl_context *ctx)
>      }
>   }
>   
> +static void
> +brw_display_shared_buffer(struct brw_context *brw)
> +{
> +   __DRIcontext *dri_context = brw->driContext;
> +   __DRIdrawable *dri_drawable = dri_context->driDrawablePriv;
> +   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
> +   int fence_fd = -1;
> +
> +   if (!brw->is_shared_buffer_bound)
> +      return;
> +
> +   if (!brw->is_shared_buffer_dirty)
> +      return;
> +
> +   if (brw->screen->has_exec_fence) {
> +      /* This function is always called during a flush operation, so there is
> +       * no need to flush again here. But we want to provide a fence_fd to the
> +       * loader, and a redundant flush is the easiest way to acquire one.
> +       */
> +      if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd))
> +         return;
> +   }
> +
> +   dri_screen->mutableRenderBuffer.loader
> +      ->displaySharedBuffer(dri_drawable, fence_fd,
> +                            dri_drawable->loaderPrivate);
> +   brw->is_shared_buffer_dirty = false;
> +}
> +
>   static void
>   intel_glFlush(struct gl_context *ctx)
>   {
> @@ -246,7 +275,7 @@ intel_glFlush(struct gl_context *ctx)
>   
>      intel_batchbuffer_flush(brw);
>      intel_flush_front(ctx);
> -
> +   brw_display_shared_buffer(brw);
>      brw->need_flush_throttle = true;
>   }
>   
> @@ -1457,6 +1486,11 @@ intel_prepare_render(struct brw_context *brw)
>       */
>      if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
>         brw->front_buffer_dirty = true;
> +
> +   if (brw->is_shared_buffer_bound) {
> +      /* Subsequent rendering will probably dirty the shared buffer. */
> +      brw->is_shared_buffer_dirty = true;
> +   }
>   }
>   
>   /**
> @@ -1690,8 +1724,12 @@ intel_update_image_buffer(struct brw_context *intel,
>      else
>         last_mt = rb->singlesample_mt;
>   
> -   if (last_mt && last_mt->bo == buffer->bo)
> +   if (last_mt && last_mt->bo == buffer->bo) {
> +      if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
> +         intel_miptree_make_shareable(intel, last_mt);
> +      }
>         return;
> +   }
>   
>      /* Only allow internal compression if samples == 0.  For multisampled
>       * window system buffers, the only thing the single-sampled buffer is used
> @@ -1720,6 +1758,35 @@ intel_update_image_buffer(struct brw_context *intel,
>          rb->Base.Base.NumSamples > 1) {
>         intel_renderbuffer_upsample(intel, rb);
>      }
> +
> +   if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
> +      /* The compositor and the application may access this image
> +       * concurrently. The display hardware may even scanout the image while
> +       * the GPU is rendering to it.  Aux surfaces cause difficulty with
> +       * concurrent access, so permanently disable aux for this miptree.
> +       *
> +       * Perhaps we could improve overall application performance by
> +       * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
> +       * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
> +       * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
> +       * approach to be highly dependent on the application's GL usage.
> +       *
> +       * I [chadv] expect clever disabling/reenabling to be counterproductive
> +       * in the use cases I care about: applications that render nearly
> +       * realtime handwriting to the surface while possibly undergoing
> +       * simultaneous scanout as a display plane. The app requires low
> +       * render latency. Even though the app spends most of its time in
> +       * shared-buffer mode, it also frequently transitions between
> +       * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
> +       * mode.  Visual stutter during the transitions should be avoided.
> +       *
> +       * In this case, I [chadv] believe reducing the GPU workload at
> +       * shared-buffer/double-buffer transitions would offer a smoother app
> +       * experience than any savings due to aux compression. But I've
> +       * collected no data to prove my theory.
> +       */
> +      intel_miptree_make_shareable(intel, mt);
> +   }
>   }
>   
>   static void
> @@ -1780,4 +1847,19 @@ intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
>                                   images.back,
>                                   __DRI_IMAGE_BUFFER_BACK);
>      }
> +
> +   if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
> +      assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
> +      drawable->w = images.back->width;
> +      drawable->h = images.back->height;
> +      intel_update_image_buffer(brw,
> +                                drawable,
> +                                back_rb,
> +                                images.back,
> +                                __DRI_IMAGE_BUFFER_SHARED);
> +      brw->is_shared_buffer_bound = true;
> +   } else {
> +      brw->is_shared_buffer_bound = false;
> +      brw->is_shared_buffer_dirty = false;
> +   }
>   }
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 72be8f2a4d0..1ff1896cb42 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -791,6 +791,18 @@ struct brw_context
>       */
>      bool front_buffer_dirty;
>   
> +   /**
> +    * True if the __DRIdrawable's current __DRIimageBufferMask is
> +    * __DRI_IMAGE_BUFFER_SHARED.
> +    */
> +   bool is_shared_buffer_bound;
> +
> +   /**
> +    * True if a shared buffer is bound and it has received any rendering since
> +    * the previous __DRImutableRenderBufferLoaderExtension::displaySharedBuffer().
> +    */
> +   bool is_shared_buffer_dirty;
> +
>      /** Framerate throttling: @{ */
>      struct brw_bo *throttle_batch[2];
>   
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
> index f1c195c5d14..b117928f18e 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -1588,12 +1588,17 @@ static const __DRI2blobExtension intelBlobExtension = {
>      .set_cache_funcs = brw_set_cache_funcs
>   };
>   
> +static const __DRImutableRenderBufferDriverExtension intelMutableRenderBufferExtension = {
> +   .base = { __DRI_MUTABLE_RENDER_BUFFER_DRIVER, 1 },
> +};
> +
>   static const __DRIextension *screenExtensions[] = {
>       &intelTexBufferExtension.base,
>       &intelFenceExtension.base,
>       &intelFlushExtension.base,
>       &intelImageExtension.base,
>       &intelRendererQueryExtension.base,
> +    &intelMutableRenderBufferExtension.base,
>       &dri2ConfigQueryExtension.base,
>       &dri2NoErrorExtension.base,
>       &intelBlobExtension.base,
> @@ -1606,6 +1611,7 @@ static const __DRIextension *intelRobustScreenExtensions[] = {
>       &intelFlushExtension.base,
>       &intelImageExtension.base,
>       &intelRendererQueryExtension.base,
> +    &intelMutableRenderBufferExtension.base,
>       &dri2ConfigQueryExtension.base,
>       &dri2Robustness.base,
>       &dri2NoErrorExtension.base,
> @@ -2159,7 +2165,9 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
>      bool allow_rgb10_configs = driQueryOptionb(&screen->optionCache,
>                                                 "allow_rgb10_configs");
>   
> -   /* Generate singlesample configs without accumulation buffer. */
> +   /* Generate singlesample configs, each without accumulation buffer
> +    * and with EGL_MUTABLE_RENDER_BUFFER_BIT_KHR.
> +    */
>      for (unsigned i = 0; i < num_formats; i++) {
>         __DRIconfig **new_configs;
>         int num_depth_stencil_bits = 2;
> @@ -2195,7 +2203,8 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
>                                        num_depth_stencil_bits,
>                                        back_buffer_modes, 2,
>                                        singlesample_samples, 1,
> -                                     false, false, false);
> +                                     false, false,
> +                                     /*mutable_render_buffer*/ true);
>         configs = driConcatConfigs(configs, new_configs);
>      }
>   
>