[Mesa-dev] [PATCH 7/7] i965: Implement EGL_KHR_mutable_render_buffer
Tapani Pälli
tapani.palli at intel.com
Thu Aug 9 09:48:28 UTC 2018
Reviewed-by: Tapani Pälli <tapani.palli at intel.com>
On 07/31/2018 09:18 PM, Chad Versace wrote:
> Tested with a low-latency handwriting application on Android Nougat on
> the Chrome OS Pixelbook (codename Eve) with Kabylake.
> ---
> src/mesa/drivers/dri/i965/brw_context.c | 86 +++++++++++++++++++++++-
> src/mesa/drivers/dri/i965/brw_context.h | 12 ++++
> src/mesa/drivers/dri/i965/intel_screen.c | 13 +++-
> 3 files changed, 107 insertions(+), 4 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
> index 968fc1d43d6..9dfd9520555 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -239,6 +239,35 @@ intel_flush_front(struct gl_context *ctx)
> }
> }
>
> +static void
> +brw_display_shared_buffer(struct brw_context *brw)
> +{
> + __DRIcontext *dri_context = brw->driContext;
> + __DRIdrawable *dri_drawable = dri_context->driDrawablePriv;
> + __DRIscreen *dri_screen = brw->screen->driScrnPriv;
> + int fence_fd = -1;
> +
> + if (!brw->is_shared_buffer_bound)
> + return;
> +
> + if (!brw->is_shared_buffer_dirty)
> + return;
> +
> + if (brw->screen->has_exec_fence) {
> + /* This function is always called during a flush operation, so there is
> + * no need to flush again here. But we want to provide a fence_fd to the
> + * loader, and a redundant flush is the easiest way to acquire one.
> + */
> + if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd))
> + return;
> + }
> +
> + dri_screen->mutableRenderBuffer.loader
> + ->displaySharedBuffer(dri_drawable, fence_fd,
> + dri_drawable->loaderPrivate);
> + brw->is_shared_buffer_dirty = false;
> +}
> +
> static void
> intel_glFlush(struct gl_context *ctx)
> {
> @@ -246,7 +275,7 @@ intel_glFlush(struct gl_context *ctx)
>
> intel_batchbuffer_flush(brw);
> intel_flush_front(ctx);
> -
> + brw_display_shared_buffer(brw);
> brw->need_flush_throttle = true;
> }
>
> @@ -1457,6 +1486,11 @@ intel_prepare_render(struct brw_context *brw)
> */
> if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
> brw->front_buffer_dirty = true;
> +
> + if (brw->is_shared_buffer_bound) {
> + /* Subsequent rendering will probably dirty the shared buffer. */
> + brw->is_shared_buffer_dirty = true;
> + }
> }
>
> /**
> @@ -1690,8 +1724,12 @@ intel_update_image_buffer(struct brw_context *intel,
> else
> last_mt = rb->singlesample_mt;
>
> - if (last_mt && last_mt->bo == buffer->bo)
> + if (last_mt && last_mt->bo == buffer->bo) {
> + if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
> + intel_miptree_make_shareable(intel, last_mt);
> + }
> return;
> + }
>
> /* Only allow internal compression if samples == 0. For multisampled
> * window system buffers, the only thing the single-sampled buffer is used
> @@ -1720,6 +1758,35 @@ intel_update_image_buffer(struct brw_context *intel,
> rb->Base.Base.NumSamples > 1) {
> intel_renderbuffer_upsample(intel, rb);
> }
> +
> + if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
> + /* The compositor and the application may access this image
> + * concurrently. The display hardware may even scanout the image while
> + * the GPU is rendering to it. Aux surfaces cause difficulty with
> + * concurrent access, so permanently disable aux for this miptree.
> + *
> + * Perhaps we could improve overall application performance by
> + * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
> + * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
> + * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
> + * approach to be highly dependent on the application's GL usage.
> + *
> + * I [chadv] expect clever disabling/reenabling to be counterproductive
> + * in the use cases I care about: applications that render nearly
> + * realtime handwriting to the surface while possibly undergoing
> + * simultaneous scanout as a display plane. The app requires low
> + * render latency. Even though the app spends most of its time in
> + * shared-buffer mode, it also frequently transitions between
> + * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
> + * mode. Visual stutter during the transitions should be avoided.
> + *
> + * In this case, I [chadv] believe reducing the GPU workload at
> + * shared-buffer/double-buffer transitions would offer a smoother app
> + * experience than any savings due to aux compression. But I've
> + * collected no data to prove my theory.
> + */
> + intel_miptree_make_shareable(intel, mt);
> + }
> }
>
> static void
> @@ -1780,4 +1847,19 @@ intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
> images.back,
> __DRI_IMAGE_BUFFER_BACK);
> }
> +
> + if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
> + assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
> + drawable->w = images.back->width;
> + drawable->h = images.back->height;
> + intel_update_image_buffer(brw,
> + drawable,
> + back_rb,
> + images.back,
> + __DRI_IMAGE_BUFFER_SHARED);
> + brw->is_shared_buffer_bound = true;
> + } else {
> + brw->is_shared_buffer_bound = false;
> + brw->is_shared_buffer_dirty = false;
> + }
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 72be8f2a4d0..1ff1896cb42 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -791,6 +791,18 @@ struct brw_context
> */
> bool front_buffer_dirty;
>
> + /**
> + * True if the __DRIdrawable's current __DRIimageBufferMask is
> + * __DRI_IMAGE_BUFFER_SHARED.
> + */
> + bool is_shared_buffer_bound;
> +
> + /**
> + * True if a shared buffer is bound and it has received any rendering since
> + * the previous __DRImutableRenderBufferLoaderExtension::displaySharedBuffer().
> + */
> + bool is_shared_buffer_dirty;
> +
> /** Framerate throttling: @{ */
> struct brw_bo *throttle_batch[2];
>
> diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
> index f1c195c5d14..b117928f18e 100644
> --- a/src/mesa/drivers/dri/i965/intel_screen.c
> +++ b/src/mesa/drivers/dri/i965/intel_screen.c
> @@ -1588,12 +1588,17 @@ static const __DRI2blobExtension intelBlobExtension = {
> .set_cache_funcs = brw_set_cache_funcs
> };
>
> +static const __DRImutableRenderBufferDriverExtension intelMutableRenderBufferExtension = {
> + .base = { __DRI_MUTABLE_RENDER_BUFFER_DRIVER, 1 },
> +};
> +
> static const __DRIextension *screenExtensions[] = {
> &intelTexBufferExtension.base,
> &intelFenceExtension.base,
> &intelFlushExtension.base,
> &intelImageExtension.base,
> &intelRendererQueryExtension.base,
> + &intelMutableRenderBufferExtension.base,
> &dri2ConfigQueryExtension.base,
> &dri2NoErrorExtension.base,
> &intelBlobExtension.base,
> @@ -1606,6 +1611,7 @@ static const __DRIextension *intelRobustScreenExtensions[] = {
> &intelFlushExtension.base,
> &intelImageExtension.base,
> &intelRendererQueryExtension.base,
> + &intelMutableRenderBufferExtension.base,
> &dri2ConfigQueryExtension.base,
> &dri2Robustness.base,
> &dri2NoErrorExtension.base,
> @@ -2159,7 +2165,9 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
> bool allow_rgb10_configs = driQueryOptionb(&screen->optionCache,
> "allow_rgb10_configs");
>
> - /* Generate singlesample configs without accumulation buffer. */
> + /* Generate singlesample configs, each without accumulation buffer
> + * and with EGL_MUTABLE_RENDER_BUFFER_BIT_KHR.
> + */
> for (unsigned i = 0; i < num_formats; i++) {
> __DRIconfig **new_configs;
> int num_depth_stencil_bits = 2;
> @@ -2195,7 +2203,8 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
> num_depth_stencil_bits,
> back_buffer_modes, 2,
> singlesample_samples, 1,
> - false, false, false);
> + false, false,
> + /*mutable_render_buffer*/ true);
> configs = driConcatConfigs(configs, new_configs);
> }
>
>
More information about the mesa-dev
mailing list