[virglrenderer-devel] [PATCH v2 1/2] vrend: If available use glCopyImageSubData to execute memcopy like blits

Wed Jul 11 01:07:05 UTC 2018

On Tue, Jul 10, 2018 at 4:07 PM Marek Olšák <maraeo at gmail.com> wrote:
>
> On Tue, Jul 10, 2018 at 4:07 PM, Gurchetan Singh
> <gurchetansingh at chromium.org> wrote:
> > On Tue, Jul 3, 2018 at 4:21 AM Gert Wollny <gert.wollny at collabora.com> wrote:
> >>
> >> When the host is  gles >= 3.2, gl >= 4.3, or when the extension
> >> GL_(ARB|EXT|OES)_copy_image is available, memcopy like blitting and region
> >> copying can be done for many color format combinations by using
> >> glCopyImageSubData.
> >>
> >> This fixes a number of tests from the subset
> >>   dEQP-GLES31.functional.copy_image.non_compressed.viewclass_*
> >>
> >> v2: - Clean list of canonical formats (Gurchetan Singh)
> >>     - Use size of canonical formats to decide whether they can be copied via
> >>       gCopyImageSubData
> >>     - Also honour the render state when deciding whether glCopyImageSubData
> >>       will do, or whether we need to do a blit.
> >>
> >> Signed-off-by: Gert Wollny <gert.wollny at collabora.com>
> >> ---
> >>  src/vrend_formats.c  | 40 +++++++++++++++++++++++++++++++
> >>  src/vrend_renderer.c | 57 ++++++++++++++++++++++++++++++++++++++++++--
> >>  src/vrend_renderer.h |  2 ++
> >>  3 files changed, 97 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/src/vrend_formats.c b/src/vrend_formats.c
> >> index eb9f217..07f9985 100644
> >> --- a/src/vrend_formats.c
> >> +++ b/src/vrend_formats.c
> >> @@ -443,3 +443,43 @@ void vrend_build_format_list_gles(void)
> >>     */
> >>    add_formats(gles_z32_format);
> >>  }
> >> +
> >> +static uint vrend_is_canonical_format(enum pipe_format format)
> >> +{
> >> +   /* These are the canonical formats that are set by gallium */
> >> +
> >> +   switch (format) {
> >> +   /* 128 bit */
> >> +   case PIPE_FORMAT_R32G32B32A32_UINT:
> >> +      return 128;
> >> +
> >> +    /* 64 bit */
> >> +   case PIPE_FORMAT_R32G32_UINT:
> >> +   case PIPE_FORMAT_R16G16B16A16_UINT:
> >> +      return 64;
> >> +
> >> +   /* 32 bit */
> >> +   case PIPE_FORMAT_R8G8B8A8_UNORM:
> >> +   case PIPE_FORMAT_R16G16_UNORM:
> >> +   case PIPE_FORMAT_R32_UINT:
> >> +      return 32;
> >> +
> >> +   /* 16 bit */
> >> +   case PIPE_FORMAT_R8G8_UNORM:
> >> +   case PIPE_FORMAT_R16_UINT:
> >> +      return 16;
> >> +
> >> +   case PIPE_FORMAT_R8_UINT:
> >> +      return 8;
> >> +   default:
> >> +      return 0;
> >> +   }
> >> +}
> >> +
> >> +bool vrend_canonical_formats_of_same_size(enum pipe_format lhs, enum pipe_format rhs)
> >> +{
> >> +   uint lhss = vrend_is_canonical_format(lhs);
> >> +   uint rhss = vrend_is_canonical_format(rhs);
> >> +
> >> +   return (lhss > 0 ) && (lhss == rhss);
> >> +}
> >> \ No newline at end of file
> >> diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c
> >> index 23494a8..1bf33f1 100644
> >> --- a/src/vrend_renderer.c
> >> +++ b/src/vrend_renderer.c
> >> @@ -119,6 +119,7 @@ struct global_renderer_state {
> >>     bool have_texture_storage;
> >>     bool have_tessellation;
> >>     bool have_texture_view;
> >> +   bool have_copy_image;
> >>
> >>     /* these appeared broken on at least one driver */
> >>     bool use_explicit_locations;
> >> @@ -4527,6 +4528,12 @@ int vrend_renderer_init(struct vrend_if_cbs *cbs, uint32_t flags)
> >>     if (gl_ver >= 46 || epoxy_has_gl_extension("GL_ARB_polygon_offset_clamp"))
> >>        vrend_state.have_polygon_offset_clamp = true;
> >>
> >> +   if (gl_ver >= 43 || (gles && gl_ver >= 32) ||
> >> +       epoxy_has_gl_extension("GL_ARB_copy_image") ||
> >> +       epoxy_has_gl_extension("GL_EXT_copy_image") ||
> >> +       epoxy_has_gl_extension("GL_OES_copy_image"))
> >> +       vrend_state.have_copy_image = true;
> >> +
> >>     /* callbacks for when we are cleaning up the object table */
> >>     vrend_resource_set_destroy_callback(vrend_destroy_resource_object);
> >>     vrend_object_set_destroy_callback(VIRGL_OBJECT_QUERY, vrend_destroy_query_object);
> >> @@ -5746,9 +5753,11 @@ static int vrend_transfer_send_readpixels(struct vrend_context *ctx,
> >>
> >>     switch (elsize) {
> >>     case 1:
> >> +   case 3:
> >>        glPixelStorei(GL_PACK_ALIGNMENT, 1);
> >>        break;
> >>     case 2:
> >> +   case 6:
> >>        glPixelStorei(GL_PACK_ALIGNMENT, 2);
> >>        break;
> >>     case 4:
> >> @@ -6330,6 +6339,22 @@ static void vrend_resource_copy_fallback(struct vrend_resource *src_res,
> >>     free(tptr);
> >>  }
> >>
> >> +
> >> +static inline void
> >> +vrend_copy_sub_image(struct vrend_resource* src_res, struct vrend_resource * dst_res,
> >> +                     uint32_t src_level, const struct pipe_box *src_box,
> >> +                     uint32_t dst_level, uint32_t dstx, uint32_t dsty, uint32_t dstz)
> >> +{
> >> +   glCopyImageSubData(src_res->id,
> >> +                      tgsitargettogltarget(src_res->base.target, src_res->base.nr_samples),
> >> +                      src_level, src_box->x, src_box->y, src_box->z,
> >> +                      dst_res->id,
> >> +                      tgsitargettogltarget(dst_res->base.target, dst_res->base.nr_samples),
> >> +                      dst_level, dstx, dsty, dstz,
> >> +                      src_box->width, src_box->height,src_box->depth);
> >> +}
> >> +
> >> +
> >>  void vrend_renderer_resource_copy_region(struct vrend_context *ctx,
> >>                                           uint32_t dst_handle, uint32_t dst_level,
> >>                                           uint32_t dstx, uint32_t dsty, uint32_t dstz,
> >> @@ -6362,11 +6387,21 @@ void vrend_renderer_resource_copy_region(struct vrend_context *ctx,
> >>        return;
> >>     }
> >>
> >> +   if (vrend_state.have_copy_image) {
> >> +      const struct util_format_description *src_desc = util_format_description(src_res->base.format);
> >> +      const struct util_format_description *dst_desc = util_format_description(dst_res->base.format);
> >> +      if (util_is_format_compatible(src_desc,dst_desc) &&
> >
> > nit: space after comma
> >
> >> +          src_res->base.nr_samples == dst_res->base.nr_samples) {
> >> +         vrend_copy_sub_image(src_res, dst_res, src_level, src_box,
> >> +                              dst_level, dstx, dsty, dstz);
> >> +         return;
> >> +      }
> >> +   }
> >> +
> >>     if (!vrend_format_can_render(src_res->base.format) ||
> >>         !vrend_format_can_render(dst_res->base.format)) {
> >>        vrend_resource_copy_fallback(src_res, dst_res, dst_level, dstx,
> >>                                     dsty, dstz, src_level, src_box);
> >> -
> >>        return;
> >>     }
> >>
> >> @@ -6638,7 +6673,25 @@ void vrend_renderer_blit(struct vrend_context *ctx,
> >>     if (info->render_condition_enable == false)
> >>        vrend_pause_render_condition(ctx, true);
> >>
> >> -   vrend_renderer_blit_int(ctx, src_res, dst_res, info);
> >> +   /* The gallium blit function can be called for a general blit that may
> >> +    * scale, convert the data, and apply some rander states
> >
> > nit: render
> >
> >>        or if is called via
> >> +    * glCopyImageSubData. For the latter case forward the call to the
> >> +    * glCopyImageSubData function, otherwise use a framebuffer blit.
> >> +    */
> >
> > This comment could be better.  glCopyImageSubData and other blit
> > functions are implemented on top of Gallium blit.  Gallium does magic
> > behind the scenes to make glCopyImageSubData work.  Our framebuffer
> > blit doesn't seem to take into account the instance
> > (src_res->base.format != info->src.format).  We need to detect this
> > and do glCopyImageSubData in that case.
> >
> > We may be able to simplify this detection logic a bit.  My reading of
> > st_cb_copyimage.c leads me to believe:
> >
> > 1) pipe->blit() expects a per-channel memcpy -- that is, RGBA8 ->
> > BGRA8 flips the red and blue channels.  This based on the comments in
> > swizzled_copy, such as
> >
> > "Only the swizzle is different, which means we can just blit"
> > "R32 -> BGRA8 is realized as RGBA8 -> BGRA8"
> > "BGRA8 -> R32 is realized as BGRA8 -> RGBA8"
> >
> > 2) That seems to imply glCopyImageSubData does per-channel memcpy too
> > when the formats are compatible (???).
> >
> > Marek, Ilia -- is this understanding correct?
>
> glCopyImageSubData views all images as having the RGBA component
> order. BGRA doesn't exist. If Mesa uses BGRA internally and blits to
> R32, it has to swap B and R, so that R32 looks like the source was
> RGBA.
>
> pipe->blit is a textured blit. It does exactly what drawing a textured
> quad would do. There are no tricks there.

Thanks for the clarifications.  gert.wollny@, would it be possible to
get rid of vrend_canonical_formats_of_same_size /
vrend_is_canonical_format, and do the following instead in
vrend_renderer_blit?

const struct util_format_description *src_desc =
util_format_description(info->src.format);
const struct util_format_description *dst_desc =
util_format_description(info->dest.format);
if (util_is_format_compatible(src_desc, dst_desc) && ...)
      vrend_copy_sub_image(..)

Gallium seems to make the formats equivalent in swizzled_copy:

If it's a RGBA8 -> R32 memcpy blit, has_identity_swizzle(src_desc) is
true, making it a R32 -> R32 blit
If it's a RG16 -> RGBA8 memcpy blit, has_identity_swizzle(src_desc) is
true, making it a RGBA8 -> RGBA8 blit

We shouldn't need to worry about BGRA8 --> RGBA8 blits -- that'll be
handled on the host side.

> Marek