[Mesa-dev] [PATCH] i965: Add GPU BLIT of texture image to PBO in Intel driver

Ian Romanick idr at freedesktop.org
Fri Feb 28 11:56:59 PST 2014


On 02/28/2014 08:08 AM, Jon Ashburn wrote:
> Add Intel driver hook for glGetTexImage to accelerate the case of reading
> texture image into a PBO.  This case gets huge performance gains by using
> GPU BLIT directly to PBO rather than GPU BLIT to temporary texture followed
> by memcpy.
>
> No regressions on Piglit tests  with Intel driver.
> Performance gain (1280 x 800 FBO, Ivybridge):
> glGetTexImage + glMapBufferRange  with patch 1.45 msec
> glGetTexImage + glMapBufferRange without patch 4.68 msec

Depending on the exact paths they're hitting, it may be worth 
investigating methods that will use the 3D engine.  We have some 
evidence that for anything larger than very small sizes, the 3D engine is much 
faster than the blit engine.  Maybe add a driver hook to use a BO as the 
backing store for a texture.  Texture-from-buffer-object, if you will. 
Then use that texture with meta's CopyTexImage path or something.  That 
might also avoid the fallbacks for array textures.

> ---
>   src/mesa/drivers/dri/i965/intel_tex_image.c | 114 ++++++++++++++++++++++++++++
>   1 file changed, 114 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
> index ee02e68..1d0d72d 100644
> --- a/src/mesa/drivers/dri/i965/intel_tex_image.c
> +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
> @@ -15,6 +15,8 @@
>   #include "main/teximage.h"
>   #include "main/texstore.h"
>
> +#include "drivers/common/meta.h"
> +
>   #include "intel_mipmap_tree.h"
>   #include "intel_buffer_objects.h"
>   #include "intel_batchbuffer.h"
> @@ -415,10 +417,122 @@ intel_image_target_texture_2d(struct gl_context *ctx, GLenum target,
>                                     image->tile_x, image->tile_y);
>   }
>
> +static bool
> +IntelBlitTexToPbo(struct gl_context * ctx,
> +                   GLenum format, GLenum type,
> +                   GLvoid * pixels, struct gl_texture_image *texImage)
> +{
> +   struct intel_texture_image *intelImage = intel_texture_image(texImage);
> +   struct brw_context *brw = brw_context(ctx);
> +   const struct gl_pixelstore_attrib *pack = &(ctx->Pack);
> +   struct intel_buffer_object *dst = intel_buffer_object(pack->BufferObj);
> +   GLuint dst_offset;
> +   drm_intel_bo *dst_buffer;
> +   GLenum target = texImage->TexObject->Target;
> +
> +   DBG("%s\n", __FUNCTION__);
> +
> +   /*
> +    * Check if we can use GPU blit to copy from the hardware texture
> +    * format to the user's format/type.
> +    * Note that GL's pixel transfer ops don't apply to glGetTexImage()
> +    */
> +
> +   if (!_mesa_format_matches_format_and_type(
> +           intelImage->mt->format, format, type, false) ||
> +         (_mesa_is_format_compressed(texImage->TexFormat) &&
> +          _mesa_get_format_datatype(texImage->TexFormat) == GL_UNSIGNED_NORMALIZED)) {

I'm confused about this check.  You can't get the compressed data using 
glGetTexImage.  You have to use glGetCompressedTexImage.  The blitter 
can't do decompression, so shouldn't this check just be

    if (!_mesa_format_matches_format_and_type(
            intelImage->mt->format, format, type, false)) {
       ...
    }

This will let us hit the fast path for ETC textures that we internally 
store as uncompressed RGB.

> +      DBG("%s - bad format for blit to PBO\n", __FUNCTION__);

I think these should be perf_debug.  For most of the performance 
warnings we want to tell the application developer why they didn't get 
the fast path.  That gives them some indication of how to change their code 
to get back on the fast path.

> +      return false;
> +   }
> +
> +   if (ctx->_ImageTransferState) {
> +      DBG("%s - bad transfer state for blit to PBO\n", __FUNCTION__);
> +      return false;
> +   }
> +
> +   if (pack->SkipPixels  || pack->SkipRows) {
> +      DBG("%s - bad skip params for blit to PBO\n", __FUNCTION__);
> +      return false;
> +   }

Does pack->RowLength need to be checked?

> +   if (pack->SwapBytes || pack->LsbFirst) {
> +      DBG("%s: bad packing params\n", __FUNCTION__);
> +      return false;
> +   }
> +
> +   if (target == GL_TEXTURE_1D_ARRAY || target == GL_TEXTURE_CUBE_MAP_ARRAY ||
> +       target == GL_TEXTURE_2D_ARRAY) {
> +      DBG("%s: no support for array textures\n", __FUNCTION__);

Do regular cubemaps actually work?  3D textures?  I don't know whether 
we have piglit tests that would hit that path...

> +      return false;
> +   }
> +
> +   int dst_stride = _mesa_image_row_stride(pack, texImage->Width, format, type);
> +   bool dst_flip = false;
> +   /* Mesa flips the dst_stride for ctx->Pack.Invert, our mt must have a
> +    * normal dst_stride.
> +    */
> +   struct gl_pixelstore_attrib uninverted_pack = *pack;
> +   if (ctx->Pack.Invert) {
> +      dst_stride = -dst_stride;
> +      dst_flip = true;
> +      uninverted_pack.Invert = false;
> +   }
> +   dst_offset = (GLintptr) pixels;
> +   dst_offset += _mesa_image_offset(2, &uninverted_pack, texImage->Width,
> +                                    texImage->Height, format, type, 0, 0, 0);
> +   dst_buffer = intel_bufferobj_buffer(brw, dst, dst_offset,
> +                                       texImage->Height * dst_stride);
> +
> +   struct intel_mipmap_tree *pbo_mt =
> +            intel_miptree_create_for_bo(brw,
> +                                        dst_buffer,
> +                                        intelImage->mt->format,
> +                                        dst_offset,
> +                                        texImage->Width, texImage->Height,
> +                                        dst_stride, I915_TILING_NONE);
> +
> +   if (!pbo_mt)
> +      return false;
> +
> +   if (!intel_miptree_blit(brw,
> +                           intelImage->mt, texImage->Level, texImage->Face,
> +                           0, 0, false,
> +                           pbo_mt, 0, 0,
> +                           0, 0, dst_flip,
> +                           texImage->Width, texImage->Height, GL_COPY))
> +      return false;
> +
> +   intel_miptree_release(&pbo_mt);
> +
> +   DBG("%s - DONE\n", __FUNCTION__);
> +
> +   return true;
> +}
> +
> +static void
> +intel_get_tex_image(struct gl_context *ctx,
> +                       GLenum format, GLenum type, GLvoid *pixels,
> +                       struct gl_texture_image *texImage) {
> +   struct brw_context *brw = brw_context(ctx);
> +   DBG("%s\n", __FUNCTION__);
> +
> +   if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
> +      /* Using PBOs, so try the BLT based path. */
> +      if (IntelBlitTexToPbo(ctx, format, type, pixels, texImage))
> +         return;
> +
> +      perf_debug("%s: fallback to CPU mapping in PBO case\n", __FUNCTION__);

If the DBG messages in IntelBlitTexToPbo are changed to perf_debug 
messages, this one should get removed.

> +   }
> +
> +   _mesa_meta_GetTexImage(ctx, format, type, pixels, texImage);
> +}
> +
>   void
>   intelInitTextureImageFuncs(struct dd_function_table *functions)
>   {
>      functions->TexImage = intelTexImage;
>      functions->EGLImageTargetTexture2D = intel_image_target_texture_2d;
>      functions->BindRenderbufferTexImage = intel_bind_renderbuffer_tex_image;
> +   functions->GetTexImage = intel_get_tex_image;
>   }
>



More information about the mesa-dev mailing list