[Mesa-dev] [PATCH 4/6] i965: Use blorp+userptr for GPU readback

Chris Wilson chris at chris-wilson.co.uk
Fri Oct 13 09:34:54 UTC 2017


The primary benefit for this is that we get format conversion for
"free", along with detiling and cache flushing (most relevant for !llc).
Using the GPU does impose a bandwidth cost that is presumably better
used for rendering, hence we limit the use to readback into client
memory (not pbo) where we would need to stall on the GPU anyway.
(Uploads remain direct/staged to avoid the synchronisation cost.)
And we only use the GPU path if a direct read into client memory from
video memory is unavailable.

The ultimate user of this is Xorg/glamor! On byt, bsw, bxt (and
presumably but not measured ilk), x11perf -shmget500 is improved by
15-fold. Though conversely the overhead of executing and waiting upon an
additional blorp batch is shown by x11perf -shmget10 being reduced by a
factor of 2. I think it is fair to presume that large copies will
dominate (and that the overhead of a single batch is something that we
can iteratively reduce, for the benefit of all.) llc machines continue to
use direct access where there is no format changes (which one hopes is
the typical use case).

Cc: Jason Ekstrand <jason.ekstrand at intel.com>
Cc: Topi Pohjolainen <topi.pohjolainen at intel.com>
Cc: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_blorp.c        | 34 ++++++++++++++++++++++------
 src/mesa/drivers/dri/i965/intel_pixel_read.c | 21 ++++++++---------
 src/mesa/drivers/dri/i965/intel_tex_image.c  | 27 +++++++++++-----------
 3 files changed, 50 insertions(+), 32 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c
index ed4f9870f2..c2a549b204 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -36,6 +36,7 @@
 #include "brw_defines.h"
 #include "brw_meta_util.h"
 #include "brw_state.h"
+#include "intel_batchbuffer.h"
 #include "intel_buffer_objects.h"
 #include "intel_fbo.h"
 #include "common/gen_debug.h"
@@ -806,13 +807,10 @@ blorp_get_client_bo(struct brw_context *brw,
 
       *offset_out = offset;
       return bo;
-   } else {
+   } else if (read_only) {
       /* Someone should have already checked that there is data to upload. */
       assert(pixels);
 
-      /* Creating a temp buffer currently only works for upload */
-      assert(read_only);
-
       /* This is not a user-provided PBO.  Instead, pixels is a pointer to CPU
        * data which we need to copy into a BO.
        */
@@ -832,6 +830,23 @@ blorp_get_client_bo(struct brw_context *brw,
 
       *offset_out = 0;
       return bo;
+   } else if (brw->screen->kernel_featuers & KERNEL_ALLOWS_USERPTR) {
+      void *addr = (void *)pixels + first_pixel;
+      void *first_page = (void *)((GLintptr)addr & -4096);
+      void *last_page = (void *)(ALIGN((GLintptr)(pixels + last_pixel), 4096));
+
+      struct brw_bo *bo =
+	 brw_bo_alloc_userptr(brw->bufmgr, "tex_subimage_userptr",
+			      first_page, last_page - first_page, 0);
+      if (bo == NULL) {
+         perf_debug("intel_texsubimage: userptr mapping failed\n");
+         return NULL;
+      }
+
+      *offset_out = addr - first_page;
+      return bo;
+   } else {
+      return NULL;
    }
 }
 
@@ -974,6 +989,10 @@ brw_blorp_upload_miptree(struct brw_context *brw,
    result = true;
 
 err:
+   if (src_bo->userptr) {
+      intel_batchbuffer_flush(brw);
+      brw_bo_wait_rendering(src_bo);
+   }
    brw_bo_unreference(src_bo);
 
    return result;
@@ -1019,9 +1038,6 @@ brw_blorp_download_miptree(struct brw_context *brw,
       break;
    }
 
-   /* This pass only works for PBOs */
-   assert(_mesa_is_bufferobj(packing->BufferObj));
-
    uint32_t dst_offset, dst_row_stride, dst_image_stride;
    struct brw_bo *dst_bo =
       blorp_get_client_bo(brw, width, height, depth,
@@ -1117,6 +1133,10 @@ brw_blorp_download_miptree(struct brw_context *brw,
    brw_emit_mi_flush(brw);
 
 err:
+   if (dst_bo->userptr) {
+      intel_batchbuffer_flush(brw);
+      brw_bo_wait_rendering(dst_bo);
+   }
    brw_bo_unreference(dst_bo);
 
    return result;
diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c
index 4528d6d265..699ce73b0f 100644
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -259,8 +259,6 @@ intelReadPixels(struct gl_context * ctx,
                 GLenum format, GLenum type,
                 const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
 {
-   bool ok;
-
    struct brw_context *brw = brw_context(ctx);
    bool dirty;
 
@@ -273,18 +271,19 @@ intelReadPixels(struct gl_context * ctx,
    intel_prepare_render(brw);
    brw->front_buffer_dirty = dirty;
 
-   if (_mesa_is_bufferobj(pack->BufferObj)) {
-      if (intel_readpixels_blorp(ctx, x, y, width, height,
-                                 format, type, pixels, pack))
-         return;
-
-      perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
+   if (!_mesa_is_bufferobj(pack->BufferObj) &&
+       intel_readpixels_tiled_memcpy(ctx, x, y, width, height,
+                                     format, type, pixels, pack)) {
+      return;
    }
 
-   ok = intel_readpixels_tiled_memcpy(ctx, x, y, width, height,
-                                      format, type, pixels, pack);
-   if(ok)
+   if (intel_readpixels_blorp(ctx, x, y, width, height,
+                              format, type, pixels, pack)) {
       return;
+   }
+
+   perf_debug("%s: fallback to CPU mapping for %s\n", __func__,
+              _mesa_is_bufferobj(pack->BufferObj) ? "PBO" : "memory");
 
    /* Update Mesa state before calling _mesa_readpixels().
     * XXX this may not be needed since ReadPixels no longer uses the
diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
index 5396e0a43b..74d10f29f1 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -739,26 +739,25 @@ intel_get_tex_sub_image(struct gl_context *ctx,
                         struct gl_texture_image *texImage)
 {
    struct brw_context *brw = brw_context(ctx);
-   bool ok;
 
    DBG("%s\n", __func__);
 
-   if (_mesa_is_bufferobj(ctx->Pack.BufferObj)) {
-      if (intel_gettexsubimage_blorp(brw, texImage,
-                                     xoffset, yoffset, zoffset,
-                                     width, height, depth, format, type,
-                                     pixels, &ctx->Pack))
-         return;
-
-      perf_debug("%s: fallback to CPU mapping in PBO case\n", __func__);
+   if (!_mesa_is_bufferobj(ctx->Pack.BufferObj) &&
+       intel_gettexsubimage_tiled_memcpy(ctx, texImage, xoffset, yoffset,
+                                         width, height,
+                                         format, type, pixels, &ctx->Pack)) {
+      return;
    }
 
-   ok = intel_gettexsubimage_tiled_memcpy(ctx, texImage, xoffset, yoffset,
-                                          width, height,
-                                          format, type, pixels, &ctx->Pack);
-
-   if(ok)
+   if (intel_gettexsubimage_blorp(brw, texImage,
+                                  xoffset, yoffset, zoffset,
+                                  width, height, depth, format, type,
+                                  pixels, &ctx->Pack)) {
       return;
+   }
+
+   perf_debug("%s: fallback to CPU mapping for %s\n", __func__,
+              _mesa_is_bufferobj(ctx->Pack.BufferObj) ? "PBO" : "memory");
 
    _mesa_meta_GetTexSubImage(ctx, xoffset, yoffset, zoffset,
                              width, height, depth,
-- 
2.15.0.rc0



More information about the mesa-dev mailing list