Mesa (master): vc4: Only render tiles where the scissor ever intersected them.

Eric Anholt anholt at kemper.freedesktop.org
Tue Dec 30 22:35:05 UTC 2014


Module: Mesa
Branch: master
Commit: 3ba57bae47666ada1145259755fc326b1b9f9463
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3ba57bae47666ada1145259755fc326b1b9f9463

Author: Eric Anholt <eric at anholt.net>
Date:   Sun Dec 28 08:14:19 2014 -1000

vc4: Only render tiles where the scissor ever intersected them.

This gives a 2.7x improvement in x11perf -rect100, since we end up loading
and storing only the x11perf window, not the whole screen.

---

 src/gallium/drivers/vc4/vc4_context.c |   37 ++++++++++++++++++++++++++-------
 src/gallium/drivers/vc4/vc4_context.h |   10 +++++++++
 src/gallium/drivers/vc4/vc4_draw.c    |    4 ++++
 src/gallium/drivers/vc4/vc4_emit.c    |   11 ++++++++--
 4 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c
index 401eb21..e4e4330 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -94,8 +94,15 @@ vc4_setup_rcl(struct vc4_context *vc4)
         uint32_t resolve_uncleared = vc4->resolve & ~vc4->cleared;
         uint32_t width = vc4->framebuffer.width;
         uint32_t height = vc4->framebuffer.height;
-        uint32_t xtiles = align(width, 64) / 64;
-        uint32_t ytiles = align(height, 64) / 64;
+        uint32_t stride_in_tiles = align(width, 64) / 64;
+
+        assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
+        uint32_t min_x_tile = vc4->draw_min_x / 64;
+        uint32_t min_y_tile = vc4->draw_min_y / 64;
+        uint32_t max_x_tile = (vc4->draw_max_x - 1) / 64;
+        uint32_t max_y_tile = (vc4->draw_max_y - 1) / 64;
+        uint32_t xtiles = max_x_tile - min_x_tile + 1;
+        uint32_t ytiles = max_y_tile - min_y_tile + 1;
 
 #if 0
         fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
@@ -171,10 +178,10 @@ vc4_setup_rcl(struct vc4_context *vc4)
         uint32_t depth_hindex = ztex ? vc4_gem_hindex(vc4, ztex->bo) : 0;
         uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc);
 
-        for (int y = 0; y < ytiles; y++) {
-                for (int x = 0; x < xtiles; x++) {
-                        bool end_of_frame = (x == xtiles - 1 &&
-                                             y == ytiles - 1);
+        for (int y = min_y_tile; y <= max_y_tile; y++) {
+                for (int x = min_x_tile; x <= max_x_tile; x++) {
+                        bool end_of_frame = (x == max_x_tile &&
+                                             y == max_y_tile);
                         bool coords_emitted = false;
 
                         /* Note that the load doesn't actually occur until the
@@ -225,13 +232,13 @@ vc4_setup_rcl(struct vc4_context *vc4)
                         /* Wait for the binner before jumping to the first
                          * tile's lists.
                          */
-                        if (x == 0 && y == 0)
+                        if (x == min_x_tile && y == min_y_tile)
                                 cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE);
 
                         cl_start_reloc(&vc4->rcl, 1);
                         cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
                         cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex,
-                                        (y * xtiles + x) * 32);
+                                        (y * stride_in_tiles + x) * 32);
 
                         if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
                                 vc4_tile_coordinates(vc4, x, y, &coords_emitted);
@@ -313,6 +320,11 @@ vc4_draw_reset(struct vc4_context *vc4)
         vc4->dirty = ~0;
         vc4->resolve = 0;
         vc4->cleared = 0;
+
+        vc4->draw_min_x = ~0;
+        vc4->draw_min_y = ~0;
+        vc4->draw_max_x = 0;
+        vc4->draw_max_y = 0;
 }
 
 void
@@ -323,6 +335,15 @@ vc4_flush(struct pipe_context *pctx)
         if (!vc4->needs_flush)
                 return;
 
+        /* The RCL setup would choke if the draw bounds cause no drawing, so
+         * just drop the drawing if that's the case.
+         */
+        if (vc4->draw_max_x <= vc4->draw_min_x ||
+            vc4->draw_max_y <= vc4->draw_min_y) {
+                vc4_draw_reset(vc4);
+                return;
+        }
+
         /* Increment the semaphore indicating that binning is done and
          * unblocking the render thread.  Note that this doesn't act until the
          * FLUSH completes.
diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index 962abbf..7e18a75 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -173,6 +173,16 @@ struct vc4_context {
         struct vc4_cl bo_handles;
         struct vc4_cl bo_pointers;
         uint32_t shader_rec_count;
+        /** @{
+         * Bounding box of the scissor across all queued drawing.
+         *
+         * Note that the max values are exclusive.
+         */
+        uint32_t draw_min_x;
+        uint32_t draw_min_y;
+        uint32_t draw_max_x;
+        uint32_t draw_max_y;
+        /** @} */
 
         struct vc4_bo *tile_alloc;
         struct vc4_bo *tile_state;
diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c
index d99faa4..0d91504 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -316,6 +316,10 @@ vc4_clear(struct pipe_context *pctx, unsigned buffers,
         if (buffers & PIPE_CLEAR_STENCIL)
                 vc4->clear_stencil = stencil;
 
+        vc4->draw_min_x = 0;
+        vc4->draw_min_y = 0;
+        vc4->draw_max_x = vc4->framebuffer.width;
+        vc4->draw_max_y = vc4->framebuffer.height;
         vc4->cleared |= buffers;
         vc4->resolve |= buffers;
 
diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c
index 6856441..d2b54fc 100644
--- a/src/gallium/drivers/vc4/vc4_emit.c
+++ b/src/gallium/drivers/vc4/vc4_emit.c
@@ -37,12 +37,19 @@ vc4_emit_state(struct pipe_context *pctx)
                 float vp_maxy = fabs(vpscale[1]) + vptranslate[1];
                 uint32_t minx = MAX2(vc4->scissor.minx, vp_minx);
                 uint32_t miny = MAX2(vc4->scissor.miny, vp_miny);
+                uint32_t maxx = MIN2(vc4->scissor.maxx, vp_maxx);
+                uint32_t maxy = MIN2(vc4->scissor.maxy, vp_maxy);
 
                 cl_u8(&vc4->bcl, VC4_PACKET_CLIP_WINDOW);
                 cl_u16(&vc4->bcl, minx);
                 cl_u16(&vc4->bcl, miny);
-                cl_u16(&vc4->bcl, MIN2(vc4->scissor.maxx, vp_maxx) - minx);
-                cl_u16(&vc4->bcl, MIN2(vc4->scissor.maxy, vp_maxy) - miny);
+                cl_u16(&vc4->bcl, maxx - minx);
+                cl_u16(&vc4->bcl, maxy - miny);
+
+                vc4->draw_min_x = MIN2(vc4->draw_min_x, minx);
+                vc4->draw_min_y = MIN2(vc4->draw_min_y, miny);
+                vc4->draw_max_x = MAX2(vc4->draw_max_x, maxx);
+                vc4->draw_max_y = MAX2(vc4->draw_max_y, maxy);
         }
 
         if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) {




More information about the mesa-commit mailing list