Mesa (master): llvmpipe: simplify address calculation for 4x4 blocks

Roland Scheidegger sroland at kemper.freedesktop.org
Sat Mar 28 02:01:51 UTC 2015


Module: Mesa
Branch: master
Commit: b2424fb0304cf4afd363b35c1dab49fb7edddb08
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b2424fb0304cf4afd363b35c1dab49fb7edddb08

Author: Roland Scheidegger <sroland at vmware.com>
Date:   Fri Mar 27 16:49:54 2015 +0100

llvmpipe: simplify address calculation for 4x4 blocks

These functions looked quite complicated, even though what they actually did
was trivial (ever since we dropped swizzled rendering). Also drop lookup of
format block per bytes done for each block, and do it once per scene instead.
This improves everybody's favorite "benchmark" by 3% or so, though
lp_rast_shade_quads_all() which calls this shows up still quite high for a
function which does little more than call the jit function.
(This would most likely be much better handled by the jit function itself,
the strides are passed through anyway already, though for being able to
handle layers it would definitely add some complexity.)

Reviewed-by: Jose Fonseca <jfonseca at vmware.com>

---

 src/gallium/drivers/llvmpipe/lp_rast.c      |   20 ++++--
 src/gallium/drivers/llvmpipe/lp_rast_priv.h |   87 +++++----------------------
 src/gallium/drivers/llvmpipe/lp_scene.c     |    3 +
 src/gallium/drivers/llvmpipe/lp_scene.h     |    1 +
 4 files changed, 35 insertions(+), 76 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 903e7c5..7019acb 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -91,6 +91,9 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
                    const struct cmd_bin *bin,
                    int x, int y)
 {
+   unsigned i;
+   struct lp_scene *scene = task->scene;
+
    LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
 
    task->bin = bin;
@@ -104,9 +107,18 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
    task->thread_data.vis_counter = 0;
    task->ps_invocations = 0;
 
-   /* reset pointers to color and depth tile(s) */
-   memset(task->color_tiles, 0, sizeof(task->color_tiles));
-   task->depth_tile = NULL;
+   for (i = 0; i < task->scene->fb.nr_cbufs; i++) {
+      if (task->scene->fb.cbufs[i]) {
+         task->color_tiles[i] = scene->cbufs[i].map +
+                                scene->cbufs[i].stride * task->y +
+                                scene->cbufs[i].format_bytes * task->x;
+      }
+   }
+   if (task->scene->fb.zsbuf) {
+      task->depth_tile = scene->zsbuf.map +
+                         scene->zsbuf.stride * task->y +
+                         scene->zsbuf.format_bytes * task->x;
+   }
 }
 
 
@@ -186,7 +198,7 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
 
    if (scene->fb.zsbuf) {
       unsigned layer;
-      uint8_t *dst_layer = lp_rast_get_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
+      uint8_t *dst_layer = task->depth_tile;
       block_size = util_format_get_blocksize(scene->fb.zsbuf->format);
 
       clear_value &= clear_mask;
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index d92230d..e6ebbcd 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -141,64 +141,6 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
                          unsigned mask);
 
 
-
-/**
- * Get pointer to the color tile
- */
-static INLINE uint8_t *
-lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
-                               unsigned buf, enum lp_texture_usage usage)
-{
-   const struct lp_scene *scene = task->scene;
-   unsigned format_bytes;
-
-   assert(task->x < scene->tiles_x * TILE_SIZE);
-   assert(task->y < scene->tiles_y * TILE_SIZE);
-   assert(task->x % TILE_SIZE == 0);
-   assert(task->y % TILE_SIZE == 0);
-   assert(buf < scene->fb.nr_cbufs);
-
-   if (!task->color_tiles[buf]) {
-      struct pipe_surface *cbuf = scene->fb.cbufs[buf];
-      assert(cbuf);
-
-      format_bytes = util_format_get_blocksize(cbuf->format);
-      task->color_tiles[buf] = scene->cbufs[buf].map + scene->cbufs[buf].stride * task->y +
-                               format_bytes * task->x;
-   }
-
-   return task->color_tiles[buf];
-}
-
-
-/**
- * Get pointer to the depth tile
- */
-static INLINE uint8_t *
-lp_rast_get_depth_tile_pointer(struct lp_rasterizer_task *task,
-                               enum lp_texture_usage usage)
-{
-   const struct lp_scene *scene = task->scene;
-   unsigned format_bytes;
-
-   assert(task->x < scene->tiles_x * TILE_SIZE);
-   assert(task->y < scene->tiles_y * TILE_SIZE);
-   assert(task->x % TILE_SIZE == 0);
-   assert(task->y % TILE_SIZE == 0);
-
-   if (!task->depth_tile) {
-      struct pipe_surface *dbuf = scene->fb.zsbuf;
-      assert(dbuf);
-
-      format_bytes = util_format_get_blocksize(dbuf->format);
-      task->depth_tile = scene->zsbuf.map + scene->zsbuf.stride * task->y +
-                         format_bytes * task->x;
-   }
-
-   return task->depth_tile;
-}
-
-
 /**
  * Get the pointer to a 4x4 color block (within a 64x64 tile).
  * \param x, y location of 4x4 block in window coords
@@ -208,7 +150,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
                                 unsigned buf, unsigned x, unsigned y,
                                 unsigned layer)
 {
-   unsigned px, py, pixel_offset, format_bytes;
+   unsigned px, py, pixel_offset;
    uint8_t *color;
 
    assert(x < task->scene->tiles_x * TILE_SIZE);
@@ -217,16 +159,19 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
    assert((y % TILE_VECTOR_HEIGHT) == 0);
    assert(buf < task->scene->fb.nr_cbufs);
 
-   format_bytes = util_format_get_blocksize(task->scene->fb.cbufs[buf]->format);
-
-   color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE);
-   assert(color);
+   assert(task->color_tiles[buf]);
 
+   /*
+    * We don't actually benefit from having per tile cbuf/zsbuf pointers,
+    * it's just extra work - the mul/add would be exactly the same anyway.
+    * Fortunately the extra work (modulo) here is very cheap at least...
+    */
    px = x % TILE_SIZE;
    py = y % TILE_SIZE;
-   pixel_offset = px * format_bytes + py * task->scene->cbufs[buf].stride;
 
-   color = color + pixel_offset;
+   pixel_offset = px * task->scene->cbufs[buf].format_bytes +
+                  py * task->scene->cbufs[buf].stride;
+   color = task->color_tiles[buf] + pixel_offset;
 
    if (layer) {
       color += layer * task->scene->cbufs[buf].layer_stride;
@@ -245,7 +190,7 @@ static INLINE uint8_t *
 lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
                                 unsigned x, unsigned y, unsigned layer)
 {
-   unsigned px, py, pixel_offset, format_bytes;
+   unsigned px, py, pixel_offset;
    uint8_t *depth;
 
    assert(x < task->scene->tiles_x * TILE_SIZE);
@@ -253,16 +198,14 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
    assert((x % TILE_VECTOR_WIDTH) == 0);
    assert((y % TILE_VECTOR_HEIGHT) == 0);
 
-   format_bytes = util_format_get_blocksize(task->scene->fb.zsbuf->format);
-
-   depth = lp_rast_get_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
-   assert(depth);
+   assert(task->depth_tile);
 
    px = x % TILE_SIZE;
    py = y % TILE_SIZE;
-   pixel_offset = px * format_bytes + py * task->scene->zsbuf.stride;
 
-   depth = depth + pixel_offset;
+   pixel_offset = px * task->scene->zsbuf.format_bytes +
+                  py * task->scene->zsbuf.stride;
+   depth = task->depth_tile + pixel_offset;
 
    if (layer) {
       depth += layer * task->scene->zsbuf.layer_stride;
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
index e95d76a..2441b3c 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -174,6 +174,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
                                                      cbuf->u.tex.level,
                                                      cbuf->u.tex.first_layer,
                                                      LP_TEX_USAGE_READ_WRITE);
+         scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
       }
       else {
          struct llvmpipe_resource *lpr = llvmpipe_resource(cbuf->texture);
@@ -182,6 +183,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
          scene->cbufs[i].layer_stride = 0;
          scene->cbufs[i].map = lpr->data;
          scene->cbufs[i].map += cbuf->u.buf.first_element * pixstride;
+         scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
       }
    }
 
@@ -194,6 +196,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
                                                zsbuf->u.tex.level,
                                                zsbuf->u.tex.first_layer,
                                                LP_TEX_USAGE_READ_WRITE);
+      scene->zsbuf.format_bytes = util_format_get_blocksize(zsbuf->format);
    }
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
index 19a3811..ad23c20 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -142,6 +142,7 @@ struct lp_scene {
       uint8_t *map;
       unsigned stride;
       unsigned layer_stride;
+      unsigned format_bytes;
    } zsbuf, cbufs[PIPE_MAX_COLOR_BUFS];
 
    /* The amount of layers in the fb (minimum of all attachments) */




More information about the mesa-commit mailing list