[Mesa-dev] [PATCH] llvmpipe: simplify address calculation for 4x4 blocks

sroland at vmware.com sroland at vmware.com
Fri Mar 27 08:53:53 PDT 2015


From: Roland Scheidegger <sroland at vmware.com>

These functions looked quite complicated, even though what they actually did
was trivial (ever since we dropped swizzled rendering). Also drop the lookup of
format bytes per block done for each block, and do it once per scene instead.
This improves everybody's favorite "benchmark" by 3% or so, though
lp_rast_shade_quads_all(), which calls this, still shows up quite high for a
function which does little more than call the jit function.
(This would most likely be much better handled by the jit function itself,
the strides are passed through anyway already, though for being able to
handle layers it would definitely add some complexity.)
---
 src/gallium/drivers/llvmpipe/lp_rast.c      | 20 +++++--
 src/gallium/drivers/llvmpipe/lp_rast_priv.h | 87 +++++------------------------
 src/gallium/drivers/llvmpipe/lp_scene.c     |  3 +
 src/gallium/drivers/llvmpipe/lp_scene.h     |  1 +
 4 files changed, 35 insertions(+), 76 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 903e7c5..7019acb 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -91,6 +91,9 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
                    const struct cmd_bin *bin,
                    int x, int y)
 {
+   unsigned i;
+   struct lp_scene *scene = task->scene;
+
    LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
 
    task->bin = bin;
@@ -104,9 +107,18 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
    task->thread_data.vis_counter = 0;
    task->ps_invocations = 0;
 
-   /* reset pointers to color and depth tile(s) */
-   memset(task->color_tiles, 0, sizeof(task->color_tiles));
-   task->depth_tile = NULL;
+   for (i = 0; i < task->scene->fb.nr_cbufs; i++) {
+      if (task->scene->fb.cbufs[i]) {
+         task->color_tiles[i] = scene->cbufs[i].map +
+                                scene->cbufs[i].stride * task->y +
+                                scene->cbufs[i].format_bytes * task->x;
+      }
+   }
+   if (task->scene->fb.zsbuf) {
+      task->depth_tile = scene->zsbuf.map +
+                         scene->zsbuf.stride * task->y +
+                         scene->zsbuf.format_bytes * task->x;
+   }
 }
 
 
@@ -186,7 +198,7 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
 
    if (scene->fb.zsbuf) {
       unsigned layer;
-      uint8_t *dst_layer = lp_rast_get_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
+      uint8_t *dst_layer = task->depth_tile;
       block_size = util_format_get_blocksize(scene->fb.zsbuf->format);
 
       clear_value &= clear_mask;
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index d92230d..e6ebbcd 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -141,64 +141,6 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
                          unsigned mask);
 
 
-
-/**
- * Get pointer to the color tile
- */
-static INLINE uint8_t *
-lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
-                               unsigned buf, enum lp_texture_usage usage)
-{
-   const struct lp_scene *scene = task->scene;
-   unsigned format_bytes;
-
-   assert(task->x < scene->tiles_x * TILE_SIZE);
-   assert(task->y < scene->tiles_y * TILE_SIZE);
-   assert(task->x % TILE_SIZE == 0);
-   assert(task->y % TILE_SIZE == 0);
-   assert(buf < scene->fb.nr_cbufs);
-
-   if (!task->color_tiles[buf]) {
-      struct pipe_surface *cbuf = scene->fb.cbufs[buf];
-      assert(cbuf);
-
-      format_bytes = util_format_get_blocksize(cbuf->format);
-      task->color_tiles[buf] = scene->cbufs[buf].map + scene->cbufs[buf].stride * task->y +
-                               format_bytes * task->x;
-   }
-
-   return task->color_tiles[buf];
-}
-
-
-/**
- * Get pointer to the depth tile
- */
-static INLINE uint8_t *
-lp_rast_get_depth_tile_pointer(struct lp_rasterizer_task *task,
-                               enum lp_texture_usage usage)
-{
-   const struct lp_scene *scene = task->scene;
-   unsigned format_bytes;
-
-   assert(task->x < scene->tiles_x * TILE_SIZE);
-   assert(task->y < scene->tiles_y * TILE_SIZE);
-   assert(task->x % TILE_SIZE == 0);
-   assert(task->y % TILE_SIZE == 0);
-
-   if (!task->depth_tile) {
-      struct pipe_surface *dbuf = scene->fb.zsbuf;
-      assert(dbuf);
-
-      format_bytes = util_format_get_blocksize(dbuf->format);
-      task->depth_tile = scene->zsbuf.map + scene->zsbuf.stride * task->y +
-                         format_bytes * task->x;
-   }
-
-   return task->depth_tile;
-}
-
-
 /**
  * Get the pointer to a 4x4 color block (within a 64x64 tile).
  * \param x, y location of 4x4 block in window coords
@@ -208,7 +150,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
                                 unsigned buf, unsigned x, unsigned y,
                                 unsigned layer)
 {
-   unsigned px, py, pixel_offset, format_bytes;
+   unsigned px, py, pixel_offset;
    uint8_t *color;
 
    assert(x < task->scene->tiles_x * TILE_SIZE);
@@ -217,16 +159,19 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
    assert((y % TILE_VECTOR_HEIGHT) == 0);
    assert(buf < task->scene->fb.nr_cbufs);
 
-   format_bytes = util_format_get_blocksize(task->scene->fb.cbufs[buf]->format);
-
-   color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE);
-   assert(color);
+   assert(task->color_tiles[buf]);
 
+   /*
+    * We don't actually benefit from having per tile cbuf/zsbuf pointers,
+    * it's just extra work - the mul/add would be exactly the same anyway.
+    * Fortunately the extra work (modulo) here is very cheap at least...
+    */
    px = x % TILE_SIZE;
    py = y % TILE_SIZE;
-   pixel_offset = px * format_bytes + py * task->scene->cbufs[buf].stride;
 
-   color = color + pixel_offset;
+   pixel_offset = px * task->scene->cbufs[buf].format_bytes +
+                  py * task->scene->cbufs[buf].stride;
+   color = task->color_tiles[buf] + pixel_offset;
 
    if (layer) {
       color += layer * task->scene->cbufs[buf].layer_stride;
@@ -245,7 +190,7 @@ static INLINE uint8_t *
 lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
                                 unsigned x, unsigned y, unsigned layer)
 {
-   unsigned px, py, pixel_offset, format_bytes;
+   unsigned px, py, pixel_offset;
    uint8_t *depth;
 
    assert(x < task->scene->tiles_x * TILE_SIZE);
@@ -253,16 +198,14 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
    assert((x % TILE_VECTOR_WIDTH) == 0);
    assert((y % TILE_VECTOR_HEIGHT) == 0);
 
-   format_bytes = util_format_get_blocksize(task->scene->fb.zsbuf->format);
-
-   depth = lp_rast_get_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
-   assert(depth);
+   assert(task->depth_tile);
 
    px = x % TILE_SIZE;
    py = y % TILE_SIZE;
-   pixel_offset = px * format_bytes + py * task->scene->zsbuf.stride;
 
-   depth = depth + pixel_offset;
+   pixel_offset = px * task->scene->zsbuf.format_bytes +
+                  py * task->scene->zsbuf.stride;
+   depth = task->depth_tile + pixel_offset;
 
    if (layer) {
       depth += layer * task->scene->zsbuf.layer_stride;
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
index e95d76a..2441b3c 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -174,6 +174,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
                                                      cbuf->u.tex.level,
                                                      cbuf->u.tex.first_layer,
                                                      LP_TEX_USAGE_READ_WRITE);
+         scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
       }
       else {
          struct llvmpipe_resource *lpr = llvmpipe_resource(cbuf->texture);
@@ -182,6 +183,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
          scene->cbufs[i].layer_stride = 0;
          scene->cbufs[i].map = lpr->data;
          scene->cbufs[i].map += cbuf->u.buf.first_element * pixstride;
+         scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
       }
    }
 
@@ -194,6 +196,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
                                                zsbuf->u.tex.level,
                                                zsbuf->u.tex.first_layer,
                                                LP_TEX_USAGE_READ_WRITE);
+      scene->zsbuf.format_bytes = util_format_get_blocksize(zsbuf->format);
    }
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
index 19a3811..ad23c20 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -142,6 +142,7 @@ struct lp_scene {
       uint8_t *map;
       unsigned stride;
       unsigned layer_stride;
+      unsigned format_bytes;
    } zsbuf, cbufs[PIPE_MAX_COLOR_BUFS];
 
    /* The amount of layers in the fb (minimum of all attachments) */
-- 
1.9.1



More information about the mesa-dev mailing list