[Mesa-dev] [PATCH] llvmpipe: simplify address calculation for 4x4 blocks

Jose Fonseca jfonseca at vmware.com
Fri Mar 27 14:26:30 PDT 2015


Reviewed-by: Jose Fonseca <jfonseca at vmware.com>

On 27/03/15 15:53, sroland at vmware.com wrote:
> From: Roland Scheidegger <sroland at vmware.com>
>
> These functions looked quite complicated, even though what they actually did
> was trivial (ever since we dropped swizzled rendering). Also drop the lookup of
> the format's bytes per block done for each block, and do it once per scene instead.
> This improves everybody's favorite "benchmark" by 3% or so, though
> lp_rast_shade_quads_all(), which calls this, still shows up quite high for a
> function which does little more than call the jit function.
> (This would most likely be much better handled by the jit function itself;
> the strides are already passed through anyway, though being able to
> handle layers there would definitely add some complexity.)
> ---
>   src/gallium/drivers/llvmpipe/lp_rast.c      | 20 +++++--
>   src/gallium/drivers/llvmpipe/lp_rast_priv.h | 87 +++++------------------------
>   src/gallium/drivers/llvmpipe/lp_scene.c     |  3 +
>   src/gallium/drivers/llvmpipe/lp_scene.h     |  1 +
>   4 files changed, 35 insertions(+), 76 deletions(-)
>
> diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
> index 903e7c5..7019acb 100644
> --- a/src/gallium/drivers/llvmpipe/lp_rast.c
> +++ b/src/gallium/drivers/llvmpipe/lp_rast.c
> @@ -91,6 +91,9 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
>                      const struct cmd_bin *bin,
>                      int x, int y)
>   {
> +   unsigned i;
> +   struct lp_scene *scene = task->scene;
> +
>      LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
>
>      task->bin = bin;
> @@ -104,9 +107,18 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
>      task->thread_data.vis_counter = 0;
>      task->ps_invocations = 0;
>
> -   /* reset pointers to color and depth tile(s) */
> -   memset(task->color_tiles, 0, sizeof(task->color_tiles));
> -   task->depth_tile = NULL;
> +   for (i = 0; i < task->scene->fb.nr_cbufs; i++) {
> +      if (task->scene->fb.cbufs[i]) {
> +         task->color_tiles[i] = scene->cbufs[i].map +
> +                                scene->cbufs[i].stride * task->y +
> +                                scene->cbufs[i].format_bytes * task->x;
> +      }
> +   }
> +   if (task->scene->fb.zsbuf) {
> +      task->depth_tile = scene->zsbuf.map +
> +                         scene->zsbuf.stride * task->y +
> +                         scene->zsbuf.format_bytes * task->x;
> +   }
>   }
>
>
> @@ -186,7 +198,7 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
>
>      if (scene->fb.zsbuf) {
>         unsigned layer;
> -      uint8_t *dst_layer = lp_rast_get_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
> +      uint8_t *dst_layer = task->depth_tile;
>         block_size = util_format_get_blocksize(scene->fb.zsbuf->format);
>
>         clear_value &= clear_mask;
> diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
> index d92230d..e6ebbcd 100644
> --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
> +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
> @@ -141,64 +141,6 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
>                            unsigned mask);
>
>
> -
> -/**
> - * Get pointer to the color tile
> - */
> -static INLINE uint8_t *
> -lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
> -                               unsigned buf, enum lp_texture_usage usage)
> -{
> -   const struct lp_scene *scene = task->scene;
> -   unsigned format_bytes;
> -
> -   assert(task->x < scene->tiles_x * TILE_SIZE);
> -   assert(task->y < scene->tiles_y * TILE_SIZE);
> -   assert(task->x % TILE_SIZE == 0);
> -   assert(task->y % TILE_SIZE == 0);
> -   assert(buf < scene->fb.nr_cbufs);
> -
> -   if (!task->color_tiles[buf]) {
> -      struct pipe_surface *cbuf = scene->fb.cbufs[buf];
> -      assert(cbuf);
> -
> -      format_bytes = util_format_get_blocksize(cbuf->format);
> -      task->color_tiles[buf] = scene->cbufs[buf].map + scene->cbufs[buf].stride * task->y +
> -                               format_bytes * task->x;
> -   }
> -
> -   return task->color_tiles[buf];
> -}
> -
> -
> -/**
> - * Get pointer to the depth tile
> - */
> -static INLINE uint8_t *
> -lp_rast_get_depth_tile_pointer(struct lp_rasterizer_task *task,
> -                               enum lp_texture_usage usage)
> -{
> -   const struct lp_scene *scene = task->scene;
> -   unsigned format_bytes;
> -
> -   assert(task->x < scene->tiles_x * TILE_SIZE);
> -   assert(task->y < scene->tiles_y * TILE_SIZE);
> -   assert(task->x % TILE_SIZE == 0);
> -   assert(task->y % TILE_SIZE == 0);
> -
> -   if (!task->depth_tile) {
> -      struct pipe_surface *dbuf = scene->fb.zsbuf;
> -      assert(dbuf);
> -
> -      format_bytes = util_format_get_blocksize(dbuf->format);
> -      task->depth_tile = scene->zsbuf.map + scene->zsbuf.stride * task->y +
> -                         format_bytes * task->x;
> -   }
> -
> -   return task->depth_tile;
> -}
> -
> -
>   /**
>    * Get the pointer to a 4x4 color block (within a 64x64 tile).
>    * \param x, y location of 4x4 block in window coords
> @@ -208,7 +150,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
>                                   unsigned buf, unsigned x, unsigned y,
>                                   unsigned layer)
>   {
> -   unsigned px, py, pixel_offset, format_bytes;
> +   unsigned px, py, pixel_offset;
>      uint8_t *color;
>
>      assert(x < task->scene->tiles_x * TILE_SIZE);
> @@ -217,16 +159,19 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
>      assert((y % TILE_VECTOR_HEIGHT) == 0);
>      assert(buf < task->scene->fb.nr_cbufs);
>
> -   format_bytes = util_format_get_blocksize(task->scene->fb.cbufs[buf]->format);
> -
> -   color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE);
> -   assert(color);
> +   assert(task->color_tiles[buf]);
>
> +   /*
> +    * We don't actually benefit from having per tile cbuf/zsbuf pointers,
> +    * it's just extra work - the mul/add would be exactly the same anyway.
> +    * Fortunately the extra work (modulo) here is very cheap at least...
> +    */
>      px = x % TILE_SIZE;
>      py = y % TILE_SIZE;
> -   pixel_offset = px * format_bytes + py * task->scene->cbufs[buf].stride;
>
> -   color = color + pixel_offset;
> +   pixel_offset = px * task->scene->cbufs[buf].format_bytes +
> +                  py * task->scene->cbufs[buf].stride;
> +   color = task->color_tiles[buf] + pixel_offset;
>
>      if (layer) {
>         color += layer * task->scene->cbufs[buf].layer_stride;
> @@ -245,7 +190,7 @@ static INLINE uint8_t *
>   lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
>                                   unsigned x, unsigned y, unsigned layer)
>   {
> -   unsigned px, py, pixel_offset, format_bytes;
> +   unsigned px, py, pixel_offset;
>      uint8_t *depth;
>
>      assert(x < task->scene->tiles_x * TILE_SIZE);
> @@ -253,16 +198,14 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
>      assert((x % TILE_VECTOR_WIDTH) == 0);
>      assert((y % TILE_VECTOR_HEIGHT) == 0);
>
> -   format_bytes = util_format_get_blocksize(task->scene->fb.zsbuf->format);
> -
> -   depth = lp_rast_get_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
> -   assert(depth);
> +   assert(task->depth_tile);
>
>      px = x % TILE_SIZE;
>      py = y % TILE_SIZE;
> -   pixel_offset = px * format_bytes + py * task->scene->zsbuf.stride;
>
> -   depth = depth + pixel_offset;
> +   pixel_offset = px * task->scene->zsbuf.format_bytes +
> +                  py * task->scene->zsbuf.stride;
> +   depth = task->depth_tile + pixel_offset;
>
>      if (layer) {
>         depth += layer * task->scene->zsbuf.layer_stride;
> diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
> index e95d76a..2441b3c 100644
> --- a/src/gallium/drivers/llvmpipe/lp_scene.c
> +++ b/src/gallium/drivers/llvmpipe/lp_scene.c
> @@ -174,6 +174,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
>                                                        cbuf->u.tex.level,
>                                                        cbuf->u.tex.first_layer,
>                                                        LP_TEX_USAGE_READ_WRITE);
> +         scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
>         }
>         else {
>            struct llvmpipe_resource *lpr = llvmpipe_resource(cbuf->texture);
> @@ -182,6 +183,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
>            scene->cbufs[i].layer_stride = 0;
>            scene->cbufs[i].map = lpr->data;
>            scene->cbufs[i].map += cbuf->u.buf.first_element * pixstride;
> +         scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
>         }
>      }
>
> @@ -194,6 +196,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
>                                                  zsbuf->u.tex.level,
>                                                  zsbuf->u.tex.first_layer,
>                                                  LP_TEX_USAGE_READ_WRITE);
> +      scene->zsbuf.format_bytes = util_format_get_blocksize(zsbuf->format);
>      }
>   }
>
> diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
> index 19a3811..ad23c20 100644
> --- a/src/gallium/drivers/llvmpipe/lp_scene.h
> +++ b/src/gallium/drivers/llvmpipe/lp_scene.h
> @@ -142,6 +142,7 @@ struct lp_scene {
>         uint8_t *map;
>         unsigned stride;
>         unsigned layer_stride;
> +      unsigned format_bytes;
>      } zsbuf, cbufs[PIPE_MAX_COLOR_BUFS];
>
>      /* The amount of layers in the fb (minimum of all attachments) */
>



More information about the mesa-dev mailing list