[Mesa-dev] [PATCH] llvmpipe: simplify address calculation for 4x4 blocks
Jose Fonseca
jfonseca at vmware.com
Fri Mar 27 14:26:30 PDT 2015
Reviewed-by: Jose Fonseca <jfonseca at vmware.com>
On 27/03/15 15:53, sroland at vmware.com wrote:
> From: Roland Scheidegger <sroland at vmware.com>
>
> These functions looked quite complicated, even though what they actually did
> was trivial (ever since we dropped swizzled rendering). Also drop the lookup of
> the format's bytes per block, which was done for each block, and do it once
> per scene instead.
> This improves everybody's favorite "benchmark" by 3% or so, though
> lp_rast_shade_quads_all(), which calls this, still shows up quite high for a
> function which does little more than call the jit function.
> (This would most likely be much better handled by the jit function itself,
> since the strides are already passed through anyway, though being able to
> handle layers there would definitely add some complexity.)
> ---
> src/gallium/drivers/llvmpipe/lp_rast.c | 20 +++++--
> src/gallium/drivers/llvmpipe/lp_rast_priv.h | 87 +++++------------------------
> src/gallium/drivers/llvmpipe/lp_scene.c | 3 +
> src/gallium/drivers/llvmpipe/lp_scene.h | 1 +
> 4 files changed, 35 insertions(+), 76 deletions(-)
>
> diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
> index 903e7c5..7019acb 100644
> --- a/src/gallium/drivers/llvmpipe/lp_rast.c
> +++ b/src/gallium/drivers/llvmpipe/lp_rast.c
> @@ -91,6 +91,9 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
> const struct cmd_bin *bin,
> int x, int y)
> {
> + unsigned i;
> + struct lp_scene *scene = task->scene;
> +
> LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y);
>
> task->bin = bin;
> @@ -104,9 +107,18 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
> task->thread_data.vis_counter = 0;
> task->ps_invocations = 0;
>
> - /* reset pointers to color and depth tile(s) */
> - memset(task->color_tiles, 0, sizeof(task->color_tiles));
> - task->depth_tile = NULL;
> + for (i = 0; i < task->scene->fb.nr_cbufs; i++) {
> + if (task->scene->fb.cbufs[i]) {
> + task->color_tiles[i] = scene->cbufs[i].map +
> + scene->cbufs[i].stride * task->y +
> + scene->cbufs[i].format_bytes * task->x;
> + }
> + }
> + if (task->scene->fb.zsbuf) {
> + task->depth_tile = scene->zsbuf.map +
> + scene->zsbuf.stride * task->y +
> + scene->zsbuf.format_bytes * task->x;
> + }
> }
>
>
> @@ -186,7 +198,7 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
>
> if (scene->fb.zsbuf) {
> unsigned layer;
> - uint8_t *dst_layer = lp_rast_get_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
> + uint8_t *dst_layer = task->depth_tile;
> block_size = util_format_get_blocksize(scene->fb.zsbuf->format);
>
> clear_value &= clear_mask;
> diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
> index d92230d..e6ebbcd 100644
> --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
> +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
> @@ -141,64 +141,6 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
> unsigned mask);
>
>
> -
> -/**
> - * Get pointer to the color tile
> - */
> -static INLINE uint8_t *
> -lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
> - unsigned buf, enum lp_texture_usage usage)
> -{
> - const struct lp_scene *scene = task->scene;
> - unsigned format_bytes;
> -
> - assert(task->x < scene->tiles_x * TILE_SIZE);
> - assert(task->y < scene->tiles_y * TILE_SIZE);
> - assert(task->x % TILE_SIZE == 0);
> - assert(task->y % TILE_SIZE == 0);
> - assert(buf < scene->fb.nr_cbufs);
> -
> - if (!task->color_tiles[buf]) {
> - struct pipe_surface *cbuf = scene->fb.cbufs[buf];
> - assert(cbuf);
> -
> - format_bytes = util_format_get_blocksize(cbuf->format);
> - task->color_tiles[buf] = scene->cbufs[buf].map + scene->cbufs[buf].stride * task->y +
> - format_bytes * task->x;
> - }
> -
> - return task->color_tiles[buf];
> -}
> -
> -
> -/**
> - * Get pointer to the depth tile
> - */
> -static INLINE uint8_t *
> -lp_rast_get_depth_tile_pointer(struct lp_rasterizer_task *task,
> - enum lp_texture_usage usage)
> -{
> - const struct lp_scene *scene = task->scene;
> - unsigned format_bytes;
> -
> - assert(task->x < scene->tiles_x * TILE_SIZE);
> - assert(task->y < scene->tiles_y * TILE_SIZE);
> - assert(task->x % TILE_SIZE == 0);
> - assert(task->y % TILE_SIZE == 0);
> -
> - if (!task->depth_tile) {
> - struct pipe_surface *dbuf = scene->fb.zsbuf;
> - assert(dbuf);
> -
> - format_bytes = util_format_get_blocksize(dbuf->format);
> - task->depth_tile = scene->zsbuf.map + scene->zsbuf.stride * task->y +
> - format_bytes * task->x;
> - }
> -
> - return task->depth_tile;
> -}
> -
> -
> /**
> * Get the pointer to a 4x4 color block (within a 64x64 tile).
> * \param x, y location of 4x4 block in window coords
> @@ -208,7 +150,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
> unsigned buf, unsigned x, unsigned y,
> unsigned layer)
> {
> - unsigned px, py, pixel_offset, format_bytes;
> + unsigned px, py, pixel_offset;
> uint8_t *color;
>
> assert(x < task->scene->tiles_x * TILE_SIZE);
> @@ -217,16 +159,19 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
> assert((y % TILE_VECTOR_HEIGHT) == 0);
> assert(buf < task->scene->fb.nr_cbufs);
>
> - format_bytes = util_format_get_blocksize(task->scene->fb.cbufs[buf]->format);
> -
> - color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE);
> - assert(color);
> + assert(task->color_tiles[buf]);
>
> + /*
> + * We don't actually benefit from having per tile cbuf/zsbuf pointers,
> + * it's just extra work - the mul/add would be exactly the same anyway.
> + * Fortunately the extra work (modulo) here is very cheap at least...
> + */
> px = x % TILE_SIZE;
> py = y % TILE_SIZE;
> - pixel_offset = px * format_bytes + py * task->scene->cbufs[buf].stride;
>
> - color = color + pixel_offset;
> + pixel_offset = px * task->scene->cbufs[buf].format_bytes +
> + py * task->scene->cbufs[buf].stride;
> + color = task->color_tiles[buf] + pixel_offset;
>
> if (layer) {
> color += layer * task->scene->cbufs[buf].layer_stride;
> @@ -245,7 +190,7 @@ static INLINE uint8_t *
> lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
> unsigned x, unsigned y, unsigned layer)
> {
> - unsigned px, py, pixel_offset, format_bytes;
> + unsigned px, py, pixel_offset;
> uint8_t *depth;
>
> assert(x < task->scene->tiles_x * TILE_SIZE);
> @@ -253,16 +198,14 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
> assert((x % TILE_VECTOR_WIDTH) == 0);
> assert((y % TILE_VECTOR_HEIGHT) == 0);
>
> - format_bytes = util_format_get_blocksize(task->scene->fb.zsbuf->format);
> -
> - depth = lp_rast_get_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
> - assert(depth);
> + assert(task->depth_tile);
>
> px = x % TILE_SIZE;
> py = y % TILE_SIZE;
> - pixel_offset = px * format_bytes + py * task->scene->zsbuf.stride;
>
> - depth = depth + pixel_offset;
> + pixel_offset = px * task->scene->zsbuf.format_bytes +
> + py * task->scene->zsbuf.stride;
> + depth = task->depth_tile + pixel_offset;
>
> if (layer) {
> depth += layer * task->scene->zsbuf.layer_stride;
> diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
> index e95d76a..2441b3c 100644
> --- a/src/gallium/drivers/llvmpipe/lp_scene.c
> +++ b/src/gallium/drivers/llvmpipe/lp_scene.c
> @@ -174,6 +174,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
> cbuf->u.tex.level,
> cbuf->u.tex.first_layer,
> LP_TEX_USAGE_READ_WRITE);
> + scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
> }
> else {
> struct llvmpipe_resource *lpr = llvmpipe_resource(cbuf->texture);
> @@ -182,6 +183,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
> scene->cbufs[i].layer_stride = 0;
> scene->cbufs[i].map = lpr->data;
> scene->cbufs[i].map += cbuf->u.buf.first_element * pixstride;
> + scene->cbufs[i].format_bytes = util_format_get_blocksize(cbuf->format);
> }
> }
>
> @@ -194,6 +196,7 @@ lp_scene_begin_rasterization(struct lp_scene *scene)
> zsbuf->u.tex.level,
> zsbuf->u.tex.first_layer,
> LP_TEX_USAGE_READ_WRITE);
> + scene->zsbuf.format_bytes = util_format_get_blocksize(zsbuf->format);
> }
> }
>
> diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
> index 19a3811..ad23c20 100644
> --- a/src/gallium/drivers/llvmpipe/lp_scene.h
> +++ b/src/gallium/drivers/llvmpipe/lp_scene.h
> @@ -142,6 +142,7 @@ struct lp_scene {
> uint8_t *map;
> unsigned stride;
> unsigned layer_stride;
> + unsigned format_bytes;
> } zsbuf, cbufs[PIPE_MAX_COLOR_BUFS];
>
> /* The amount of layers in the fb (minimum of all attachments) */
>
More information about the mesa-dev
mailing list