Mesa (master): llvmpipe: use single swizzled tile

Jose Fonseca jrfonseca at kemper.freedesktop.org
Fri Jul 16 16:24:26 UTC 2010


Module: Mesa
Branch: master
Commit: 2f6d47a7c8d6e69e5154de44115aab9ba35a41d2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=2f6d47a7c8d6e69e5154de44115aab9ba35a41d2

Author: Keith Whitwell <keithw at vmware.com>
Date:   Fri Jul 16 14:40:30 2010 +0100

llvmpipe: use single swizzled tile

Use a single swizzled tile per colorbuf (and per thread) to avoid
accumulating large amounts of cached swizzled data.

Now that the SSE3 code has been merged to master, the performance delta
of this change is minimal; the main benefit is reduced memory usage,
since swizzled copies of render targets are no longer kept around.

It's clear from the performance of the in-place version of this code
that there is still quite a bit of time being spent swizzling &
unswizzling, but it's not clear exactly how to reduce that.

---

 src/gallium/drivers/llvmpipe/lp_memory.c    |   41 ++++---------
 src/gallium/drivers/llvmpipe/lp_memory.h    |   13 +---
 src/gallium/drivers/llvmpipe/lp_rast.c      |   55 ++++++----------
 src/gallium/drivers/llvmpipe/lp_rast_priv.h |   24 +++----
 src/gallium/drivers/llvmpipe/lp_setup.c     |   16 +-----
 src/gallium/drivers/llvmpipe/lp_texture.c   |   88 +++++++++++++++++++++++++++
 src/gallium/drivers/llvmpipe/lp_texture.h   |   11 +++
 7 files changed, 148 insertions(+), 100 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_memory.c b/src/gallium/drivers/llvmpipe/lp_memory.c
index 61d1666..0f55d4a 100644
--- a/src/gallium/drivers/llvmpipe/lp_memory.c
+++ b/src/gallium/drivers/llvmpipe/lp_memory.c
@@ -29,32 +29,17 @@
 #include "lp_limits.h"
 #include "lp_memory.h"
 
-
-/** 32bpp RGBA dummy tile to use in out of memory conditions */
-static PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
-
-static unsigned lp_out_of_memory = 0;
-
-
-uint8_t *
-lp_get_dummy_tile(void)
-{
-   if (lp_out_of_memory++ < 10) {
-      debug_printf("llvmpipe: out of memory.  Using dummy tile memory.\n");
-   }
-   return lp_dummy_tile;
-}
-
-uint8_t *
-lp_get_dummy_tile_silent(void)
-{
-   return lp_dummy_tile;
-}
-
-
-boolean
-lp_is_dummy_tile(void *tile)
-{
-   return tile == lp_dummy_tile;
-}
+/**
+ * 32bpp RGBA swizzled tiles.  One for each thread and each
+ * possible colorbuf.  Adds up to quite a bit 8*8*64*64*4 == 1MB.
+ * Several schemes exist to reduce this, such as scaling back the
+ * number of threads or using a smaller tilesize when multiple
+ * colorbuffers are bound.
+ */
+PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
+
+
+/* A single dummy tile used in a couple of out-of-memory situations. 
+ */
+PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
 
diff --git a/src/gallium/drivers/llvmpipe/lp_memory.h b/src/gallium/drivers/llvmpipe/lp_memory.h
index 1d0e5eb..f7418f5 100644
--- a/src/gallium/drivers/llvmpipe/lp_memory.h
+++ b/src/gallium/drivers/llvmpipe/lp_memory.h
@@ -30,16 +30,11 @@
 
 
 #include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "lp_limits.h"
 
+extern PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
 
-extern uint8_t *
-lp_get_dummy_tile(void);
-
-uint8_t *
-lp_get_dummy_tile_silent(void);
-
-extern boolean
-lp_is_dummy_tile(void *tile);
-
+extern PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
 
 #endif /* LP_MEMORY_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index a023d2b..654f4ea 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -67,7 +67,7 @@ lp_rast_begin( struct lp_rasterizer *rast,
                             cbuf->level,
                             cbuf->zslice,
                             LP_TEX_USAGE_READ_WRITE,
-                            LP_TEX_LAYOUT_NONE);
+                            LP_TEX_LAYOUT_LINEAR);
    }
 
    if (fb->zsbuf) {
@@ -271,11 +271,6 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
 
    dst = task->depth_tile;
 
-   if (lp_is_dummy_tile(dst))
-      return;
-
-   assert(dst == lp_rast_get_depth_block_pointer(task, task->x, task->y));
-
    switch (block_size) {
    case 1:
       memset(dst, (uint8_t) clear_value, height * width);
@@ -375,10 +370,15 @@ lp_rast_store_linear_color( struct lp_rasterizer_task *task,
       struct pipe_surface *cbuf = scene->fb.cbufs[buf];
       const unsigned face = cbuf->face, level = cbuf->level;
       struct llvmpipe_resource *lpt = llvmpipe_resource(cbuf->texture);
-      /* this will convert the tiled data to linear if needed */
-      (void) llvmpipe_get_texture_tile_linear(lpt, face, level,
-                                              LP_TEX_USAGE_READ,
-                                              task->x, task->y);
+
+      if (!task->color_tiles[buf])
+         continue;
+
+      llvmpipe_unswizzle_cbuf_tile(lpt,
+                                   face,
+                                   level,
+                                   task->x, task->y,
+                                   task->color_tiles[buf]);
    }
 }
 
@@ -589,6 +589,11 @@ lp_rast_tile_end(struct lp_rasterizer_task *task)
    (void) outline_subtiles;
 #endif
 
+   {
+      union lp_rast_cmd_arg dummy = {0};
+      lp_rast_store_linear_color(task, dummy);
+   }
+
    /* debug */
    memset(task->color_tiles, 0, sizeof(task->color_tiles));
    task->depth_tile = NULL;
@@ -751,30 +756,8 @@ debug_bin( const struct cmd_bin *bin )
 static boolean
 is_empty_bin( const struct cmd_bin *bin )
 {
-   const struct cmd_block *head = bin->commands.head;
-   int i;
-   
-   if (0)
-      debug_bin(bin);
-   
-   /* We emit at most two load-tile commands at the start of the first
-    * command block.  In addition we seem to emit a couple of
-    * set-state commands even in empty bins.
-    *
-    * As a heuristic, if a bin has more than 4 commands, consider it
-    * non-empty.
-    */
-   if (head->next != NULL ||
-       head->count > 4) {
-      return FALSE;
-   }
-
-   for (i = 0; i < head->count; i++)
-      if (head->cmd[i] != lp_rast_store_linear_color) {
-         return FALSE;
-      }
-
-   return TRUE;
+   if (0) debug_bin(bin);
+   return bin->commands.head->count == 0;
 }
 
 
@@ -984,6 +967,10 @@ lp_rast_create( unsigned num_threads )
    /* for synchronizing rasterization threads */
    pipe_barrier_init( &rast->barrier, rast->num_threads );
 
+   memset(lp_swizzled_cbuf, 0, sizeof lp_swizzled_cbuf);
+
+   memset(lp_dummy_tile, 0, sizeof lp_dummy_tile);
+
    return rast;
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index 8044927..b4a48cf 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -148,7 +148,7 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
        * the oom warning as this most likely because there is no
        * zsbuf.
        */
-      return lp_get_dummy_tile_silent();
+      return lp_dummy_tile;
    }
 
    depth = (rast->zsbuf.map +
@@ -178,15 +178,14 @@ lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
       struct llvmpipe_resource *lpt;
       assert(cbuf);
       lpt = llvmpipe_resource(cbuf->texture);
-      task->color_tiles[buf] = llvmpipe_get_texture_tile(lpt,
-                                                         cbuf->face + cbuf->zslice,
-                                                         cbuf->level,
-                                                         usage,
-                                                         task->x,
-                                                         task->y);
-      if (!task->color_tiles[buf]) {
-         /* out of memory - use dummy tile memory */
-         return lp_get_dummy_tile();
+      task->color_tiles[buf] = lp_swizzled_cbuf[task->thread_index][buf];
+
+      if (usage != LP_TEX_USAGE_WRITE_ALL) {
+         llvmpipe_swizzle_cbuf_tile(lpt,
+                                    cbuf->face + cbuf->zslice,
+                                    cbuf->level,
+                                    task->x, task->y,
+                                    task->color_tiles[buf]);
       }
    }
 
@@ -212,10 +211,7 @@ lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
    assert((y % TILE_VECTOR_HEIGHT) == 0);
 
    color = lp_rast_get_color_tile_pointer(task, buf, LP_TEX_USAGE_READ_WRITE);
-   if (!color) {
-      /* out of memory - use dummy tile memory */
-      return lp_get_dummy_tile();
-   }
+   assert(color);
 
    px = x % TILE_SIZE;
    py = y % TILE_SIZE;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 7d48ad8..556e571 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -280,20 +280,6 @@ lp_setup_flush( struct lp_setup_context *setup,
    LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__);
 
    if (setup->scene) {
-      struct lp_scene *scene = lp_setup_get_current_scene(setup);
-      union lp_rast_cmd_arg dummy = {0};
-
-      if (flags & (PIPE_FLUSH_SWAPBUFFERS |
-                   PIPE_FLUSH_FRAME)) {
-         /* Store colors in the linear color buffer(s).
-          * If we don't do this here, we'll end up converting the tiled
-          * data to linear in the texture_unmap() function, which will
-          * not be a parallel/threaded operation as here.
-          */
-         lp_scene_bin_everywhere(scene, lp_rast_store_linear_color, dummy);
-      }
-
-
       if (fence) {
          /* if we're going to flush the setup/rasterization modules, emit
           * a fence.
@@ -642,7 +628,7 @@ lp_setup_set_fragment_sampler_views(struct lp_setup_context *setup,
 
                if (!jit_tex->data[j]) {
                   /* out of memory - use dummy tile memory */
-                  jit_tex->data[j] = lp_get_dummy_tile();
+                  jit_tex->data[j] = lp_dummy_tile;
                   jit_tex->width = TILE_SIZE;
                   jit_tex->height = TILE_SIZE;
                   jit_tex->depth = 1;
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index d236bad..bbd8345 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -1209,6 +1209,94 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr,
 
 
 /**
+ * Convert a single swizzled (tiled) colorbuffer tile to linear layout,
+ * writing it into the resource's linear image at tile position (x, y).
+ */
+void
+llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr,
+                             unsigned face_slice, unsigned level,
+                             unsigned x, unsigned y,
+                             uint8_t *tile)
+{
+   struct llvmpipe_texture_image *linear_img = &lpr->linear[level];
+   const unsigned tx = x / TILE_SIZE, ty = y / TILE_SIZE;
+   uint8_t *linear_image;
+
+   assert(x % TILE_SIZE == 0);
+   assert(y % TILE_SIZE == 0);
+
+   if (!linear_img->data) {
+      /* allocate memory for the linear image now */
+      alloc_image_data(lpr, level, LP_TEX_LAYOUT_LINEAR);
+   }
+
+   /* compute address of the slice/face of the image that contains the tile */
+   linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level,
+                                                     LP_TEX_LAYOUT_LINEAR);
+
+   {
+      uint ii = x, jj = y;
+      uint tile_offset = jj / TILE_SIZE + ii / TILE_SIZE;
+      uint byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4;
+      
+      /* Note that lp_tiled_to_linear expects the tile parameter to
+       * point at the first tile in a whole-image sized array.  In
+       * this code, we have only a single tile and have to do some
+       * pointer arithmetic to figure out where the "image" would have
+       * started.
+       */
+      lp_tiled_to_linear(tile - byte_offset, linear_image,
+                         x, y, TILE_SIZE, TILE_SIZE,
+                         lpr->base.format,
+                         lpr->row_stride[level],
+                         1);       /* tiles per row */
+   }
+
+   llvmpipe_set_texture_tile_layout(lpr, face_slice, level, tx, ty,
+                                    LP_TEX_LAYOUT_LINEAR);
+}
+
+
+/**
+ * Convert the tile at position (x, y) of the resource's linear image to
+ * swizzled (tiled) layout, storing the result in the caller-supplied tile.
+ */
+void
+llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr,
+                           unsigned face_slice, unsigned level,
+                           unsigned x, unsigned y,
+                           uint8_t *tile)
+{
+   uint8_t *linear_image;
+
+   assert(x % TILE_SIZE == 0);
+   assert(y % TILE_SIZE == 0);
+
+   /* compute address of the slice/face of the image that contains the tile */
+   linear_image = llvmpipe_get_texture_image_address(lpr, face_slice, level,
+                                                     LP_TEX_LAYOUT_LINEAR);
+
+   if (linear_image) {
+      uint ii = x, jj = y;
+      uint tile_offset = jj / TILE_SIZE + ii / TILE_SIZE;
+      uint byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4;
+
+      /* Note that lp_linear_to_tiled expects the tile parameter to
+       * point at the first tile in a whole-image sized array.  In
+       * this code, we have only a single tile and have to do some
+       * pointer arithmetic to figure out where the "image" would have
+       * started.
+       */
+      lp_linear_to_tiled(linear_image, tile - byte_offset,
+                         x, y, TILE_SIZE, TILE_SIZE,
+                         lpr->base.format,
+                         lpr->row_stride[level],
+                         1);       /* tiles per row */
+   }
+}
+
+
+/**
  * Return size of resource in bytes
  */
 unsigned
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h
index 503b6a1..4e4a65d 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.h
+++ b/src/gallium/drivers/llvmpipe/lp_texture.h
@@ -223,6 +223,17 @@ llvmpipe_get_texture_tile(struct llvmpipe_resource *lpr,
                            unsigned x, unsigned y);
 
 
+void
+llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr,
+                             unsigned face_slice, unsigned level,
+                             unsigned x, unsigned y,
+                             uint8_t *tile);
+
+void
+llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr,
+                           unsigned face_slice, unsigned level,
+                           unsigned x, unsigned y,
+                           uint8_t *tile);
 
 extern void
 llvmpipe_print_resources(void);




More information about the mesa-commit mailing list