Mesa (floating): llvmpipe: switch from byte-filled tiles to float-filled tiles

Luca Barbieri lb at kemper.freedesktop.org
Fri Aug 27 17:26:51 UTC 2010


Module: Mesa
Branch: floating
Commit: 698271f31d9bebcde04a7afbb3f46ff9daf4e8b3
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=698271f31d9bebcde04a7afbb3f46ff9daf4e8b3

Author: Luca Barbieri <luca at luca-barbieri.com>
Date:   Thu Aug 26 20:59:02 2010 +0200

llvmpipe: switch from byte-filled tiles to float-filled tiles

This is probably undesirable in this form, since it will
quadruple the memory bandwidth and cache footprint of all
resources. It might, however, reduce the number of instructions.

(It also quadruples RAM usage, but we are fine with this, since
system RAM is typically four times the size of VRAM.)

Making a byte-based llvmpipe and a float-based one coexist would
be the appropriate solution, but this patch does not do that yet.

---

 src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c |    3 -
 src/gallium/drivers/llvmpipe/lp_jit.h           |    2 +-
 src/gallium/drivers/llvmpipe/lp_memory.c        |    4 +-
 src/gallium/drivers/llvmpipe/lp_memory.h        |    4 +-
 src/gallium/drivers/llvmpipe/lp_rast.c          |   41 ++++++++------
 src/gallium/drivers/llvmpipe/lp_rast.h          |    2 +-
 src/gallium/drivers/llvmpipe/lp_rast_priv.h     |   10 ++--
 src/gallium/drivers/llvmpipe/lp_screen.c        |    2 +-
 src/gallium/drivers/llvmpipe/lp_setup.c         |    8 ++--
 src/gallium/drivers/llvmpipe/lp_setup_context.h |    2 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c      |   67 ++++++++++++-----------
 src/gallium/drivers/llvmpipe/lp_texture.c       |   16 +++---
 src/gallium/drivers/llvmpipe/lp_texture.h       |    4 +-
 src/gallium/drivers/llvmpipe/lp_tile_image.c    |   14 +++---
 src/gallium/drivers/llvmpipe/lp_tile_soa.h      |   15 +++++-
 src/gallium/drivers/llvmpipe/lp_tile_soa.py     |   15 +++---
 16 files changed, 115 insertions(+), 94 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
index b9c7a6c..2f1bc77 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
@@ -243,9 +243,6 @@ lp_build_blend_soa(LLVMBuilderRef builder,
             unsigned func = i < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func;
             boolean func_commutative = lp_build_blend_func_commutative(func);
 
-            /* It makes no sense to blend unless values are normalized */
-            assert(type.norm);
-
             /*
              * Compute src/dst factors.
              */
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index c941894..5db1073 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -139,7 +139,7 @@ typedef void
                     const void *a0,
                     const void *dadx,
                     const void *dady,
-                    uint8_t **color,
+                    float **color,
                     void *depth,
                     uint32_t mask,
                     uint32_t *counter);
diff --git a/src/gallium/drivers/llvmpipe/lp_memory.c b/src/gallium/drivers/llvmpipe/lp_memory.c
index 0f55d4a..33c9bfb 100644
--- a/src/gallium/drivers/llvmpipe/lp_memory.c
+++ b/src/gallium/drivers/llvmpipe/lp_memory.c
@@ -36,10 +36,10 @@
  * number of threads or using a smaller tilesize when multiple
  * colorbuffers are bound.
  */
-PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
+PIPE_ALIGN_VAR(16) float lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
 
 
 /* A single dummy tile used in a couple of out-of-memory situations. 
  */
-PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
+PIPE_ALIGN_VAR(16) float lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
 
diff --git a/src/gallium/drivers/llvmpipe/lp_memory.h b/src/gallium/drivers/llvmpipe/lp_memory.h
index f7418f5..8dcf78d 100644
--- a/src/gallium/drivers/llvmpipe/lp_memory.h
+++ b/src/gallium/drivers/llvmpipe/lp_memory.h
@@ -33,8 +33,8 @@
 #include "pipe/p_state.h"
 #include "lp_limits.h"
 
-extern PIPE_ALIGN_VAR(16) uint8_t lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
+extern PIPE_ALIGN_VAR(16) float lp_swizzled_cbuf[LP_MAX_THREADS][PIPE_MAX_COLOR_BUFS][TILE_SIZE * TILE_SIZE * 4];
 
-extern PIPE_ALIGN_VAR(16) uint8_t lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
+extern PIPE_ALIGN_VAR(16) float lp_dummy_tile[TILE_SIZE * TILE_SIZE * 4];
 
 #endif /* LP_MEMORY_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index b1c306b..327a4d4 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -197,7 +197,7 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
                     const union lp_rast_cmd_arg arg)
 {
    struct lp_rasterizer *rast = task->rast;
-   const uint8_t *clear_color = arg.clear_color;
+   const float *clear_color = arg.clear_color;
 
    unsigned i;
 
@@ -212,9 +212,11 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
        clear_color[2] == clear_color[3]) {
       /* clear to grayscale value {x, x, x, x} */
       for (i = 0; i < rast->state.nr_cbufs; i++) {
-         uint8_t *ptr =
+         unsigned k;
+         float *ptr =
             lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
-	 memset(ptr, clear_color[0], TILE_SIZE * TILE_SIZE * 4);
+         for(k = 0; k < TILE_SIZE * TILE_SIZE * 4; ++k)
+            ptr[k] = clear_color[0];
       }
    }
    else {
@@ -223,20 +225,25 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
        * will need to change.  It'll be pretty obvious when clearing no longer
        * works.
        */
-      const unsigned chunk = TILE_SIZE / 4;
+      const unsigned chunk = TILE_VECTOR_WIDTH * TILE_VECTOR_HEIGHT;
       for (i = 0; i < rast->state.nr_cbufs; i++) {
-         uint8_t *c =
+         float *c =
             lp_rast_get_color_tile_pointer(task, i, LP_TEX_USAGE_WRITE_ALL);
          unsigned j;
 
-         for (j = 0; j < 4 * TILE_SIZE; j++) {
-            memset(c, clear_color[0], chunk);
+         for (j = 0; j <  TILE_SIZE * TILE_SIZE * 4 / (chunk * 4); j++) {
+            unsigned k;
+            for(k = 0; k < chunk; ++k)
+               c[k] = clear_color[0];
             c += chunk;
-            memset(c, clear_color[1], chunk);
+            for(k = 0; k < chunk; ++k)
+               c[k] = clear_color[1];
             c += chunk;
-            memset(c, clear_color[2], chunk);
+            for(k = 0; k < chunk; ++k)
+               c[k] = clear_color[2];
             c += chunk;
-            memset(c, clear_color[3], chunk);
+            for(k = 0; k < chunk; ++k)
+               c[k] = clear_color[3];
             c += chunk;
          }
       }
@@ -375,7 +382,7 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
    /* render the whole 64x64 tile in 4x4 chunks */
    for (y = 0; y < TILE_SIZE; y += 4){
       for (x = 0; x < TILE_SIZE; x += 4) {
-         uint8_t *color[PIPE_MAX_COLOR_BUFS];
+         float *color[PIPE_MAX_COLOR_BUFS];
          uint32_t *depth;
          unsigned i;
 
@@ -443,7 +450,7 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
    const struct lp_rast_state *state = inputs->state;
    struct lp_fragment_shader_variant *variant = state->variant;
    struct lp_rasterizer *rast = task->rast;
-   uint8_t *color[PIPE_MAX_COLOR_BUFS];
+   float *color[PIPE_MAX_COLOR_BUFS];
    void *depth;
    unsigned i;
 
@@ -489,9 +496,9 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
  * Set top row and left column of the tile's pixels to white.  For debugging.
  */
 static void
-outline_tile(uint8_t *tile)
+outline_tile(float *tile)
 {
-   const uint8_t val = 0xff;
+   const float val = 1.0f;
    unsigned i;
 
    for (i = 0; i < TILE_SIZE; i++) {
@@ -513,9 +520,9 @@ outline_tile(uint8_t *tile)
  * show the sub-tile boundaries.  For debugging.
  */
 static void
-outline_subtiles(uint8_t *tile)
+outline_subtiles(float *tile)
 {
-   const uint8_t val = 0x80;
+   const float val = 0.5f;
    const unsigned step = 16;
    unsigned i, j;
 
@@ -550,7 +557,7 @@ lp_rast_tile_end(struct lp_rasterizer_task *task)
       unsigned buf;
 
       for (buf = 0; buf < rast->state.nr_cbufs; buf++) {
-         uint8_t *color = lp_rast_get_color_block_pointer(task, buf,
+         float *color = lp_rast_get_color_block_pointer(task, buf,
                                                           task->x, task->y);
 
          if (LP_DEBUG & DEBUG_SHOW_SUBTILES)
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index 102e902..b7611e7 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -149,7 +149,7 @@ union lp_rast_cmd_arg {
       unsigned plane_mask;
    } triangle;
    const struct lp_rast_state *set_state;
-   uint8_t clear_color[4];
+   float clear_color[4];
    const struct lp_rast_clearzs *clear_zstencil;
    struct lp_fence *fence;
    struct llvmpipe_query *query_obj;
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index fae7f6d..d58f3ae 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -78,7 +78,7 @@ struct lp_rasterizer_task
 {
    unsigned x, y;          /**< Pos of this tile in framebuffer, in pixels */
 
-   uint8_t *color_tiles[PIPE_MAX_COLOR_BUFS];
+   float *color_tiles[PIPE_MAX_COLOR_BUFS];
    uint8_t *depth_tile;
 
    /** "back" pointer */
@@ -191,7 +191,7 @@ lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
 /**
  * Get pointer to the swizzled color tile
  */
-static INLINE uint8_t *
+static INLINE float *
 lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
                                unsigned buf, enum lp_texture_usage usage)
 {
@@ -228,12 +228,12 @@ lp_rast_get_color_tile_pointer(struct lp_rasterizer_task *task,
  * NULL in that case.
  * \param x, y location of 4x4 block in window coords
  */
-static INLINE uint8_t *
+static INLINE float *
 lp_rast_get_color_block_pointer(struct lp_rasterizer_task *task,
                                 unsigned buf, unsigned x, unsigned y)
 {
    unsigned px, py, pixel_offset;
-   uint8_t *color;
+   float *color;
 
    assert((x % TILE_VECTOR_WIDTH) == 0);
    assert((y % TILE_VECTOR_HEIGHT) == 0);
@@ -266,7 +266,7 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
    const struct lp_rasterizer *rast = task->rast;
    const struct lp_rast_state *state = inputs->state;
    struct lp_fragment_shader_variant *variant = state->variant;
-   uint8_t *color[PIPE_MAX_COLOR_BUFS];
+   float *color[PIPE_MAX_COLOR_BUFS];
    void *depth;
    unsigned i;
 
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index aba27af..075e1a0 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -78,7 +78,7 @@ llvmpipe_get_vendor(struct pipe_screen *screen)
 static const char *
 llvmpipe_get_name(struct pipe_screen *screen)
 {
-   return "llvmpipe";
+   return "llvmpipe<float>";
 }
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 9aa6c4b..ad764db 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -340,7 +340,7 @@ lp_setup_clear( struct lp_setup_context *setup,
 
    if (flags & PIPE_CLEAR_COLOR) {
       for (i = 0; i < 4; ++i)
-         setup->clear.color.clear_color[i] = float_to_ubyte(color[i]);
+         setup->clear.color.clear_color[i] = color[i];
    }
 
    if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
@@ -733,15 +733,15 @@ lp_setup_update_state( struct lp_setup_context *setup )
    }
 
    if(setup->dirty & LP_SETUP_NEW_BLEND_COLOR) {
-      uint8_t *stored;
+      float *stored;
       unsigned i, j;
 
-      stored = lp_scene_alloc_aligned(scene, 4 * 16, 16);
+      stored = lp_scene_alloc_aligned(scene, 4 * 16 * sizeof(float), 16);
 
       if (stored) {
          /* smear each blend color component across 16 ubyte elements */
          for (i = 0; i < 4; ++i) {
-            uint8_t c = float_to_ubyte(setup->blend_color.current.color[i]);
+            float c = setup->blend_color.current.color[i];
             for (j = 0; j < 16; ++j)
                stored[i*16 + j] = c;
          }
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index 1a147e0..0e28382 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -128,7 +128,7 @@ struct lp_setup_context
 
    struct {
       struct pipe_blend_color current;
-      uint8_t *stored;
+      float *stored;
    } blend_color;
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 33c1a49..4ae7485 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -416,7 +416,7 @@ generate_blend(const struct pipe_blend_state *blend,
 
    /* load constant blend color and colors from the dest color buffer */
    for(chan = 0; chan < 4; ++chan) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan * 4, 0);
       con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
 
       dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
@@ -431,7 +431,7 @@ generate_blend(const struct pipe_blend_state *blend,
    /* store results to color buffer */
    for(chan = 0; chan < 4; ++chan) {
       if(blend->rt[rt].colormask & (1 << chan)) {
-         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
+         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan * 4, 0);
          lp_build_name(res[chan], "res.%c", "rgba"[chan]);
          res[chan] = lp_build_select(&bld, mask, res[chan], dst[chan]);
          LLVMBuildStore(builder, res[chan], LLVMBuildGEP(builder, dst_ptr, &index, 1, ""));
@@ -481,7 +481,7 @@ generate_fragment(struct llvmpipe_context *lp,
    struct lp_build_interp_soa_context interp;
    LLVMValueRef fs_mask[LP_MAX_VECTOR_LENGTH];
    LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][LP_MAX_VECTOR_LENGTH];
-   LLVMValueRef blend_mask;
+   LLVMValueRef blend_mask[LP_MAX_VECTOR_LENGTH];
    LLVMValueRef function;
    LLVMValueRef facing;
    unsigned num_fs;
@@ -501,12 +501,7 @@ generate_fragment(struct llvmpipe_context *lp,
    fs_type.length = 4;      /* 4 elements per vector */
    num_fs = 4;              /* number of quads per block */
 
-   memset(&blend_type, 0, sizeof blend_type);
-   blend_type.floating = FALSE; /* values are integers */
-   blend_type.sign = FALSE;     /* values are unsigned */
-   blend_type.norm = TRUE;      /* values are in [0,1] or [-1,1] */
-   blend_type.width = 8;        /* 8-bit ubyte values */
-   blend_type.length = 16;      /* 16 elements per vector */
+   blend_type = fs_type;
 
    /* 
     * Generate the function prototype. Any change here must be reflected in
@@ -635,46 +630,56 @@ generate_fragment(struct llvmpipe_context *lp,
    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
       LLVMValueRef color_ptr;
       LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), cbuf, 0);
-      LLVMValueRef blend_in_color[NUM_CHANNELS];
+      LLVMValueRef blend_in_color[LP_MAX_VECTOR_LENGTH][NUM_CHANNELS];
       unsigned rt;
 
-      /* 
-       * Convert the fs's output color and mask to fit to the blending type. 
+      color_ptr = LLVMBuildLoad(builder,
+                                LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""),
+                                "");
+      lp_build_name(color_ptr, "color_ptr%d", cbuf);
+
+      /*
+       * Convert the fs's output color and mask to fit to the blending type.
        */
       for(chan = 0; chan < NUM_CHANNELS; ++chan) {
-	 lp_build_conv(builder, fs_type, blend_type,
-		       fs_out_color[cbuf][chan], num_fs,
-		       &blend_in_color[chan], 1);
-	 lp_build_name(blend_in_color[chan], "color%d.%c", cbuf, "rgba"[chan]);
+         for(i = 0; i < num_fs; ++i)
+         {
+            blend_in_color[i][chan] = fs_out_color[cbuf][chan][i];
+
+	    lp_build_name(blend_in_color[i][chan], "color%d.%c", cbuf, "rgba"[chan]);
+         }
       }
 
       if (partial_mask || !variant->opaque) {
-         lp_build_conv_mask(builder, fs_type, blend_type,
-                            fs_mask, num_fs,
-                            &blend_mask, 1);
+         for(i = 0; i < num_fs; ++i)
+            lp_build_conv_mask(builder, fs_type, blend_type,
+                            &fs_mask[i], 1,
+                            &blend_mask[i], 1);
       } else {
-         blend_mask = lp_build_const_int_vec(blend_type, ~0);
+         for(i = 0; i < num_fs; ++i)
+            blend_mask[i] = lp_build_const_int_vec(blend_type, ~0);
       }
 
-      color_ptr = LLVMBuildLoad(builder, 
-				LLVMBuildGEP(builder, color_ptr_ptr, &index, 1, ""),
-				"");
-      lp_build_name(color_ptr, "color_ptr%d", cbuf);
-
       /* which blend/colormask state to use */
       rt = key->blend.independent_blend_enable ? cbuf : 0;
 
       /*
        * Blending.
        */
-      generate_blend(&key->blend,
+      for(i = 0; i < num_fs; ++i)
+      {
+         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+         LLVMValueRef quad_color_ptr = LLVMBuildGEP(builder, color_ptr, &index, 1, "");
+
+         generate_blend(&key->blend,
                      rt,
 		     builder,
 		     blend_type,
 		     context_ptr,
-		     blend_mask,
-		     blend_in_color,
-		     color_ptr);
+		     blend_mask[i],
+		     blend_in_color[i],
+		     quad_color_ptr);
+      }
    }
 
 #ifdef PIPE_ARCH_X86
@@ -765,8 +770,8 @@ dump_fs_variant_key(const struct lp_fragment_shader_variant_key *key)
    for (i = 0; i < PIPE_MAX_SAMPLERS; ++i) {
       if (key->sampler[i].format) {
          debug_printf("sampler[%u] = \n", i);
-         debug_printf("  .format = %s\n",
-                      util_format_name(key->sampler[i].format));
+         //debug_printf("  .format = %s\n",
+         //             util_format_name(key->sampler[i].format));
          debug_printf("  .target = %s\n",
                       util_dump_tex_target(key->sampler[i].target, TRUE));
          debug_printf("  .pot = %u %u %u\n",
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 5832ea2..be2db3e 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -762,8 +762,8 @@ tex_image_face_size(const struct llvmpipe_resource *lpr, unsigned level,
           layout == LP_TEX_LAYOUT_LINEAR);
 
    if (layout == LP_TEX_LAYOUT_TILED) {
-      /* for tiled layout, force a 32bpp format */
-      const enum pipe_format format = PIPE_FORMAT_B8G8R8A8_UNORM;
+      /* for tiled layout, force a float32 format */
+      const enum pipe_format format = PIPE_FORMAT_R32G32B32A32_FLOAT;
       const unsigned block_size = util_format_get_blocksize(format);
       const unsigned nblocksy =
          util_format_get_nblocksy(format, align(height, TILE_SIZE));
@@ -1252,7 +1252,7 @@ void
 llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr,
                              unsigned face_slice, unsigned level,
                              unsigned x, unsigned y,
-                             uint8_t *tile)
+                             float *tile)
 {
    struct llvmpipe_texture_image *linear_img = &lpr->linear[level];
    const unsigned tx = x / TILE_SIZE, ty = y / TILE_SIZE;
@@ -1273,7 +1273,7 @@ llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr,
    {
       uint ii = x, jj = y;
       uint tile_offset = jj / TILE_SIZE + ii / TILE_SIZE;
-      uint byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4;
+      uint subpixel_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4;
       
       /* Note that lp_tiled_to_linear expects the tile parameter to
        * point at the first tile in a whole-image sized array.  In
@@ -1281,7 +1281,7 @@ llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr,
        * pointer arithmetic to figure out where the "image" would have
        * started.
        */
-      lp_tiled_to_linear(tile - byte_offset, linear_image,
+      lp_tiled_to_linear(tile - subpixel_offset, linear_image,
                          x, y, TILE_SIZE, TILE_SIZE,
                          lpr->base.format,
                          lpr->row_stride[level],
@@ -1301,7 +1301,7 @@ void
 llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr,
                            unsigned face_slice, unsigned level,
                            unsigned x, unsigned y,
-                           uint8_t *tile)
+                           float *tile)
 {
    uint8_t *linear_image;
 
@@ -1315,7 +1315,7 @@ llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr,
    if (linear_image) {
       uint ii = x, jj = y;
       uint tile_offset = jj / TILE_SIZE + ii / TILE_SIZE;
-      uint byte_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4;
+      uint subpixel_offset = tile_offset * TILE_SIZE * TILE_SIZE * 4;
 
       /* Note that lp_linear_to_tiled expects the tile parameter to
        * point at the first tile in a whole-image sized array.  In
@@ -1323,7 +1323,7 @@ llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr,
        * pointer arithmetic to figure out where the "image" would have
        * started.
        */
-      lp_linear_to_tiled(linear_image, tile - byte_offset,
+      lp_linear_to_tiled(linear_image, tile - subpixel_offset,
                          x, y, TILE_SIZE, TILE_SIZE,
                          lpr->base.format,
                          lpr->row_stride[level],
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h
index 4e4a65d..030a9ad 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.h
+++ b/src/gallium/drivers/llvmpipe/lp_texture.h
@@ -227,13 +227,13 @@ void
 llvmpipe_unswizzle_cbuf_tile(struct llvmpipe_resource *lpr,
                              unsigned face_slice, unsigned level,
                              unsigned x, unsigned y,
-                             uint8_t *tile);
+                             float *tile);
 
 void
 llvmpipe_swizzle_cbuf_tile(struct llvmpipe_resource *lpr,
                            unsigned face_slice, unsigned level,
                            unsigned x, unsigned y,
-                           uint8_t *tile);
+                           float *tile);
 
 extern void
 llvmpipe_print_resources(void);
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_image.c b/src/gallium/drivers/llvmpipe/lp_tile_image.c
index 0938f7a..fc626bf 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_image.c
+++ b/src/gallium/drivers/llvmpipe/lp_tile_image.c
@@ -189,7 +189,7 @@ lp_tiled_to_linear(const void *src, void *dst,
    }
    else {
       /* color image */
-      const uint bpp = 4;
+      const uint bpp = 4 * sizeof(float);
       const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
       const uint bytes_per_tile = tile_w * tile_h * bpp;
       uint i, j;
@@ -201,8 +201,8 @@ lp_tiled_to_linear(const void *src, void *dst,
             uint byte_offset = tile_offset * bytes_per_tile;
             const uint8_t *src_tile = (uint8_t *) src + byte_offset;
 
-            lp_tile_unswizzle_4ub(format,
-                              src_tile,
+            lp_tile_unswizzle_4f(format,
+                              (float*)src_tile,
                               dst, dst_stride,
                               ii, jj);
          }
@@ -278,7 +278,7 @@ lp_linear_to_tiled(const void *src, void *dst,
       }
    }
    else {
-      const uint bpp = 4;
+      const uint bpp = 4 * sizeof(float);
       const uint tile_w = TILE_SIZE, tile_h = TILE_SIZE;
       const uint bytes_per_tile = tile_w * tile_h * bpp;
       uint i, j;
@@ -290,8 +290,8 @@ lp_linear_to_tiled(const void *src, void *dst,
             uint byte_offset = tile_offset * bytes_per_tile;
             uint8_t *dst_tile = (uint8_t *) dst + byte_offset;
 
-            lp_tile_swizzle_4ub(format,
-                             dst_tile,
+            lp_tile_swizzle_4f(format,
+                             (float*)dst_tile,
                              src, src_stride,
                              ii, jj);
          }
@@ -313,7 +313,7 @@ test_tiled_linear_conversion(void *data,
    unsigned wt = (width + TILE_SIZE - 1) / TILE_SIZE;
    unsigned ht = (height + TILE_SIZE - 1) / TILE_SIZE;
 
-   uint8_t *tiled = malloc(wt * ht * TILE_SIZE * TILE_SIZE * 4);
+   float *tiled = malloc(wt * ht * TILE_SIZE * TILE_SIZE * 4 * sizeof(float));
 
    /*unsigned tiled_stride = wt * TILE_SIZE * TILE_SIZE * 4;*/
 
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.h b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
index 12dac1d..de8434a 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.h
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.h
@@ -59,7 +59,7 @@ extern unsigned lp_tile_swizzle_count;
 
 /**
  * Return offset of the given pixel (and color channel) from the start
- * of a tile, in bytes.
+ * of a tile, in pixel-sized units
  */
 static INLINE unsigned
 tile_pixel_offset(unsigned x, unsigned y, unsigned c)
@@ -88,6 +88,19 @@ lp_tile_unswizzle_4ub(enum pipe_format format,
                   void *dst, unsigned dst_stride,
                   unsigned x, unsigned y);
 
+void
+lp_tile_swizzle_4f(enum pipe_format format,
+                 float *dst,
+                 const void *src, unsigned src_stride,
+                 unsigned x, unsigned y);
+
+
+void
+lp_tile_unswizzle_4f(enum pipe_format format,
+                  const float *src,
+                  void *dst, unsigned dst_stride,
+                  unsigned x, unsigned y);
+
 
 
 #ifdef __cplusplus
diff --git a/src/gallium/drivers/llvmpipe/lp_tile_soa.py b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
index 2ba3905..63567c6 100644
--- a/src/gallium/drivers/llvmpipe/lp_tile_soa.py
+++ b/src/gallium/drivers/llvmpipe/lp_tile_soa.py
@@ -206,10 +206,10 @@ def emit_unrolled_unswizzle_code(format, src_channel):
     print '      const unsigned py = y0 + qy;'
     print '      for (qx = 0; qx < TILE_SIZE; qx += TILE_VECTOR_WIDTH) {'
     print '         const unsigned px = x0 + qx;'
-    print '         const uint8_t *r = src + 0 * TILE_C_STRIDE;'
-    print '         const uint8_t *g = src + 1 * TILE_C_STRIDE;'
-    print '         const uint8_t *b = src + 2 * TILE_C_STRIDE;'
-    print '         const uint8_t *a = src + 3 * TILE_C_STRIDE;'
+    print '         const float *r = src + 0 * TILE_C_STRIDE;'
+    print '         const float *g = src + 1 * TILE_C_STRIDE;'
+    print '         const float *b = src + 2 * TILE_C_STRIDE;'
+    print '         const float *a = src + 3 * TILE_C_STRIDE;'
     print '         (void) r; (void) g; (void) b; (void) a; /* silence warnings */'
     print '         for (i = 0; i < TILE_C_STRIDE; i += 2) {'
     print '            const uint32_t pixel0 = %s;' % pack_rgba(format, src_channel, "r[i+0]", "g[i+0]", "b[i+0]", "a[i+0]")
@@ -579,13 +579,12 @@ def main():
 
     generate_ssse3()
 
-    channel = Channel(UNSIGNED, True, 8)
-    native_type = 'uint8_t'
-    suffix = '4ub'
+    channel = Channel(FLOAT, False, 32)
+    native_type = 'float'
+    suffix = '4f'
 
     generate_swizzle(formats, channel, native_type, suffix)
     generate_unswizzle(formats, channel, native_type, suffix)
 
-
 if __name__ == '__main__':
     main()




More information about the mesa-commit mailing list