Mesa (master): llvmpipe: reduce alignment requirement for 1d resources from 4x4 to 4x1

Roland Scheidegger sroland at kemper.freedesktop.org
Tue Jun 4 22:44:04 UTC 2013


Module: Mesa
Branch: master
Commit: ffe2a1ca3c097661dd3f6e3ca5cfd72be184426c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ffe2a1ca3c097661dd3f6e3ca5cfd72be184426c

Author: Roland Scheidegger <sroland at vmware.com>
Date:   Wed Jun  5 00:17:22 2013 +0200

llvmpipe: reduce alignment requirement for 1d resources from 4x4 to 4x1

For rendering to buffers, we cannot have any y alignment.
So make sure that tile clear commands only clear up to the fb width/height,
not more (do this for all resources actually as clearing more seems
pointless for other resources too). For the jit fs function, skip execution
of the lower half of the fragment shader for the 4x4 stamp completely,
for depth/stencil only load/store the values from the first row
(replace other row with undef).
For the blend function, also only load half the values from fs output,
replace the rest with undefs so that everything still operates on the
full 4x4 block to keep code the same between 4x1 and 4x4 (except for
load/store of course which also needs to skip (store) or replace these
values with undefs (load))., at the cost of slightly less optimal code
being produced in some cases.
Also reduce 1d and 1d array alignment too, because they can be handled the
same as buffers so don't need to waste memory.

v2: don't try to run special blend code for 4x1, (very) slightly less
complexity if we just use the same code as for 4x4 which may or may not
make it easier to optimize in the future (as we care a lot more about 4x4
performance than 1d).

v2: don't use undef values for unused fs src outputs with llvm 3.1 as it
apparently can trigger a bug in llvm.

Reviewed-by: Jose Fonseca <jfonseca at vmware.com>

---

 src/gallium/drivers/llvmpipe/lp_bld_depth.c |   19 ++++-
 src/gallium/drivers/llvmpipe/lp_bld_depth.h |    2 +
 src/gallium/drivers/llvmpipe/lp_rast.c      |    8 +-
 src/gallium/drivers/llvmpipe/lp_scene.c     |    2 -
 src/gallium/drivers/llvmpipe/lp_scene.h     |    4 -
 src/gallium/drivers/llvmpipe/lp_state_fs.c  |  121 ++++++++++++++++++++++-----
 src/gallium/drivers/llvmpipe/lp_state_fs.h  |    1 +
 src/gallium/drivers/llvmpipe/lp_texture.c   |   24 ++++--
 src/gallium/drivers/llvmpipe/lp_texture.h   |   21 +++++
 9 files changed, 158 insertions(+), 44 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index df6a6c4..a8bd15f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -525,6 +525,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
  *
  * \param type  the data type of the fragment depth/stencil values
  * \param format_desc  description of the depth/stencil surface
+ * \param is_1d  whether this resource has only one dimension
  * \param loop_counter  the current loop iteration
  * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
  * \param depth_stride  stride of the depth/stencil buffer
@@ -535,6 +536,7 @@ void
 lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                      struct lp_type z_src_type,
                                      const struct util_format_description *format_desc,
+                                     boolean is_1d,
                                      LLVMValueRef depth_ptr,
                                      LLVMValueRef depth_stride,
                                      LLVMValueRef *z_fb,
@@ -592,9 +594,14 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
    zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
    zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
    zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
-   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
-   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
-   zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
+   if (is_1d) {
+      zs_dst2 = lp_build_undef(gallivm, zs_load_type);
+   }
+   else {
+      zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
+      zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
+      zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
+   }
 
    *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
                                   LLVMConstVector(shuffles, zs_type.length), "");
@@ -648,6 +655,7 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
  *
  * \param type  the data type of the fragment depth/stencil values
  * \param format_desc  description of the depth/stencil surface
+ * \param is_1d  whether this resource has only one dimension
  * \param mask  the alive/dead pixel mask for the quad (vector)
  * \param z_fb  z values read from fb (with padding)
  * \param s_fb  s values read from fb (with padding)
@@ -661,6 +669,7 @@ void
 lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
                                       struct lp_type z_src_type,
                                       const struct util_format_description *format_desc,
+                                      boolean is_1d,
                                       struct lp_build_mask_context *mask,
                                       LLVMValueRef z_fb,
                                       LLVMValueRef s_fb,
@@ -791,7 +800,9 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
    }
 
    LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
-   LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
+   if (!is_1d) {
+      LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
+   }
 }
 
 /**
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index 2534dc3..d169c89 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -74,6 +74,7 @@ void
 lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                      struct lp_type z_src_type,
                                      const struct util_format_description *format_desc,
+                                     boolean is_1d,
                                      LLVMValueRef depth_ptr,
                                      LLVMValueRef depth_stride,
                                      LLVMValueRef *z_fb,
@@ -84,6 +85,7 @@ void
 lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
                                       struct lp_type z_src_type,
                                       const struct util_format_description *format_desc,
+                                      boolean is_1d,
                                       struct lp_build_mask_context *mask,
                                       LLVMValueRef z_fb,
                                       LLVMValueRef s_fb,
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index be5a286..981dd71 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -95,10 +95,10 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
    task->bin = bin;
    task->x = x * TILE_SIZE;
    task->y = y * TILE_SIZE;
-   task->width = TILE_SIZE + x * TILE_SIZE > task->scene->width_aligned ?
-                    task->scene->width_aligned - x * TILE_SIZE : TILE_SIZE;
-   task->height = TILE_SIZE + y * TILE_SIZE > task->scene->height_aligned ?
-                    task->scene->height_aligned - y * TILE_SIZE : TILE_SIZE;
+   task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ?
+                    task->scene->fb.width - x * TILE_SIZE : TILE_SIZE;
+   task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ?
+                    task->scene->fb.height - y * TILE_SIZE : TILE_SIZE;
 
    /* reset pointers to color and depth tile(s) */
    memset(task->color_tiles, 0, sizeof(task->color_tiles));
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
index 2dfc7ff..771ad08 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -505,8 +505,6 @@ void lp_scene_begin_binning( struct lp_scene *scene,
 
    scene->tiles_x = align(fb->width, TILE_SIZE) / TILE_SIZE;
    scene->tiles_y = align(fb->height, TILE_SIZE) / TILE_SIZE;
-   scene->width_aligned = align(fb->width, LP_RASTER_BLOCK_SIZE);
-   scene->height_aligned = align(fb->height, LP_RASTER_BLOCK_SIZE);
 
    assert(scene->tiles_x <= TILES_X);
    assert(scene->tiles_y <= TILES_Y);
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
index bc6c448..fa5bbca 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -144,10 +144,6 @@ struct lp_scene {
    /** list of resources referenced by the scene commands */
    struct resource_ref *resources;
 
-   /** aligned scene width, height */
-   unsigned width_aligned;
-   unsigned height_aligned;
-
    /** Total memory used by the scene (in bytes).  This sums all the
     * data blocks and counts all bins, state, resource references and
     * other random allocations within the scene.
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index a7bd836..260d93c 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -353,7 +353,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
 
    if (depth_mode & EARLY_DEPTH_TEST) {
       lp_build_depth_stencil_load_swizzled(gallivm, type,
-                                           zs_format_desc,
+                                           zs_format_desc, key->resource_1d,
                                            depth_ptr, depth_stride,
                                            &z_fb, &s_fb, loop_state.counter);
       lp_build_depth_stencil_test(gallivm,
@@ -369,7 +369,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   !simple_shader);
 
       if (depth_mode & EARLY_DEPTH_WRITE) {
-         lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
+         lp_build_depth_stencil_write_swizzled(gallivm, type,
+                                               zs_format_desc, key->resource_1d,
                                                NULL, NULL, NULL, loop_state.counter,
                                                depth_ptr, depth_stride,
                                                z_value, s_value);
@@ -424,7 +425,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
       }
 
       lp_build_depth_stencil_load_swizzled(gallivm, type,
-                                           zs_format_desc,
+                                           zs_format_desc, key->resource_1d,
                                            depth_ptr, depth_stride,
                                            &z_fb, &s_fb, loop_state.counter);
 
@@ -441,7 +442,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   !simple_shader);
       /* Late Z write */
       if (depth_mode & LATE_DEPTH_WRITE) {
-         lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
+         lp_build_depth_stencil_write_swizzled(gallivm, type,
+                                               zs_format_desc, key->resource_1d,
                                                NULL, NULL, NULL, loop_state.counter,
                                                depth_ptr, depth_stride,
                                                z_value, s_value);
@@ -454,7 +456,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
        * depth value, update from zs_value with the new mask value and
        * write that out.
        */
-      lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
+      lp_build_depth_stencil_write_swizzled(gallivm, type,
+                                            zs_format_desc, key->resource_1d,
                                             &mask, z_fb, s_fb, loop_state.counter,
                                             depth_ptr, depth_stride,
                                             z_value, s_value);
@@ -508,6 +511,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
  *
  * @param type            fragment shader type (4x or 8x float)
  * @param num_fs          number of fs_src
+ * @param is_1d           whether we're outputting to a 1d resource
  * @param dst_channels    number of output channels
  * @param fs_src          output from fragment shader
  * @param dst             pointer to store result
@@ -1345,6 +1349,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    LLVMValueRef blend_alpha;
    LLVMValueRef i32_zero;
    LLVMValueRef check_mask;
+   LLVMValueRef undef_src_val;
 
    struct lp_build_mask_context mask_ctx;
    struct lp_type mask_type;
@@ -1369,9 +1374,16 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    const boolean dual_source_blend = variant->key.blend.rt[0].blend_enable &&
                                      util_blend_state_is_dual(&variant->key.blend, 0);
 
+   const boolean is_1d = variant->key.resource_1d;
+   unsigned num_fullblock_fs = is_1d ? 2 * num_fs : num_fs;
+
    mask_type = lp_int32_vec4_type();
    mask_type.length = fs_type.length;
 
+   for (i = num_fs; i < num_fullblock_fs; i++) {
+      fs_mask[i] = lp_build_zero(gallivm, mask_type);
+   }
+
    /* Compute the alignment of the destination pointer in bytes */
 #if 0
    dst_alignment = (block_width * out_format_desc->block.bits + 7)/(out_format_desc->block.width * 8);
@@ -1388,7 +1400,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    if (do_branch) {
       check_mask = LLVMConstNull(lp_build_int_vec_type(gallivm, mask_type));
 
-      for (i = 0; i < num_fs; ++i) {
+      for (i = 0; i < num_fullblock_fs; ++i) {
          check_mask = LLVMBuildOr(builder, check_mask, fs_mask[i], "");
       }
 
@@ -1399,6 +1411,17 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    partial_mask |= !variant->opaque;
    i32_zero = lp_build_const_int32(gallivm, 0);
 
+#if HAVE_LLVM < 0x0302
+   /*
+    * undef triggers a crash in LLVMBuildTrunc in convert_from_blend_type in some
+    * cases (seen with r10g10b10a2, 128bit wide vectors) (only used for 1d case).
+    */
+   undef_src_val = lp_build_zero(gallivm, fs_type);
+#else
+   undef_src_val = lp_build_undef(gallivm, fs_type);
+#endif
+
+
    /* Get type from output format */
    lp_blend_type_from_format_desc(out_format_desc, &row_type);
    lp_mem_type_from_format_desc(out_format_desc, &dst_type);
@@ -1459,14 +1482,25 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    /*
     * Load shader output
     */
-   for (i = 0; i < num_fs; ++i) {
+   for (i = 0; i < num_fullblock_fs; ++i) {
       /* Always load alpha for use in blending */
-      LLVMValueRef alpha = LLVMBuildLoad(builder, fs_out_color[rt][alpha_channel][i], "");
+      LLVMValueRef alpha;
+      if (i < num_fs) {
+         alpha = LLVMBuildLoad(builder, fs_out_color[rt][alpha_channel][i], "");
+      }
+      else {
+         alpha = undef_src_val;
+      }
 
       /* Load each channel */
       for (j = 0; j < dst_channels; ++j) {
          assert(swizzle[j] < 4);
-         fs_src[i][j] = LLVMBuildLoad(builder, fs_out_color[rt][swizzle[j]][i], "");
+         if (i < num_fs) {
+            fs_src[i][j] = LLVMBuildLoad(builder, fs_out_color[rt][swizzle[j]][i], "");
+         }
+         else {
+            fs_src[i][j] = undef_src_val;
+         }
       }
 
       /* If 3 channels then pad to include alpha for 4 element transpose */
@@ -1492,12 +1526,23 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    }
    if (dual_source_blend) {
       /* same as above except different src/dst, skip masks and comments... */
-      for (i = 0; i < num_fs; ++i) {
-         LLVMValueRef alpha = LLVMBuildLoad(builder, fs_out_color[1][alpha_channel][i], "");
+      for (i = 0; i < num_fullblock_fs; ++i) {
+         LLVMValueRef alpha;
+         if (i < num_fs) {
+            alpha = LLVMBuildLoad(builder, fs_out_color[1][alpha_channel][i], "");
+         }
+         else {
+            alpha = undef_src_val;
+         }
 
          for (j = 0; j < dst_channels; ++j) {
             assert(swizzle[j] < 4);
-            fs_src1[i][j] = LLVMBuildLoad(builder, fs_out_color[1][swizzle[j]][i], "");
+            if (i < num_fs) {
+               fs_src1[i][j] = LLVMBuildLoad(builder, fs_out_color[1][swizzle[j]][i], "");
+            }
+            else {
+               fs_src1[i][j] = undef_src_val;
+            }
          }
          if (dst_channels == 3 && !has_alpha) {
             fs_src1[i][3] = alpha;
@@ -1518,7 +1563,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
        */
       fs_type.floating = 0;
       fs_type.sign = dst_type.sign;
-      for (i = 0; i < num_fs; ++i) {
+      for (i = 0; i < num_fullblock_fs; ++i) {
          for (j = 0; j < dst_channels; ++j) {
             fs_src[i][j] = LLVMBuildBitCast(builder, fs_src[i][j],
                                             lp_build_vec_type(gallivm, fs_type), "");
@@ -1533,16 +1578,16 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    /*
     * Pixel twiddle from fragment shader order to memory order
     */
-   src_count = generate_fs_twiddle(gallivm, fs_type, num_fs,
+   src_count = generate_fs_twiddle(gallivm, fs_type, num_fullblock_fs,
                                    dst_channels, fs_src, src, pad_inline);
    if (dual_source_blend) {
-      generate_fs_twiddle(gallivm, fs_type, num_fs, dst_channels,
+      generate_fs_twiddle(gallivm, fs_type, num_fullblock_fs, dst_channels,
                           fs_src1, src1, pad_inline);
    }
 
    src_channels = dst_channels < 3 ? dst_channels : 4;
-   if (src_count != num_fs * src_channels) {
-      unsigned ds = src_count / (num_fs * src_channels);
+   if (src_count != num_fullblock_fs * src_channels) {
+      unsigned ds = src_count / (num_fullblock_fs * src_channels);
       row_type.length /= ds;
       fs_type.length = row_type.length;
    }
@@ -1685,8 +1730,18 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
       dst_type.length = block_width;
    }
 
-   load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height,
-                         dst, dst_type, dst_count, dst_alignment);
+   if (is_1d) {
+      load_unswizzled_block(gallivm, color_ptr, stride, block_width, 1,
+                            dst, dst_type, dst_count / 4, dst_alignment);
+      for (i = dst_count / 4; i < dst_count; i++) {
+         dst[i] = lp_build_undef(gallivm, dst_type);
+      }
+
+   }
+   else {
+      load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height,
+                            dst, dst_type, dst_count, dst_alignment);
+   }
 
 
    /*
@@ -1761,8 +1816,14 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    /*
     * Store blend result to memory
     */
-   store_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height,
-                          dst, dst_type, dst_count, dst_alignment);
+   if (is_1d) {
+      store_unswizzled_block(gallivm, color_ptr, stride, block_width, 1,
+                             dst, dst_type, dst_count / 4, dst_alignment);
+   }
+   else {
+      store_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height,
+                             dst, dst_type, dst_count, dst_alignment);
+   }
 
    if (do_branch) {
       lp_build_mask_end(&mask_ctx);
@@ -1855,7 +1916,6 @@ generate_fragment(struct llvmpipe_context *lp,
    fs_type.norm = FALSE;         /* values are not limited to [0,1] or [-1,1] */
    fs_type.width = 32;           /* 32-bit float */
    fs_type.length = MIN2(lp_native_vector_width / 32, 16); /* n*4 elements per vector */
-   num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */
 
    memset(&blend_type, 0, sizeof blend_type);
    blend_type.floating = FALSE; /* values are integers */
@@ -1944,6 +2004,11 @@ generate_fragment(struct llvmpipe_context *lp,
    /* code generated texture sampling */
    sampler = lp_llvm_sampler_soa_create(key->state, context_ptr);
 
+   num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */
+   /* for 1d resources only run "upper half" of stamp */
+   if (key->resource_1d)
+      num_fs /= 2;
+
    {
       LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs);
       LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type);
@@ -2533,6 +2598,9 @@ make_variant_key(struct llvmpipe_context *lp,
          key->zsbuf_format = zsbuf_format;
          memcpy(&key->stencil, &lp->depth_stencil->stencil, sizeof key->stencil);
       }
+      if (llvmpipe_resource_is_1d(lp->framebuffer.zsbuf->texture)) {
+         key->resource_1d = TRUE;
+      }
    }
 
    /* alpha test only applies if render buffer 0 is non-integer (or does not exist) */
@@ -2570,6 +2638,15 @@ make_variant_key(struct llvmpipe_context *lp,
 
       key->cbuf_format[i] = format;
 
+      /*
+       * Figure out if this is a 1d resource. Note that OpenGL allows crazy
+       * mixing of 2d textures with height 1 and 1d textures, so make sure
+       * we pick 1d if any cbuf or zsbuf is 1d.
+       */
+      if (llvmpipe_resource_is_1d(lp->framebuffer.cbufs[0]->texture)) {
+         key->resource_1d = TRUE;
+      }
+
       format_desc = util_format_description(format);
       assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
              format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h
index c8dc1c3..3314090 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h
@@ -75,6 +75,7 @@ struct lp_fragment_shader_variant_key
    unsigned nr_sampler_views:8; /* actually derivable from just the shader */
    unsigned flatshade:1;
    unsigned occlusion_count:1;
+   unsigned resource_1d:1;
 
    enum pipe_format zsbuf_format;
    enum pipe_format cbuf_format[PIPE_MAX_COLOR_BUFS];
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 22f952c..f1a1ed0 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -83,22 +83,30 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
 
       /* Row stride and image stride */
       {
-         unsigned alignment, nblocksx, nblocksy, block_size;
+         unsigned align_x, align_y, nblocksx, nblocksy, block_size;
 
          /* For non-compressed formats we need 4x4 pixel alignment
-          * (for now). We also want cache line size in x direction,
+          * so we can read/write LP_RASTER_BLOCK_SIZE when rendering to them.
+          * We also want cache line size in x direction,
           * otherwise same cache line could end up in multiple threads.
-          * XXX this blows up 1d/1d array textures by a factor of 4.
+          * For explicit 1d resources however we reduce this to 4x1 and
+          * handle specially in render output code (as we need to do special
+          * handling there for buffers in any case).
           */
          if (util_format_is_compressed(pt->format))
-            alignment = 1;
-         else
-            alignment = LP_RASTER_BLOCK_SIZE;
+            align_x = align_y = 1;
+         else {
+            align_x = LP_RASTER_BLOCK_SIZE;
+            if (llvmpipe_resource_is_1d(&lpr->base))
+               align_y = 1;
+            else
+               align_y = LP_RASTER_BLOCK_SIZE;
+         }
 
          nblocksx = util_format_get_nblocksx(pt->format,
-                                             align(width, alignment));
+                                             align(width, align_x));
          nblocksy = util_format_get_nblocksy(pt->format,
-                                             align(height, alignment));
+                                             align(height, align_y));
          block_size = util_format_get_blocksize(pt->format);
 
          if (util_format_is_compressed(pt->format))
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h
index faba6f2..e73d449 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.h
+++ b/src/gallium/drivers/llvmpipe/lp_texture.h
@@ -159,6 +159,27 @@ llvmpipe_resource_is_texture(const struct pipe_resource *resource)
 }
 
 
+static INLINE boolean
+llvmpipe_resource_is_1d(const struct pipe_resource *resource)
+{
+   switch (resource->target) {
+   case PIPE_BUFFER:
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+      return TRUE;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_2D_ARRAY:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_3D:
+   case PIPE_TEXTURE_CUBE:
+      return FALSE;
+   default:
+      assert(0);
+      return FALSE;
+   }
+}
+
+
 static INLINE unsigned
 llvmpipe_resource_stride(struct pipe_resource *resource,
                          unsigned level)




More information about the mesa-commit mailing list