Mesa (lp-binning): llvmpipe: execute shaders on 4x4 blocks instead of 8x2

Brian Paul brianp at kemper.freedesktop.org
Wed Dec 2 22:16:14 UTC 2009


Module: Mesa
Branch: lp-binning
Commit: 866e6856d39efe9b1ec739587f420a640ad8618e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=866e6856d39efe9b1ec739587f420a640ad8618e

Author: Brian Paul <brianp at vmware.com>
Date:   Wed Dec  2 15:13:45 2009 -0700

llvmpipe: execute shaders on 4x4 blocks instead of 8x2

This matches the convention used by the recursive rasterizer.
Also fixed assorted typos, comments, etc.
Now tri-z.c, gears.c, etc. look basically right, but there are still some
cracks in triangle rasterization.

---

 src/gallium/drivers/llvmpipe/lp_bld_interp.c |  135 +++++++++++++++++---------
 src/gallium/drivers/llvmpipe/lp_bld_interp.h |   10 +--
 src/gallium/drivers/llvmpipe/lp_rast.c       |   22 +++--
 src/gallium/drivers/llvmpipe/lp_state_fs.c   |   12 ++-
 4 files changed, 116 insertions(+), 63 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.c b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
index 338dbca..affeeca 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.c
@@ -45,6 +45,36 @@
 #include "lp_bld_interp.h"
 
 
+/*
+ * The shader JIT function operates on blocks of quads.
+ * Each block has 2x2 quads and each quad has 2x2 pixels.
+ *
+ * We iterate over the quads in order 0, 1, 2, 3:
+ *
+ * #################
+ * #   |   #   |   #
+ * #---0---#---1---#
+ * #   |   #   |   #
+ * #################
+ * #   |   #   |   #
+ * #---2---#---3---#
+ * #   |   #   |   #
+ * #################
+ *
+ * Within each quad, we have four pixels which are represented in SOA
+ * order:
+ *
+ * #########
+ * # 0 | 1 #
+ * #---+---#
+ * # 2 | 3 #
+ * #########
+ *
+ * So the green channel (for example) of the four pixels is stored in
+ * a single vector register: {g0, g1, g2, g3}.
+ */
+
+
 static void
 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
 {
@@ -55,6 +85,10 @@ attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix
 }
 
 
+/**
+ * Initialize the bld->a0, dadx, dady fields.  This involves fetching
+ * those values from the arrays which are passed into the JIT function.
+ */
 static void
 coeffs_init(struct lp_build_interp_soa_context *bld,
             LLVMValueRef a0_ptr,
@@ -91,7 +125,7 @@ coeffs_init(struct lp_build_interp_soa_context *bld,
             case TGSI_INTERPOLATE_CONSTANT:
                a0 = LLVMBuildLoad(builder, LLVMBuildGEP(builder, a0_ptr, &index, 1, ""), "");
                a0 = lp_build_broadcast_scalar(&bld->base, a0);
-               attrib_name(a0, attrib, chan, ".dady");
+               attrib_name(a0, attrib, chan, ".a0");
                break;
 
             default:
@@ -135,30 +169,13 @@ coeff_multiply(struct lp_build_interp_soa_context *bld,
 
 
 /**
- * Multiply the dadx and dady with the xstep and ystep respectively.
+ * Emit LLVM code to compute the fragment shader input attribute values.
+ * For example, for a color input, we'll compute red, green, blue and alpha
+ * values for the four pixels in a quad.
+ * Recall that we're operating on 4-element vectors so each arithmetic
+ * operation is operating on the four pixels in a quad.
  */
 static void
-coeffs_update(struct lp_build_interp_soa_context *bld)
-{
-   unsigned attrib;
-   unsigned chan;
-
-   for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
-      unsigned mask = bld->mask[attrib];
-      unsigned mode = bld->mode[attrib];
-      if (mode != TGSI_INTERPOLATE_CONSTANT) {
-         for(chan = 0; chan < NUM_CHANNELS; ++chan) {
-            if(mask & (1 << chan)) {
-               bld->dadx[attrib][chan] = coeff_multiply(bld, bld->dadx[attrib][chan], bld->xstep);
-               bld->dady[attrib][chan] = coeff_multiply(bld, bld->dady[attrib][chan], bld->ystep);
-            }
-         }
-      }
-   }
-}
-
-
-static void
 attribs_init(struct lp_build_interp_soa_context *bld)
 {
    LLVMValueRef x = bld->pos[0];
@@ -180,7 +197,9 @@ attribs_init(struct lp_build_interp_soa_context *bld)
             res = a0;
 
             if (mode != TGSI_INTERPOLATE_CONSTANT) {
+               /* res = res + x * dadx */
                res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, x, dadx));
+               /* res = res + y * dady */
                res = lp_build_add(&bld->base, res, lp_build_mul(&bld->base, y, dady));
             }
 
@@ -204,13 +223,19 @@ attribs_init(struct lp_build_interp_soa_context *bld)
 }
 
 
+/**
+ * Increment the shader input attribute values.
+ * This is called when we move from one quad to the next.
+ */
 static void
-attribs_update(struct lp_build_interp_soa_context *bld)
+attribs_update(struct lp_build_interp_soa_context *bld, int quad_index)
 {
    LLVMValueRef oow = NULL;
    unsigned attrib;
    unsigned chan;
 
+   assert(quad_index < 4);
+
    for(attrib = 0; attrib < bld->num_attribs; ++attrib) {
       unsigned mask = bld->mask[attrib];
       unsigned mode = bld->mode[attrib];
@@ -224,13 +249,21 @@ attribs_update(struct lp_build_interp_soa_context *bld)
 
                res = bld->attribs_pre[attrib][chan];
 
-               if(bld->xstep)
+               if (quad_index == 1 || quad_index == 3) {
+                  /* top-right or bottom-right quad */
+                  /* build res = res + dadx + dadx */
                   res = lp_build_add(&bld->base, res, dadx);
+                  res = lp_build_add(&bld->base, res, dadx);
+               }
 
-               if(bld->ystep)
+               if (quad_index == 2 || quad_index == 3) {
+                  /* bottom-left or bottom-right quad */
+                  /* build res = res + dady + dady */
                   res = lp_build_add(&bld->base, res, dady);
+                  res = lp_build_add(&bld->base, res, dady);
+               }
 
-               bld->attribs_pre[attrib][chan] = res;
+               //XXX bld->attribs_pre[attrib][chan] = res;
 
                if (mode == TGSI_INTERPOLATE_PERSPECTIVE) {
                   LLVMValueRef w = bld->pos[3];
@@ -268,17 +301,32 @@ pos_init(struct lp_build_interp_soa_context *bld,
 }
 
 
+/**
+ * Update quad position values when moving to the next quad.
+ */
 static void
-pos_update(struct lp_build_interp_soa_context *bld)
+pos_update(struct lp_build_interp_soa_context *bld, int quad_index)
 {
    LLVMValueRef x = bld->attribs[0][0];
    LLVMValueRef y = bld->attribs[0][1];
+   const int xstep = 2, ystep = 2;
 
-   if(bld->xstep)
-      x = lp_build_add(&bld->base, x, lp_build_const_scalar(bld->base.type, bld->xstep));
+   if (quad_index == 1 || quad_index == 3) {
+      /* top-right or bottom-right quad in block */
+      /* build x += xstep */
+      x = lp_build_add(&bld->base, x,
+                       lp_build_const_scalar(bld->base.type, xstep));
+   }
 
-   if(bld->ystep)
-      y = lp_build_add(&bld->base, y, lp_build_const_scalar(bld->base.type, bld->ystep));
+   if (quad_index == 2) {
+      /* bottom-left quad in block */
+      /* build y += ystep */
+      y = lp_build_add(&bld->base, y,
+                       lp_build_const_scalar(bld->base.type, ystep));
+      /* build x -= xstep */
+      x = lp_build_sub(&bld->base, x,
+                       lp_build_const_scalar(bld->base.type, xstep));
+   }
 
    lp_build_name(x, "pos.x");
    lp_build_name(y, "pos.y");
@@ -288,6 +336,9 @@ pos_update(struct lp_build_interp_soa_context *bld)
 }
 
 
+/**
+ * Initialize fragment shader input attribute info.
+ */
 void
 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
                          const struct tgsi_token *tokens,
@@ -297,9 +348,7 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
                          LLVMValueRef dadx_ptr,
                          LLVMValueRef dady_ptr,
                          LLVMValueRef x0,
-                         LLVMValueRef y0,
-                         int xstep,
-                         int ystep)
+                         LLVMValueRef y0)
 {
    struct tgsi_parse_context parse;
    struct tgsi_full_declaration *decl;
@@ -357,21 +406,19 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
    pos_init(bld, x0, y0);
 
    attribs_init(bld);
-
-   bld->xstep = xstep;
-   bld->ystep = ystep;
-
-   coeffs_update(bld);
 }
 
 
 /**
- * Advance the position and inputs with the xstep and ystep.
+ * Advance the position and inputs to the given quad within the block.
  */
 void
-lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld)
+lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
+                           int quad_index)
 {
-   pos_update(bld);
+   assert(quad_index < 4);
+
+   pos_update(bld, quad_index);
 
-   attribs_update(bld);
+   attribs_update(bld, quad_index);
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
index 9c57a10..e2b3bc1 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -63,9 +63,6 @@ struct lp_build_interp_soa_context
    LLVMValueRef dadx[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
    LLVMValueRef dady[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
 
-   int xstep;
-   int ystep;
-
    /* Attribute values before perspective divide */
    LLVMValueRef attribs_pre[1 + PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
 
@@ -88,12 +85,11 @@ lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
                          LLVMValueRef dadx_ptr,
                          LLVMValueRef dady_ptr,
                          LLVMValueRef x0,
-                         LLVMValueRef y0,
-                         int xstep,
-                         int ystep);
+                         LLVMValueRef y0);
 
 void
-lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld);
+lp_build_interp_soa_update(struct lp_build_interp_soa_context *bld,
+                           int quad_index);
 
 
 #endif /* LP_BLD_INTERP_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 09495f6..f88dd4a 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -126,8 +126,6 @@ void lp_rast_end( struct lp_rasterizer *rast )
 }
 
 
-
-
 /**
  * Begining rasterization of a tile.
  * \param x  window X position of the tile, in pixels
@@ -152,7 +150,7 @@ void lp_rast_clear_color( struct lp_rasterizer *rast,
 {
    const uint8_t *clear_color = arg.clear_color;
    
-   RAST_DEBUG("%s %x,%x,%x,%x\n", __FUNCTION__, 
+   RAST_DEBUG("%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, 
               clear_color[0],
               clear_color[1],
               clear_color[2],
@@ -181,7 +179,7 @@ void lp_rast_clear_zstencil( struct lp_rasterizer *rast,
 {
    unsigned i, j;
    
-   RAST_DEBUG("%s\n", __FUNCTION__);
+   RAST_DEBUG("%s 0x%x\n", __FUNCTION__, arg.clear_zstencil);
 
    for (i = 0; i < TILE_SIZE; i++)
       for (j = 0; j < TILE_SIZE; j++)
@@ -225,6 +223,9 @@ void lp_rast_shade_tile( struct lp_rasterizer *rast,
 }
 
 
+/**
+ * Compute shading for a 4x4 block of pixels.
+ */
 void lp_rast_shade_quads( struct lp_rasterizer *rast,
                           const struct lp_rast_shader_inputs *inputs,
                           unsigned x, unsigned y,
@@ -237,6 +238,7 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
    void *depth;
    uint32_t ALIGN16_ATTRIB masks[2][2][2][2];
    unsigned ix, iy;
+   int block_offset;
 
    /* Sanity checks */
    assert(x % TILE_VECTOR_WIDTH == 0);
@@ -275,16 +277,20 @@ void lp_rast_shade_quads( struct lp_rasterizer *rast,
    masks[1][1][1][1] = mask & (1 << (1*8+1*4+1*2+1)) ? ~0 : 0;
 #endif
 
+   assert((x % 2) == 0);
+   assert((y % 2) == 0);
+
    ix = x % TILE_SIZE;
    iy = y % TILE_SIZE;
 
+   /* offset of the 4x4 pixel block (16 pixels) within the tile */
+   block_offset = ((iy/4)*(16*16) + (ix/4)*16);
+
    /* color buffer */
-   color = &TILE_PIXEL(tile->color, ix, iy, 0);
+   color = tile->color + 4 * block_offset;
 
    /* depth buffer */
-   assert((x % 2) == 0);
-   assert((y % 2) == 0);
-   depth = tile->depth + (iy/4)*(16*16) + (ix/4)*16;
+   depth = tile->depth + block_offset;
 
    /* XXX: This will most likely fail on 32bit x86 without -mstackrealign */
    assert(lp_check_alignment(masks, 16));
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 0541d36..aa9c006 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -358,6 +358,9 @@ generate_blend(const struct pipe_blend_state *blend,
 
 /**
  * Generate the runtime callable function for the whole fragment pipeline.
+ * Note that the function which we generate operates on a block of 16
+ * pixels at a time.  The block contains 2x2 quads.  Each quad contains
+ * 2x2 pixels.
  */
 static struct lp_fragment_shader_variant *
 generate_fragment(struct llvmpipe_context *lp,
@@ -437,8 +440,8 @@ generate_fragment(struct llvmpipe_context *lp,
    fs_type.sign = TRUE;     /* values are signed */
    fs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
    fs_type.width = 32;      /* 32-bit float */
-   fs_type.length = 4;      /* 4 element per vector */
-   num_fs = 4;
+   fs_type.length = 4;      /* 4 elements per vector */
+   num_fs = 4;              /* number of quads per block */
 
    memset(&blend_type, 0, sizeof blend_type);
    blend_type.floating = FALSE; /* values are integers */
@@ -509,18 +512,19 @@ generate_fragment(struct llvmpipe_context *lp,
 
    lp_build_interp_soa_init(&interp, shader->base.tokens, builder, fs_type,
                             a0_ptr, dadx_ptr, dady_ptr,
-                            x0, y0, 2, 0);
+                            x0, y0);
 
    /* code generated texture sampling */
    sampler = lp_llvm_sampler_soa_create(key->sampler, context_ptr);
 
+   /* loop over quads in the block */
    for(i = 0; i < num_fs; ++i) {
       LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
       LLVMValueRef out_color[NUM_CHANNELS];
       LLVMValueRef depth_ptr_i;
 
       if(i != 0)
-         lp_build_interp_soa_update(&interp);
+         lp_build_interp_soa_update(&interp, i);
 
       fs_mask[i] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, mask_ptr, &index, 1, ""), "");
       depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &index, 1, "");




More information about the mesa-commit mailing list