[Mesa-dev] [PATCH 04/15] i965/blorp: Use MSDISPMODE_PERSAMPLE rendering when necessary

Paul Berry stereotype441 at gmail.com
Tue May 22 16:11:58 PDT 2012


On 22 May 2012 11:31, Kenneth Graunke <kenneth at whitecape.org> wrote:

> On 05/11/2012 11:03 AM, Paul Berry wrote:
>
>> This patch modifies the "blorp" WM program so that it can be run in
>> MSDISPMODE_PERSAMPLE (which means that every single sample of a
>> multisampled render target is dispatched to the WM program, not just
>> every pixel).
>>
>> Previously we were using the ugly hack of configuring multisampled
>> destination surfaces as single-sampled, and generating sample indices
>> other than zero by swizzling the pixel coordinates in the WM program.
>> ---
>>  src/mesa/drivers/dri/i965/brw_blorp.h        |   12 ++++
>>  src/mesa/drivers/dri/i965/brw_blorp_blit.cpp |   87 +++++++++++++++++++-------
>>  src/mesa/drivers/dri/i965/gen6_blorp.cpp     |    5 +-
>>  src/mesa/drivers/dri/i965/gen7_blorp.cpp     |   10 ++-
>>  4 files changed, 87 insertions(+), 27 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
>> index f14a5c7..b911356 100644
>> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
>> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
>> @@ -132,6 +132,12 @@ const unsigned int BRW_BLORP_NUM_PUSH_CONST_REGS =
>>  struct brw_blorp_prog_data
>>  {
>>     unsigned int first_curbe_grf;
>> +
>> +   /**
>> +    * True if the WM program should be run in MSDISPMODE_PERSAMPLE with
>> more
>> +    * than one sample per pixel.
>> +    */
>> +   bool persample_msaa_dispatch;
>>  };
>>
>>  class brw_blorp_params
>> @@ -218,6 +224,12 @@ struct brw_blorp_blit_prog_key
>>      * pixels that are outside the destination rectangle.
>>      */
>>     bool use_kill;
>> +
>> +   /**
>> +    * True if the WM program should be run in MSDISPMODE_PERSAMPLE with
>> more
>> +    * than one sample per pixel.
>> +    */
>> +   bool persample_msaa_dispatch;
>>  };
>>
>>  class brw_blorp_blit_params : public brw_blorp_params
>> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> index e985fad..07e9dd7 100644
>> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> @@ -437,13 +437,14 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
>>                                  GLuint *program_size)
>>  {
>>     /* Sanity checks */
>> -   if (key->dst_tiled_w) {
>> -      /* If the destination image is W tiled, then dst_samples must be 0.
>> -       * Otherwise, after conversion between W and Y tiling, there's no
>> +   if (key->dst_tiled_w&&  key->rt_samples>  0) {
>>
>> +      /* If the destination image is W tiled and multisampled, then the
>> thread
>> +       * must be dispatched once per sample, not once per pixel.  This is
>> +       * necessary because after conversion between W and Y tiling,
>> there's no
>>         * guarantee that all samples corresponding to a single pixel will
>> still
>>         * be together.
>>         */
>> -      assert(key->rt_samples == 0);
>> +      assert(key->persample_msaa_dispatch);
>>     }
>>
>>     if (key->blend) {
>> @@ -459,6 +460,17 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
>>        assert(key->tex_samples>  0);
>>     }
>>
>> +   if (key->persample_msaa_dispatch) {
>> +      /* It only makes sense to do persample dispatch if the render
>> target is
>> +       * configured as multisampled.
>> +       */
>> +      assert(key->rt_samples>  0);
>> +   }
>> +
>> +   /* Set up prog_data */
>> +   memset(&prog_data, 0, sizeof(prog_data));
>> +   prog_data.persample_msaa_dispatch = key->persample_msaa_dispatch;
>> +
>>     brw_set_compression_control(&func, BRW_COMPRESSION_NONE);
>>
>>     alloc_regs();
>> @@ -658,11 +670,29 @@ brw_blorp_blit_program::compute_frag_coords()
>>      */
>>     brw_ADD(&func, Y, stride(suboffset(R1, 5), 2, 4, 0),
>> brw_imm_v(0x11001100));
>>
>> -   /* Since we always run the WM in a mode that causes a single fragment
>> -    * dispatch per pixel, it's not meaningful to compute a sample value.
>>  Just
>> -    * set it to 0.
>> -    */
>> -   s_is_zero = true;
>> +   if (key->persample_msaa_dispatch) {
>> +      /* The WM will be run in MSDISPMODE_PERSAMPLE with num_samples>  0.
>> +       * Therefore, subspan 0 will represent sample 0, subspan 1 will
>> +       * represent sample 1, and so on.
>> +       *
>> +       * So we need to populate S with the sequence (0, 0, 0, 0, 1, 1,
>> 1, 1,
>> +       * 2, 2, 2, 2, 3, 3, 3, 3).  The easiest way to do this is to
>> populate a
>> +       * temporary variable with the sequence (0, 1, 2, 3), and then
>> copy from
>> +       * it using vstride=1, width=4, hstride=0.
>> +       *
>> +       * TODO: implement appropriate calculation for Gen7.
>> +       */
>>
>
> I think you mean:
>
> TODO: implement the necessary calculation for 8x multisampling.
>

You're right.  Good catch.


>
> Otherwise,
> Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
>
>> +      brw_MOV(&func, t1, brw_imm_v(0x3210));
>> +      brw_MOV(&func, S, stride(t1, 1, 4, 0));
>> +      s_is_zero = false;
>> +   } else {
>> +      /* Either the destination surface is single-sampled, or the WM
>> will be
>> +       * run in MSDISPMODE_PERPIXEL (which causes a single fragment
>> dispatch
>> +       * per pixel).  In either case, it's not meaningful to compute a
>> sample
>> +       * value.  Just set it to 0.
>> +       */
>> +      s_is_zero = true;
>> +   }
>>  }
>>
>>  /**
>> @@ -1065,22 +1095,23 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct intel_mipmap_tree *src_mt,
>>     use_wm_prog = true;
>>     memset(&wm_prog_key, 0, sizeof(wm_prog_key));
>>
>> -   if (dst.map_stencil_as_y_tiled) {
>> -      /* If the destination surface is a W-tiled stencil buffer that
>> we're
>> -       * mapping as Y tiled, then we need to set up the surface state as
>> -       * single-sampled, because the memory layout of related samples
>> doesn't
>> -       * match between W and Y tiling.
>> +   if (dst.map_stencil_as_y_tiled&&  dst.num_samples>  0) {
>>
>> +      /* If the destination surface is a W-tiled multisampled stencil
>> buffer
>> +       * that we're mapping as Y tiled, then we need to arrange for the
>> WM
>> +       * program to run once per sample rather than once per pixel,
>> because
>> +       * the memory layout of related samples doesn't match between W
>> and Y
>> +       * tiling.
>>         */
>> -      dst.num_samples = 0;
>> +      wm_prog_key.persample_msaa_dispatch = true;
>>     }
>>
>> -   if (src_mt->num_samples>  0&&  dst_mt->num_samples>  0) {
>> +   if (src.num_samples>  0&&  dst.num_samples>  0) {
>>
>>        /* We are blitting from a multisample buffer to a multisample
>> buffer, so
>>         * we must preserve samples within a pixel.  This means we have to
>> -       * configure the render target as single-sampled, so that the WM
>> program
>> -       * generate each sample separately.
>> +       * arrange for the WM program to run once per sample rather than
>> once
>> +       * per pixel.
>>         */
>> -      dst.num_samples = 0;
>> +      wm_prog_key.persample_msaa_dispatch = true;
>>     }
>>
>>     /* The render path must be configured to use the same number of
>> samples as
>> @@ -1138,12 +1169,22 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct intel_mipmap_tree *src_mt,
>>         * dimensions 64x64.  We must also align it to a multiple of the
>> tile
>>         * size, because the differences between W and Y tiling formats
>> will
>>         * mean that pixels are scrambled within the tile.
>> +       *
>> +       * Note: if the destination surface is configured as an MSAA surface,
>> then
>> +       * the effective tile size we need to align it to is smaller,
>> because
>> +       * each pixel covers a 2x2 or a 4x2 block of samples.
>> +       *
>>         * TODO: what if this makes the coordinates too large?
>>         */
>> -      x0 = (x0 * 2)&  ~127;
>> -      y0 = (y0 / 2)&  ~31;
>>
>> -      x1 = ALIGN(x1 * 2, 128);
>> -      y1 = ALIGN(y1 / 2, 32);
>> +      unsigned x_align = 64, y_align = 64;
>> +      if (dst_mt->num_samples>  0) {
>> +         x_align /= (dst_mt->num_samples == 4 ? 2 : 4);
>> +         y_align /= 2;
>> +      }
>> +      x0 = (x0&  ~(x_align - 1)) * 2;
>> +      y0 = (y0&  ~(y_align - 1)) / 2;
>>
>> +      x1 = ALIGN(x1, x_align) * 2;
>> +      y1 = ALIGN(y1, y_align) / 2;
>>        wm_prog_key.use_kill = true;
>>     }
>>  }
>> diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
>> index 8eed9dc..85a8ee6 100644
>> --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
>> +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
>> @@ -742,7 +742,10 @@ gen6_blorp_emit_wm_config(struct brw_context *brw,
>>
>>     if (params->num_samples>  0) {
>>        dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
>> -      dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
>> +      if (prog_data && prog_data->persample_msaa_dispatch)
>>
>> +         dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
>> +      else
>> +         dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
>>     } else {
>>        dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
>>        dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
>> diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
>> index fbb94df..e5b27dd 100644
>> --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
>> +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
>> @@ -399,7 +399,8 @@ gen7_blorp_emit_sf_config(struct brw_context *brw,
>>   */
>>  static void
>>  gen7_blorp_emit_wm_config(struct brw_context *brw,
>> -                          const brw_blorp_params *params)
>> +                          const brw_blorp_params *params,
>> +                          brw_blorp_prog_data *prog_data)
>>  {
>>     struct intel_context *intel =&brw->intel;
>>
>>
>> @@ -431,7 +432,10 @@ gen7_blorp_emit_wm_config(struct brw_context *brw,
>>
>>        if (params->num_samples>  0) {
>>           dw1 |= GEN7_WM_MSRAST_ON_PATTERN;
>> -         dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL;
>> +         if (prog_data && prog_data->persample_msaa_dispatch)
>>
>> +            dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
>> +         else
>> +            dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL;
>>        } else {
>>           dw1 |= GEN7_WM_MSRAST_OFF_PIXEL;
>>           dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE;
>> @@ -757,7 +761,7 @@ gen7_blorp_exec(struct intel_context *intel,
>>     gen7_blorp_emit_streamout_disable(brw, params);
>>     gen6_blorp_emit_clip_disable(brw, params);
>>     gen7_blorp_emit_sf_config(brw, params);
>> -   gen7_blorp_emit_wm_config(brw, params);
>> +   gen7_blorp_emit_wm_config(brw, params, prog_data);
>>     if (params->use_wm_prog) {
>>        gen7_blorp_emit_binding_table_pointers_ps(brw, params,
>>                                                  wm_bind_bo_offset);
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20120522/ab1818bf/attachment.html>


More information about the mesa-dev mailing list