[Mesa-dev] [PATCH 2/2] llvmpipe: fix clearing of individual color buffers in a fb

Fri Apr 18 17:02:33 PDT 2014

Looks good, but some comments below.

On 04/17/2014 04:34 PM, sroland at vmware.com wrote:
> From: Roland Scheidegger <sroland at vmware.com>
>
> GL (3.0) allows you to clear individual color buffers in a fb. In fact
> for fbs containing both int and float/normalized color buffers this is
> required (because the clearing values are otherwise undefined if applied
> to all buffers). The gallium interface was changed a while ago, but llvmpipe
> ignored it (hence doing such individual clears always resulted in clearing
> all buffers, plus some assorted asserts due to the mixed fbs).
> So change the clear command to indicate the buffer to be cleared. Also, because
> indicating the buffer to be cleared would have made lp_rast_arg_cmd larger
> which is unacceptable (we're trying to shrink it some day) allocate the clear
> value in the scene and just pass a pointer.
> There's several advantages and disadvantages here:
> + clearing individual buffers works (we could also actually bin such clears now
> if they'd come through clear_render_target() if the surface is in the current
> fb, though we didn't do this before for the single rb case and still don't try).
> + since there's one clear per rb, we do the format conversion in setup rather
> than per bin. Aside from the (drop in the ocean...) performance advantage this
> means that clearing to very small values (that is, denormal when converted to
> the format) should work for small float (fp16 etc.) formats, as the util code
> couldn't handle it correctly before (because cpu denorms are disabled when
> executing the bin commands, screwing up the magic conversion and flushing
> the values to 0, though this was not verified).
> - there's some overhead for traditional old-style clear-all MRT cases, since
> there's one rast clear command per rb instead of one for all rbs.
>
> This fixes https://bugs.freedesktop.org/show_bug.cgi?id=76976.
> ---
>   src/gallium/drivers/llvmpipe/lp_rast.c          |  122 ++++---------
>   src/gallium/drivers/llvmpipe/lp_rast.h          |    8 +-
>   src/gallium/drivers/llvmpipe/lp_scene.c         |    1 -
>   src/gallium/drivers/llvmpipe/lp_scene.h         |    1 -
>   src/gallium/drivers/llvmpipe/lp_setup.c         |  216 ++++++++++++++++-------
>   src/gallium/drivers/llvmpipe/lp_setup_context.h |    2 +-
>   6 files changed, 189 insertions(+), 161 deletions(-)
>
> diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
> index 0ae5976..8667a6c 100644
> --- a/src/gallium/drivers/llvmpipe/lp_rast.c
> +++ b/src/gallium/drivers/llvmpipe/lp_rast.c
> @@ -110,25 +110,6 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
>
>
>   /**
> - * Examine a framebuffer object to determine if any of the colorbuffers
> - * use a pure integer format.
> - * XXX this could be a gallium utility function if useful elsewhere.
> - */
> -static boolean
> -is_fb_pure_integer(const struct pipe_framebuffer_state *fb)
> -{
> -   unsigned i;
> -   for (i = 0; i < fb->nr_cbufs; i++) {
> -      if (fb->cbufs[i] &&
> -          util_format_is_pure_integer(fb->cbufs[i]->format)) {
> -         return TRUE;
> -      }
> -   }
> -   return FALSE;
> -}
> -
> -
> -/**
>    * Clear the rasterizer's current color tile.
>    * This is a bin command called during bin processing.
>    * Clear commands always clear all bound layers.
> @@ -138,81 +119,40 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
>                       const union lp_rast_cmd_arg arg)
>   {
>      const struct lp_scene *scene = task->scene;
> +   unsigned cbuf = arg.clear_rb->cbuf;
> +   union util_color uc;
> +   enum pipe_format format;
>
> -   if (scene->fb.nr_cbufs) {
> -      unsigned i;
> -      union util_color uc;
> -
> -      if (is_fb_pure_integer(&scene->fb)) {
> -         /*
> -          * We expect int/uint clear values here, though some APIs
> -          * might disagree (but in any case util_pack_color()
> -          * couldn't handle it)...
> -          */
> -         LP_DBG(DEBUG_RAST, "%s pure int 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
> -                    arg.clear_color.ui[0],
> -                    arg.clear_color.ui[1],
> -                    arg.clear_color.ui[2],
> -                    arg.clear_color.ui[3]);
> -
> -         for (i = 0; i < scene->fb.nr_cbufs; i++) {
> -            enum pipe_format format = scene->fb.cbufs[i]->format;
> -
> -            if (util_format_is_pure_sint(format)) {
> -               util_format_write_4i(format, arg.clear_color.i, 0, &uc, 0, 0, 0, 1, 1);
> -            }
> -            else {
> -               assert(util_format_is_pure_uint(format));
> -               util_format_write_4ui(format, arg.clear_color.ui, 0, &uc, 0, 0, 0, 1, 1);
> -            }
> -
> -            util_fill_box(scene->cbufs[i].map,
> -                          format,
> -                          scene->cbufs[i].stride,
> -                          scene->cbufs[i].layer_stride,
> -                          task->x,
> -                          task->y,
> -                          0,
> -                          task->width,
> -                          task->height,
> -                          scene->fb_max_layer + 1,
> -                          &uc);
> -         }
> -      }
> -      else {
> -         uint8_t clear_color[4];
> +   /* we never bin clear commands for non-existing buffers */
> +   assert(cbuf < scene->fb.nr_cbufs);
> +   assert(scene->fb.cbufs[cbuf]);
>
> -         for (i = 0; i < 4; ++i) {
> -            clear_color[i] = float_to_ubyte(arg.clear_color.f[i]);
> -         }
> -
> -         LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
> -                    clear_color[0],
> -                    clear_color[1],
> -                    clear_color[2],
> -                    clear_color[3]);
> -
> -         for (i = 0; i < scene->fb.nr_cbufs; i++) {
> -            if (scene->fb.cbufs[i]) {
> -               util_pack_color(arg.clear_color.f,
> -                               scene->fb.cbufs[i]->format, &uc);
> -
> -               util_fill_box(scene->cbufs[i].map,
> -                             scene->fb.cbufs[i]->format,
> -                             scene->cbufs[i].stride,
> -                             scene->cbufs[i].layer_stride,
> -                             task->x,
> -                             task->y,
> -                             0,
> -                             task->width,
> -                             task->height,
> -                             scene->fb_max_layer + 1,
> -                             &uc);
> -            }
> -         }
> -      }
> -   }
> +   format = scene->fb.cbufs[cbuf]->format;
> +   memcpy(&uc, &arg.clear_rb->color_val[0], 16);
>
> +   /*
> +    * this is pretty rough since we have target format (bunch of bytes...) here.
> +    * dump it as raw 4 dwords.
> +    */
> +   LP_DBG(DEBUG_RAST, "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n",
> +          __FUNCTION__, format,
> +          arg.clear_rb->color_val[0], arg.clear_rb->color_val[1],
> +          arg.clear_rb->color_val[2], arg.clear_rb->color_val[3]);
> +
> +
> +   util_fill_box(scene->cbufs[cbuf].map,
> +                 format,
> +                 scene->cbufs[cbuf].stride,
> +                 scene->cbufs[cbuf].layer_stride,
> +                 task->x,
> +                 task->y,
> +                 0,
> +                 task->width,
> +                 task->height,
> +                 scene->fb_max_layer + 1,
> +                 &uc);
> +
> +   /* this will increase for each rb which probably doesn't mean much */
>      LP_COUNT(nr_color_tile_clear);
>   }
>
> diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
> index e2a9ec2..21b1bd0 100644
> --- a/src/gallium/drivers/llvmpipe/lp_rast.h
> +++ b/src/gallium/drivers/llvmpipe/lp_rast.h
> @@ -136,6 +136,12 @@ struct lp_rast_triangle {
>   };
>
>
> +struct lp_rast_clear_rb {
> +   unsigned cbuf;
> +   uint32_t color_val[4];
> +};
> +
> +
>   #define GET_A0(inputs) ((float (*)[4])((inputs)+1))
>   #define GET_DADX(inputs) ((float (*)[4])((char *)((inputs) + 1) + (inputs)->stride))
>   #define GET_DADY(inputs) ((float (*)[4])((char *)((inputs) + 1) + 2 * (inputs)->stride))
> @@ -164,7 +170,7 @@ union lp_rast_cmd_arg {
>         unsigned plane_mask;
>      } triangle;
>      const struct lp_rast_state *set_state;
> -   union pipe_color_union clear_color;
> +   const struct lp_rast_clear_rb *clear_rb;
>      struct {
>         uint64_t value;
>         uint64_t mask;
> diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
> index 9ba5f1a..9a8df27 100644
> --- a/src/gallium/drivers/llvmpipe/lp_scene.c
> +++ b/src/gallium/drivers/llvmpipe/lp_scene.c
> @@ -292,7 +292,6 @@ lp_scene_end_rasterization(struct lp_scene *scene )
>      scene->scene_size = 0;
>      scene->resource_reference_size = 0;
>
> -   scene->has_depthstencil_clear = FALSE;
>      scene->alloc_failed = FALSE;
>
>      util_unreference_framebuffer_state( &scene->fb );
> diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
> index 94b1779..19a3811 100644
> --- a/src/gallium/drivers/llvmpipe/lp_scene.h
> +++ b/src/gallium/drivers/llvmpipe/lp_scene.h
> @@ -165,7 +165,6 @@ struct lp_scene {
>      unsigned resource_reference_size;
>
>      boolean alloc_failed;
> -   boolean has_depthstencil_clear;
>      boolean discard;
>      /**
>       * Number of active tiles in each dimension.
> diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
> index b4ce925..ee200c5 100644
> --- a/src/gallium/drivers/llvmpipe/lp_setup.c
> +++ b/src/gallium/drivers/llvmpipe/lp_setup.c
> @@ -202,25 +202,38 @@ begin_binning( struct lp_setup_context *setup )
>           util_format_is_depth_and_stencil(setup->fb.zsbuf->format))
>         need_zsload = TRUE;
>
> -   LP_DBG(DEBUG_SETUP, "%s color: %s depth: %s\n", __FUNCTION__,
> -          (setup->clear.flags & PIPE_CLEAR_COLOR) ? "clear": "load",
> +   LP_DBG(DEBUG_SETUP, "%s color clear bufs: %x depth: %s\n", __FUNCTION__,
> +          setup->clear.flags >> 2,
>             need_zsload ? "clear": "load");
>
> -   if (setup->fb.nr_cbufs) {
> -      if (setup->clear.flags & PIPE_CLEAR_COLOR) {
> -         ok = lp_scene_bin_everywhere( scene,
> -                                       LP_RAST_OP_CLEAR_COLOR,
> -                                       setup->clear.color );
> -         if (!ok)
> -            return FALSE;
> +   if (setup->clear.flags & PIPE_CLEAR_COLOR) {
> +      unsigned cbuf;
> +      for (cbuf = 0; cbuf < setup->fb.nr_cbufs; cbuf++) {
> +         assert(PIPE_CLEAR_COLOR0 == 1 << 2);
> +         if (setup->clear.flags & (1 << (2 + cbuf))) {
> +            union lp_rast_cmd_arg clearrb_arg;
> +            struct lp_rast_clear_rb *cc_scene =
> +               (struct lp_rast_clear_rb *)
> +                  lp_scene_alloc(scene, sizeof(struct lp_rast_clear_rb));
> +
> +            if (!cc_scene) {
> +               return FALSE;
> +            }
> +
> +            cc_scene->cbuf = cbuf;
> +            memcpy(cc_scene->color_val, &setup->clear.color_val[cbuf][0], 16);
> +            clearrb_arg.clear_rb = cc_scene;
> +
> +            if (!lp_scene_bin_everywhere(scene,
> +                                         LP_RAST_OP_CLEAR_COLOR,
> +                                         clearrb_arg))
> +               return FALSE;
> +         }
>         }
>      }
>
>      if (setup->fb.zsbuf) {
>         if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) {
> -         if (!need_zsload)
> -            scene->has_depthstencil_clear = TRUE;
> -
>            ok = lp_scene_bin_everywhere( scene,
>                                          LP_RAST_OP_CLEAR_ZSTENCIL,
>                                          lp_rast_arg_clearzs(
> @@ -368,40 +381,105 @@ lp_setup_bind_framebuffer( struct lp_setup_context *setup,
>
>
>   static boolean
> -lp_setup_try_clear( struct lp_setup_context *setup,
> -                    const union pipe_color_union *color,
> -                    double depth,
> -                    unsigned stencil,
> -                    unsigned flags )
> +lp_setup_try_clear_colorrb(struct lp_setup_context *setup,
> +                           const union pipe_color_union *color,
> +                           unsigned cbuf)

Maybe call this lp_setup_try_clear_color_buffer() and put a comment on it?

>   {
> -   uint64_t zsmask = 0;
> -   uint64_t zsvalue = 0;
> -   union lp_rast_cmd_arg color_arg;
> -   unsigned i;
> +   union lp_rast_cmd_arg clearrb_arg;
> +   union util_color uc;
> +   enum pipe_format format = setup->fb.cbufs[cbuf]->format;
>
>      LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state);
>
> -   if (flags & PIPE_CLEAR_COLOR) {
> -      for (i = 0; i < 4; i++)
> -         color_arg.clear_color.i[i] = color->i[i];
> +   if (util_format_is_pure_integer(format)) {
> +      /*
> +       * We expect int/uint clear values here, though some APIs
> +       * might disagree (but in any case util_pack_color()
> +       * couldn't handle it)...
> +       */
> +      if (util_format_is_pure_sint(format)) {
> +         util_format_write_4i(format, color->i, 0, &uc, 0, 0, 0, 1, 1);
> +      }
> +      else {
> +         assert(util_format_is_pure_uint(format));
> +         util_format_write_4ui(format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
> +      }
> +   }
> +   else {
> +      util_pack_color(color->f, format, &uc);
>      }
>
> -   if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
> -      uint32_t zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
> -      uint8_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
> +   /* double formats won't work anyway, we just copy 16 bytes */
> +   assert(util_format_get_blocksizebits(format) <= 128);
> +
> +   if (setup->state == SETUP_ACTIVE) {
> +      struct lp_scene *scene = setup->scene;
> +
> +      /* Add the clear to existing scene.  In the unusual case where
> +       * both color and depth-stencil are being cleared when there's
> +       * already been some rendering, we could discard the currently
> +       * binned scene and start again, but I don't see that as being
> +       * a common usage.
> +       */
> +      struct lp_rast_clear_rb *cc_scene =
> +         (struct lp_rast_clear_rb *)
> +            lp_scene_alloc(scene, sizeof(struct lp_rast_clear_rb));
>
> -      zsvalue = util_pack64_z_stencil(setup->fb.zsbuf->format,
> -                                      depth,
> -                                      stencil);
> +      if (!cc_scene) {
> +         return FALSE;
> +      }
>
> +      cc_scene->cbuf = cbuf;
> +      memcpy(cc_scene->color_val, &uc, 16);

s/16/sizeof(uc)/

> +      clearrb_arg.clear_rb = cc_scene;
>
> -      zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format,
> -                                          zmask,
> -                                          smask);
> +      if (!lp_scene_bin_everywhere(scene,
> +                                   LP_RAST_OP_CLEAR_COLOR,
> +                                   clearrb_arg))
> +         return FALSE;
> +   }
> +   else {
> +      /* Put ourselves into the 'pre-clear' state, specifically to try
> +       * and accumulate multiple clears to color and depth_stencil
> +       * buffers which the app or state-tracker might issue
> +       * separately.
> +       */
> +      set_scene_state( setup, SETUP_CLEARED, __FUNCTION__ );
>
> -      zsvalue &= zsmask;
> +      assert(PIPE_CLEAR_COLOR0 == (1 << 2));
> +      setup->clear.flags |= 1 << (cbuf + 2);
> +      memcpy(&setup->clear.color_val[cbuf][0], &uc, 16);

s/16/sizeof(uc)/

or, see my last comment at the end.

>      }
>
> +   return TRUE;
> +}
> +
> +static boolean
> +lp_setup_try_clear_zs(struct lp_setup_context *setup,
> +                      double depth,
> +                      unsigned stencil,
> +                      unsigned flags)
> +{
> +   uint64_t zsmask = 0;
> +   uint64_t zsvalue = 0;
> +   uint32_t zmask32;
> +   uint8_t smask8;
> +
> +   LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state);
> +
> +   zmask32 = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
> +   smask8 = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
> +
> +   zsvalue = util_pack64_z_stencil(setup->fb.zsbuf->format,
> +                                   depth,
> +                                   stencil);
> +
> +   zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format,
> +                                       zmask32,
> +                                       smask8);
> +
> +   zsvalue &= zsmask;
> +
>      if (setup->state == SETUP_ACTIVE) {
>         struct lp_scene *scene = setup->scene;
>
> @@ -411,19 +489,10 @@ lp_setup_try_clear( struct lp_setup_context *setup,
>          * binned scene and start again, but I don't see that as being
>          * a common usage.
>          */
> -      if (flags & PIPE_CLEAR_COLOR) {
> -         if (!lp_scene_bin_everywhere( scene,
> -                                       LP_RAST_OP_CLEAR_COLOR,
> -                                       color_arg ))
> -            return FALSE;
> -      }
> -
> -      if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
> -         if (!lp_scene_bin_everywhere( scene,
> -                                       LP_RAST_OP_CLEAR_ZSTENCIL,
> -                                       lp_rast_arg_clearzs(zsvalue, zsmask) ))
> -            return FALSE;
> -      }
> +      if (!lp_scene_bin_everywhere(scene,
> +                                   LP_RAST_OP_CLEAR_ZSTENCIL,
> +                                   lp_rast_arg_clearzs(zsvalue, zsmask)))
> +         return FALSE;
>      }
>      else {
>         /* Put ourselves into the 'pre-clear' state, specifically to try
> @@ -435,19 +504,11 @@ lp_setup_try_clear( struct lp_setup_context *setup,
>
>         setup->clear.flags |= flags;
>
> -      if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
> -         setup->clear.zsmask |= zsmask;
> -         setup->clear.zsvalue =
> -            (setup->clear.zsvalue & ~zsmask) | (zsvalue & zsmask);
> -      }
> -
> -      if (flags & PIPE_CLEAR_COLOR) {
> -         memcpy(&setup->clear.color.clear_color,
> -                &color_arg,
> -                sizeof setup->clear.color.clear_color);
> -      }
> +      setup->clear.zsmask |= zsmask;
> +      setup->clear.zsvalue =
> +         (setup->clear.zsvalue & ~zsmask) | (zsvalue & zsmask);
>      }
> -
> +
>      return TRUE;
>   }
>
> @@ -458,15 +519,38 @@ lp_setup_clear( struct lp_setup_context *setup,
>                   unsigned stencil,
>                   unsigned flags )
>   {
> -   if (!lp_setup_try_clear( setup, color, depth, stencil, flags )) {
> -      lp_setup_flush(setup, NULL, __FUNCTION__);
> +   unsigned i;
>
> -      if (!lp_setup_try_clear( setup, color, depth, stencil, flags ))
> -         assert(0);
> -   }
> -}
> +   /*
> +    * Note any of these (max 9) clears could fail (but at most there should
> +    * be just one failure!). This avoids doing the previous succeeded
> +    * clears again (we still clear tiles twice if a clear command succeeded
> +    * partially for one buffer).
> +    */
> +   if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
> +      unsigned flagszs = flags & PIPE_CLEAR_DEPTHSTENCIL;
> +      if (!lp_setup_try_clear_zs(setup, depth, stencil, flagszs)) {
> +         lp_setup_flush(setup, NULL, __FUNCTION__);
>
> +         if (!lp_setup_try_clear_zs(setup, depth, stencil, flagszs))
> +            assert(0);
> +      }
> +   }
>
> +   if (flags & PIPE_CLEAR_COLOR) {
> +      assert(PIPE_CLEAR_COLOR0 == (1 << 2));
> +      for (i = 0; i < setup->fb.nr_cbufs; i++) {
> +         if ((flags & (1 << (2 + i))) && setup->fb.cbufs[i]) {
> +            if (!lp_setup_try_clear_colorrb(setup, color, i)) {
> +               lp_setup_flush(setup, NULL, __FUNCTION__);
> +
> +               if (!lp_setup_try_clear_colorrb(setup, color, i))
> +                  assert(0);
> +            }
> +         }
> +      }
> +   }
> +}
>
>
>
> @@ -503,7 +587,7 @@ lp_setup_set_line_state( struct lp_setup_context *setup,
>
>   void
>   lp_setup_set_point_state( struct lp_setup_context *setup,
> -                          float point_size,
> +                          float point_size,
>                             boolean point_size_per_vertex,
>                             uint sprite_coord_enable,
>                             uint sprite_coord_origin)
> diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
> index b3fb24a..6eddd44 100644
> --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
> +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
> @@ -116,7 +116,7 @@ struct lp_setup_context
>
>      struct {
>         unsigned flags;
> -      union lp_rast_cmd_arg color;    /**< lp_rast_clear_color() cmd */
> +      uint32_t color_val[PIPE_MAX_COLOR_BUFS][4];
>         uint64_t zsmask;
>         uint64_t zsvalue;               /**< lp_rast_clear_zstencil() cmd */
>      } clear;
>

Actually, instead of the memcpy(dst, src, 16) calls above, can we just 
make the 'color_val' field above a util_color then use simple 
assignments instead of memcpy()s?

-Brian