[Mesa-dev] [PATCH 2/2] llvmpipe: fix clearing of individual color buffers in a fb
Brian Paul
brianp at vmware.com
Fri Apr 18 17:02:33 PDT 2014
Looks good, but some comments below.
On 04/17/2014 04:34 PM, sroland at vmware.com wrote:
> From: Roland Scheidegger <sroland at vmware.com>
>
> GL (3.0) allows you to clear individual color buffers in a fb. In fact
> for fbs containing both int and float/normalized color buffers this is
> required (because the clearing values are otherwise undefined if applied
> to all buffers). The gallium interface was changed a while ago, but llvmpipe
> ignored it (hence doing such individual clears always resulted in clearing
> all buffers, plus some assorted asserts due to the mixed fbs).
> So change the clear command to indicate the buffer to be cleared. Also, because
> indicating the buffer to be cleared would have made lp_rast_arg_cmd larger
> which is unacceptable (we're trying to shrink it some day) allocate the clear
> value in the scene and just pass a pointer.
> There's several advantages and disadvantages here:
> + clearing individual buffers works (we could also actually bin such clears now
> if they'd come through clear_render_target() if the surface is in the current
> fb, though we didn't do this before for the single rb case and still don't try).
> + since there's one clear per rb, we do the format conversion in setup rather
> than per bin. Aside from the (drop in the ocean...) performance advantage this
> means that clearing to very small values (that is, denormal when converted to
> the format) should work for small float (fp16 etc.) formats, as the util code
> couldn't handle it correctly before (because cpu denorms are disabled when
> executing the bin commands, screwing up the magic conversion and flushing
> the values to 0, though this was not verified).
> - there's some overhead for traditional old-style clear-all MRT cases, since
> there's one rast clear command per rb instead of one for all rbs.
>
> This fixes https://bugs.freedesktop.org/show_bug.cgi?id=76976.
> ---
> src/gallium/drivers/llvmpipe/lp_rast.c | 122 ++++---------
> src/gallium/drivers/llvmpipe/lp_rast.h | 8 +-
> src/gallium/drivers/llvmpipe/lp_scene.c | 1 -
> src/gallium/drivers/llvmpipe/lp_scene.h | 1 -
> src/gallium/drivers/llvmpipe/lp_setup.c | 216 ++++++++++++++++-------
> src/gallium/drivers/llvmpipe/lp_setup_context.h | 2 +-
> 6 files changed, 189 insertions(+), 161 deletions(-)
>
> diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
> index 0ae5976..8667a6c 100644
> --- a/src/gallium/drivers/llvmpipe/lp_rast.c
> +++ b/src/gallium/drivers/llvmpipe/lp_rast.c
> @@ -110,25 +110,6 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
>
>
> /**
> - * Examine a framebuffer object to determine if any of the colorbuffers
> - * use a pure integer format.
> - * XXX this could be a gallium utility function if useful elsewhere.
> - */
> -static boolean
> -is_fb_pure_integer(const struct pipe_framebuffer_state *fb)
> -{
> - unsigned i;
> - for (i = 0; i < fb->nr_cbufs; i++) {
> - if (fb->cbufs[i] &&
> - util_format_is_pure_integer(fb->cbufs[i]->format)) {
> - return TRUE;
> - }
> - }
> - return FALSE;
> -}
> -
> -
> -/**
> * Clear the rasterizer's current color tile.
> * This is a bin command called during bin processing.
> * Clear commands always clear all bound layers.
> @@ -138,81 +119,40 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
> const union lp_rast_cmd_arg arg)
> {
> const struct lp_scene *scene = task->scene;
> + unsigned cbuf = arg.clear_rb->cbuf;
> + union util_color uc;
> + enum pipe_format format;
>
> - if (scene->fb.nr_cbufs) {
> - unsigned i;
> - union util_color uc;
> -
> - if (is_fb_pure_integer(&scene->fb)) {
> - /*
> - * We expect int/uint clear values here, though some APIs
> - * might disagree (but in any case util_pack_color()
> - * couldn't handle it)...
> - */
> - LP_DBG(DEBUG_RAST, "%s pure int 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
> - arg.clear_color.ui[0],
> - arg.clear_color.ui[1],
> - arg.clear_color.ui[2],
> - arg.clear_color.ui[3]);
> -
> - for (i = 0; i < scene->fb.nr_cbufs; i++) {
> - enum pipe_format format = scene->fb.cbufs[i]->format;
> -
> - if (util_format_is_pure_sint(format)) {
> - util_format_write_4i(format, arg.clear_color.i, 0, &uc, 0, 0, 0, 1, 1);
> - }
> - else {
> - assert(util_format_is_pure_uint(format));
> - util_format_write_4ui(format, arg.clear_color.ui, 0, &uc, 0, 0, 0, 1, 1);
> - }
> -
> - util_fill_box(scene->cbufs[i].map,
> - format,
> - scene->cbufs[i].stride,
> - scene->cbufs[i].layer_stride,
> - task->x,
> - task->y,
> - 0,
> - task->width,
> - task->height,
> - scene->fb_max_layer + 1,
> - &uc);
> - }
> - }
> - else {
> - uint8_t clear_color[4];
> + /* we never bin clear commands for non-existing buffers */
> + assert(cbuf < scene->fb.nr_cbufs);
> + assert(scene->fb.cbufs[cbuf]);
>
> - for (i = 0; i < 4; ++i) {
> - clear_color[i] = float_to_ubyte(arg.clear_color.f[i]);
> - }
> -
> - LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__,
> - clear_color[0],
> - clear_color[1],
> - clear_color[2],
> - clear_color[3]);
> -
> - for (i = 0; i < scene->fb.nr_cbufs; i++) {
> - if (scene->fb.cbufs[i]) {
> - util_pack_color(arg.clear_color.f,
> - scene->fb.cbufs[i]->format, &uc);
> -
> - util_fill_box(scene->cbufs[i].map,
> - scene->fb.cbufs[i]->format,
> - scene->cbufs[i].stride,
> - scene->cbufs[i].layer_stride,
> - task->x,
> - task->y,
> - 0,
> - task->width,
> - task->height,
> - scene->fb_max_layer + 1,
> - &uc);
> - }
> - }
> - }
> - }
> + format = scene->fb.cbufs[cbuf]->format;
> + memcpy(&uc, &arg.clear_rb->color_val[0], 16);
>
> + /*
> + * this is pretty rough since we have target format (bunch of bytes...) here.
> + * dump it as raw 4 dwords.
> + */
> + LP_DBG(DEBUG_RAST, "%s clear value (target format %d) raw 0x%x,0x%x,0x%x,0x%x\n",
> + __FUNCTION__, format,
> + arg.clear_rb->color_val[0], arg.clear_rb->color_val[1],
> + arg.clear_rb->color_val[2], arg.clear_rb->color_val[3]);
> +
> +
> + util_fill_box(scene->cbufs[cbuf].map,
> + format,
> + scene->cbufs[cbuf].stride,
> + scene->cbufs[cbuf].layer_stride,
> + task->x,
> + task->y,
> + 0,
> + task->width,
> + task->height,
> + scene->fb_max_layer + 1,
> + &uc);
> +
> + /* this will increase for each rb which probably doesn't mean much */
> LP_COUNT(nr_color_tile_clear);
> }
>
> diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
> index e2a9ec2..21b1bd0 100644
> --- a/src/gallium/drivers/llvmpipe/lp_rast.h
> +++ b/src/gallium/drivers/llvmpipe/lp_rast.h
> @@ -136,6 +136,12 @@ struct lp_rast_triangle {
> };
>
>
> +struct lp_rast_clear_rb {
> + unsigned cbuf;
> + uint32_t color_val[4];
> +};
> +
> +
> #define GET_A0(inputs) ((float (*)[4])((inputs)+1))
> #define GET_DADX(inputs) ((float (*)[4])((char *)((inputs) + 1) + (inputs)->stride))
> #define GET_DADY(inputs) ((float (*)[4])((char *)((inputs) + 1) + 2 * (inputs)->stride))
> @@ -164,7 +170,7 @@ union lp_rast_cmd_arg {
> unsigned plane_mask;
> } triangle;
> const struct lp_rast_state *set_state;
> - union pipe_color_union clear_color;
> + const struct lp_rast_clear_rb *clear_rb;
> struct {
> uint64_t value;
> uint64_t mask;
> diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
> index 9ba5f1a..9a8df27 100644
> --- a/src/gallium/drivers/llvmpipe/lp_scene.c
> +++ b/src/gallium/drivers/llvmpipe/lp_scene.c
> @@ -292,7 +292,6 @@ lp_scene_end_rasterization(struct lp_scene *scene )
> scene->scene_size = 0;
> scene->resource_reference_size = 0;
>
> - scene->has_depthstencil_clear = FALSE;
> scene->alloc_failed = FALSE;
>
> util_unreference_framebuffer_state( &scene->fb );
> diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
> index 94b1779..19a3811 100644
> --- a/src/gallium/drivers/llvmpipe/lp_scene.h
> +++ b/src/gallium/drivers/llvmpipe/lp_scene.h
> @@ -165,7 +165,6 @@ struct lp_scene {
> unsigned resource_reference_size;
>
> boolean alloc_failed;
> - boolean has_depthstencil_clear;
> boolean discard;
> /**
> * Number of active tiles in each dimension.
> diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
> index b4ce925..ee200c5 100644
> --- a/src/gallium/drivers/llvmpipe/lp_setup.c
> +++ b/src/gallium/drivers/llvmpipe/lp_setup.c
> @@ -202,25 +202,38 @@ begin_binning( struct lp_setup_context *setup )
> util_format_is_depth_and_stencil(setup->fb.zsbuf->format))
> need_zsload = TRUE;
>
> - LP_DBG(DEBUG_SETUP, "%s color: %s depth: %s\n", __FUNCTION__,
> - (setup->clear.flags & PIPE_CLEAR_COLOR) ? "clear": "load",
> + LP_DBG(DEBUG_SETUP, "%s color clear bufs: %x depth: %s\n", __FUNCTION__,
> + setup->clear.flags >> 2,
> need_zsload ? "clear": "load");
>
> - if (setup->fb.nr_cbufs) {
> - if (setup->clear.flags & PIPE_CLEAR_COLOR) {
> - ok = lp_scene_bin_everywhere( scene,
> - LP_RAST_OP_CLEAR_COLOR,
> - setup->clear.color );
> - if (!ok)
> - return FALSE;
> + if (setup->clear.flags & PIPE_CLEAR_COLOR) {
> + unsigned cbuf;
> + for (cbuf = 0; cbuf < setup->fb.nr_cbufs; cbuf++) {
> + assert(PIPE_CLEAR_COLOR0 == 1 << 2);
> + if (setup->clear.flags & (1 << (2 + cbuf))) {
> + union lp_rast_cmd_arg clearrb_arg;
> + struct lp_rast_clear_rb *cc_scene =
> + (struct lp_rast_clear_rb *)
> + lp_scene_alloc(scene, sizeof(struct lp_rast_clear_rb));
> +
> + if (!cc_scene) {
> + return FALSE;
> + }
> +
> + cc_scene->cbuf = cbuf;
> + memcpy(cc_scene->color_val, &setup->clear.color_val[cbuf][0], 16);
> + clearrb_arg.clear_rb = cc_scene;
> +
> + if (!lp_scene_bin_everywhere(scene,
> + LP_RAST_OP_CLEAR_COLOR,
> + clearrb_arg))
> + return FALSE;
> + }
> }
> }
>
> if (setup->fb.zsbuf) {
> if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) {
> - if (!need_zsload)
> - scene->has_depthstencil_clear = TRUE;
> -
> ok = lp_scene_bin_everywhere( scene,
> LP_RAST_OP_CLEAR_ZSTENCIL,
> lp_rast_arg_clearzs(
> @@ -368,40 +381,105 @@ lp_setup_bind_framebuffer( struct lp_setup_context *setup,
>
>
> static boolean
> -lp_setup_try_clear( struct lp_setup_context *setup,
> - const union pipe_color_union *color,
> - double depth,
> - unsigned stencil,
> - unsigned flags )
> +lp_setup_try_clear_colorrb(struct lp_setup_context *setup,
> + const union pipe_color_union *color,
> + unsigned cbuf)
Maybe call this lp_setup_try_clear_color_buffer() and put a comment on it?
> {
> - uint64_t zsmask = 0;
> - uint64_t zsvalue = 0;
> - union lp_rast_cmd_arg color_arg;
> - unsigned i;
> + union lp_rast_cmd_arg clearrb_arg;
> + union util_color uc;
> + enum pipe_format format = setup->fb.cbufs[cbuf]->format;
>
> LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state);
>
> - if (flags & PIPE_CLEAR_COLOR) {
> - for (i = 0; i < 4; i++)
> - color_arg.clear_color.i[i] = color->i[i];
> + if (util_format_is_pure_integer(format)) {
> + /*
> + * We expect int/uint clear values here, though some APIs
> + * might disagree (but in any case util_pack_color()
> + * couldn't handle it)...
> + */
> + if (util_format_is_pure_sint(format)) {
> + util_format_write_4i(format, color->i, 0, &uc, 0, 0, 0, 1, 1);
> + }
> + else {
> + assert(util_format_is_pure_uint(format));
> + util_format_write_4ui(format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
> + }
> + }
> + else {
> + util_pack_color(color->f, format, &uc);
> }
>
> - if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
> - uint32_t zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
> - uint8_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
> + /* double formats won't work anyway, we just copy 16 bytes */
> + assert(util_format_get_blocksizebits(format) <= 128);
> +
> + if (setup->state == SETUP_ACTIVE) {
> + struct lp_scene *scene = setup->scene;
> +
> + /* Add the clear to existing scene. In the unusual case where
> + * both color and depth-stencil are being cleared when there's
> + * already been some rendering, we could discard the currently
> + * binned scene and start again, but I don't see that as being
> + * a common usage.
> + */
> + struct lp_rast_clear_rb *cc_scene =
> + (struct lp_rast_clear_rb *)
> + lp_scene_alloc(scene, sizeof(struct lp_rast_clear_rb));
>
> - zsvalue = util_pack64_z_stencil(setup->fb.zsbuf->format,
> - depth,
> - stencil);
> + if (!cc_scene) {
> + return FALSE;
> + }
>
> + cc_scene->cbuf = cbuf;
> + memcpy(cc_scene->color_val, &uc, 16);
s/16/sizeof(uc)/
> + clearrb_arg.clear_rb = cc_scene;
>
> - zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format,
> - zmask,
> - smask);
> + if (!lp_scene_bin_everywhere(scene,
> + LP_RAST_OP_CLEAR_COLOR,
> + clearrb_arg))
> + return FALSE;
> + }
> + else {
> + /* Put ourselves into the 'pre-clear' state, specifically to try
> + * and accumulate multiple clears to color and depth_stencil
> + * buffers which the app or state-tracker might issue
> + * separately.
> + */
> + set_scene_state( setup, SETUP_CLEARED, __FUNCTION__ );
>
> - zsvalue &= zsmask;
> + assert(PIPE_CLEAR_COLOR0 == (1 << 2));
> + setup->clear.flags |= 1 << (cbuf + 2);
> + memcpy(&setup->clear.color_val[cbuf][0], &uc, 16);
s/16/sizeof(uc)/
or, see my last comment at the end.
> }
>
> + return TRUE;
> +}
> +
> +static boolean
> +lp_setup_try_clear_zs(struct lp_setup_context *setup,
> + double depth,
> + unsigned stencil,
> + unsigned flags)
> +{
> + uint64_t zsmask = 0;
> + uint64_t zsvalue = 0;
> + uint32_t zmask32;
> + uint8_t smask8;
> +
> + LP_DBG(DEBUG_SETUP, "%s state %d\n", __FUNCTION__, setup->state);
> +
> + zmask32 = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
> + smask8 = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
> +
> + zsvalue = util_pack64_z_stencil(setup->fb.zsbuf->format,
> + depth,
> + stencil);
> +
> + zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format,
> + zmask32,
> + smask8);
> +
> + zsvalue &= zsmask;
> +
> if (setup->state == SETUP_ACTIVE) {
> struct lp_scene *scene = setup->scene;
>
> @@ -411,19 +489,10 @@ lp_setup_try_clear( struct lp_setup_context *setup,
> * binned scene and start again, but I don't see that as being
> * a common usage.
> */
> - if (flags & PIPE_CLEAR_COLOR) {
> - if (!lp_scene_bin_everywhere( scene,
> - LP_RAST_OP_CLEAR_COLOR,
> - color_arg ))
> - return FALSE;
> - }
> -
> - if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
> - if (!lp_scene_bin_everywhere( scene,
> - LP_RAST_OP_CLEAR_ZSTENCIL,
> - lp_rast_arg_clearzs(zsvalue, zsmask) ))
> - return FALSE;
> - }
> + if (!lp_scene_bin_everywhere(scene,
> + LP_RAST_OP_CLEAR_ZSTENCIL,
> + lp_rast_arg_clearzs(zsvalue, zsmask)))
> + return FALSE;
> }
> else {
> /* Put ourselves into the 'pre-clear' state, specifically to try
> @@ -435,19 +504,11 @@ lp_setup_try_clear( struct lp_setup_context *setup,
>
> setup->clear.flags |= flags;
>
> - if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
> - setup->clear.zsmask |= zsmask;
> - setup->clear.zsvalue =
> - (setup->clear.zsvalue & ~zsmask) | (zsvalue & zsmask);
> - }
> -
> - if (flags & PIPE_CLEAR_COLOR) {
> - memcpy(&setup->clear.color.clear_color,
> - &color_arg,
> - sizeof setup->clear.color.clear_color);
> - }
> + setup->clear.zsmask |= zsmask;
> + setup->clear.zsvalue =
> + (setup->clear.zsvalue & ~zsmask) | (zsvalue & zsmask);
> }
> -
> +
> return TRUE;
> }
>
> @@ -458,15 +519,38 @@ lp_setup_clear( struct lp_setup_context *setup,
> unsigned stencil,
> unsigned flags )
> {
> - if (!lp_setup_try_clear( setup, color, depth, stencil, flags )) {
> - lp_setup_flush(setup, NULL, __FUNCTION__);
> + unsigned i;
>
> - if (!lp_setup_try_clear( setup, color, depth, stencil, flags ))
> - assert(0);
> - }
> -}
> + /*
> + * Note any of these (max 9) clears could fail (but at most there should
> + * be just one failure!). This avoids doing the previous succeeded
> + * clears again (we still clear tiles twice if a clear command succeeded
> + * partially for one buffer).
> + */
> + if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
> + unsigned flagszs = flags & PIPE_CLEAR_DEPTHSTENCIL;
> + if (!lp_setup_try_clear_zs(setup, depth, stencil, flagszs)) {
> + lp_setup_flush(setup, NULL, __FUNCTION__);
>
> + if (!lp_setup_try_clear_zs(setup, depth, stencil, flagszs))
> + assert(0);
> + }
> + }
>
> + if (flags & PIPE_CLEAR_COLOR) {
> + assert(PIPE_CLEAR_COLOR0 == (1 << 2));
> + for (i = 0; i < setup->fb.nr_cbufs; i++) {
> + if ((flags & (1 << (2 + i))) && setup->fb.cbufs[i]) {
> + if (!lp_setup_try_clear_colorrb(setup, color, i)) {
> + lp_setup_flush(setup, NULL, __FUNCTION__);
> +
> + if (!lp_setup_try_clear_colorrb(setup, color, i))
> + assert(0);
> + }
> + }
> + }
> + }
> +}
>
>
>
> @@ -503,7 +587,7 @@ lp_setup_set_line_state( struct lp_setup_context *setup,
>
> void
> lp_setup_set_point_state( struct lp_setup_context *setup,
> - float point_size,
> + float point_size,
> boolean point_size_per_vertex,
> uint sprite_coord_enable,
> uint sprite_coord_origin)
> diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
> index b3fb24a..6eddd44 100644
> --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
> +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
> @@ -116,7 +116,7 @@ struct lp_setup_context
>
> struct {
> unsigned flags;
> - union lp_rast_cmd_arg color; /**< lp_rast_clear_color() cmd */
> + uint32_t color_val[PIPE_MAX_COLOR_BUFS][4];
> uint64_t zsmask;
> uint64_t zsvalue; /**< lp_rast_clear_zstencil() cmd */
> } clear;
>
Actually, instead of the memcpy(dst, src, 16) calls above, can we just
make the 'color_val' field above a util_color then use simple
assignments instead of memcpy()s?
-Brian
More information about the mesa-dev
mailing list