[Mesa-dev] gallium: don't fallback on buffers clears for capable hw

Roland Scheidegger sroland at vmware.com
Mon Apr 12 11:52:11 PDT 2010


On 10.04.2010 19:52, Christoph Bumiller wrote:
> Hi,
> this might not be the best implementation, but I thought I'd add a patch
> to the suggestion.
> 
> Basically I want nv50 to do be able to do hw clears in gallium even if
> scissors or color masks are active, since it's nicer than drawing a quad,
> especially for the HiZ buffer.
> 
> The state change to deactivate scissors, should the state tracker not
> want them to affect clear, is probably less expensive than changing
> shaders and whipping up a vertex buffer.

I think this is generally a good idea, though I've got a couple of comments.
I think separating depth/stencil clears is a good idea, nowadays hw
often really has two hw buffers for them and can clear them separately
even with special path.
I don't especially like the cap PIPE_CAP_OPENGL_CLEAR, neither the name
(is opengl really the only api which would want to do that?) nor the
concept. What about having two clear functions instead, which would both
be mandatory for drivers but they could just use a util default one if
they don't want to implement for instance only the more complicated one not?
Also, as Keith has mentioned, we'd probably wanted to include the color
buffers to clear in the call itself rather than relying on the driver to
 trying to figure it out from the currently enabled buffers (and for
complicated case maybe the scissor box too as well as write masks?)
And, are you sure that in your nv50 patch you actually handle all state
correctly? I only see scissors, not color or stencil write masks.

I think I'm not convinced though that a "clear" including scissors,
stencil and color write masks really is a "clear" and not effectively a
"draw quad" for most hardware any more. I know that at least for old
radeons, clears using stencil write masks would be impossible with using
specialized path (as that would just set the per-tile depth/stencil regs
to "cleared" - nowadays I think they have separate flags for stencil and
depth clear but I'd be surprised if you could specify which bits are
cleared). Old radeons didn't have special color clears so obviously
color write masks were a non-issue. But scissoring again would be a
problem - though presumably you could still write a optimized
depth/stencil clear, by using register clearing for the tiles completely
inside the scissor and drawing (max) 4 quads for the tiles at the tile
boundaries. I bet that would be faster than a simple quad draw but
probably too complicated to implement that you'd bother...
If nvidia hw can do this though I wonder how their fast-z clear actually
works, or if there's some other hardware which can really do anything
like that (though I'm not sure what the bits the driver is using
currently actually means, for all I know the NV50TCL_CLEAR_BUFFERS clear
could just mean "draw a quad with default clear shader"...".


Hmm...


Roland


> 
> Christoph
> 
> 
> 
> ------------------------------------------------------------------------
> 
> From d39e09177ee8678349553d7204850c97a0cfd700 Mon Sep 17 00:00:00 2001
> From: Christoph Bumiller <e0425955 at student.tuwien.ac.at>
> Date: Sat, 10 Apr 2010 19:40:28 +0200
> Subject: [PATCH 1/2] gallium: let drivers expose a fully capable clear function
> 
> ---
>  src/gallium/include/pipe/p_context.h |    6 +++-
>  src/gallium/include/pipe/p_defines.h |    5 +++-
>  src/mesa/state_tracker/st_cb_clear.c |   50 ++++++++++++++++++++++++++++++++-
>  src/mesa/state_tracker/st_cb_clear.h |    2 +-
>  src/mesa/state_tracker/st_context.c  |    8 +++--
>  src/mesa/state_tracker/st_context.h  |    3 +-
>  6 files changed, 65 insertions(+), 9 deletions(-)
> 
> diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h
> index 1aa0cbe..7d1ebaf 100644
> --- a/src/gallium/include/pipe/p_context.h
> +++ b/src/gallium/include/pipe/p_context.h
> @@ -260,7 +260,11 @@ struct pipe_context {
>  
>     /**
>      * Clear the specified set of currently bound buffers to specified values.
> -    * The entire buffers are cleared (no scissor, no colormask, etc).
> +    * If the driver doesn't expose PIPE_CAP_OPENGL_CLEAR, the entire buffers
> +    * will be cleared (no scissor, no colormask, etc).
> +    *
> +    * If PIPE_CAP_OPENGL_CLEAR is supported, the driver will apply currently
> +    * bound scissors, color and stencil masks.
>      *
>      * \param buffers  bitfield of PIPE_CLEAR_* values.
>      * \param rgba  pointer to an array of one float for each of r, g, b, a.
> diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
> index 0a4bd58..c698403 100644
> --- a/src/gallium/include/pipe/p_defines.h
> +++ b/src/gallium/include/pipe/p_defines.h
> @@ -189,8 +189,10 @@ enum pipe_texture_target {
>   */
>  /** All color buffers currently bound */
>  #define PIPE_CLEAR_COLOR        (1 << 0)
> +#define PIPE_CLEAR_DEPTH        (1 << 1)
> +#define PIPE_CLEAR_STENCIL      (1 << 2)
>  /** Depth/stencil combined */
> -#define PIPE_CLEAR_DEPTHSTENCIL (1 << 1)
> +#define PIPE_CLEAR_DEPTHSTENCIL (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)
>  
>  
>  /**
> @@ -453,6 +455,7 @@ enum pipe_transfer_usage {
>  #define PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT 37
>  #define PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER 38
>  #define PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER 39
> +#define PIPE_CAP_OPENGL_CLEAR            40
>  
>  
>  /**
> diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c
> index eb9ba05..b74d755 100644
> --- a/src/mesa/state_tracker/st_cb_clear.c
> +++ b/src/mesa/state_tracker/st_cb_clear.c
> @@ -539,9 +539,55 @@ st_Clear(GLcontext *ctx, GLbitfield mask)
>                              ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer);
>  }
>  
> +static void
> +st_Clear_no_fallback(GLcontext *ctx, GLbitfield mask)
> +{
> +   struct st_context *st = ctx->st;
> +   struct gl_renderbuffer *depthRb
> +      = ctx->DrawBuffer->Attachment[BUFFER_DEPTH].Renderbuffer;
> +   struct gl_renderbuffer *stencilRb
> +      = ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
> +   unsigned clear_buffers = 0x0;
> +   GLuint i;
> +
> +   /* This makes sure the pipe has the latest scissor, etc values */
> +   st_validate_state( st );
> +
> +   if (mask & BUFFER_BITS_COLOR) {
> +      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
> +         GLuint b = ctx->DrawBuffer->_ColorDrawBufferIndexes[i];
> +
> +         if (mask & (1 << b)) {
> +            struct gl_renderbuffer *rb
> +               = ctx->DrawBuffer->Attachment[b].Renderbuffer;
> +
> +            assert(rb);
> +            if (st_renderbuffer(rb)->surface)
> +               clear_buffers |= PIPE_CLEAR_COLOR;
> +         }
> +      }
> +   }
> +
> +   /* separate depth/stencil clears */
> +   if (mask & BUFFER_BIT_DEPTH)
> +         if (st_renderbuffer(depthRb)->surface)
> +            clear_buffers |= PIPE_CLEAR_DEPTH;
> +
> +   if (mask & BUFFER_BIT_STENCIL)
> +         if (st_renderbuffer(stencilRb)->surface)
> +            clear_buffers |= PIPE_CLEAR_STENCIL;
> +
> +   st->pipe->clear(st->pipe, clear_buffers, ctx->Color.ClearColor,
> +                   ctx->Depth.Clear, ctx->Stencil.Clear);
> +
> +   if (mask & BUFFER_BIT_ACCUM)
> +      st_clear_accum_buffer(ctx,
> +                            ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer);
> +}
> +
>  
>  void
> -st_init_clear_functions(struct dd_function_table *functions)
> +st_init_clear_functions(struct dd_function_table *functions, boolean cap)
>  {
> -   functions->Clear = st_Clear;
> +   functions->Clear = cap ? st_Clear_no_fallback : st_Clear;
>  }
> diff --git a/src/mesa/state_tracker/st_cb_clear.h b/src/mesa/state_tracker/st_cb_clear.h
> index bc035ac..edac611 100644
> --- a/src/mesa/state_tracker/st_cb_clear.h
> +++ b/src/mesa/state_tracker/st_cb_clear.h
> @@ -42,7 +42,7 @@ st_flush_clear(struct st_context *st);
>  
>  
>  extern void
> -st_init_clear_functions(struct dd_function_table *functions);
> +st_init_clear_functions(struct dd_function_table *functions, boolean cap);
>  
>  
>  #endif /* ST_CB_CLEAR_H */
> diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
> index 0a1503f..be0952c 100644
> --- a/src/mesa/state_tracker/st_context.c
> +++ b/src/mesa/state_tracker/st_context.c
> @@ -198,7 +198,7 @@ struct st_context *st_create_context(struct pipe_context *pipe,
>     struct dd_function_table funcs;
>  
>     memset(&funcs, 0, sizeof(funcs));
> -   st_init_driver_functions(&funcs);
> +   st_init_driver_functions(&funcs, pipe->screen);
>  
>     ctx = _mesa_create_context(visual, shareCtx, &funcs, NULL);
>  
> @@ -338,7 +338,8 @@ st_proc st_get_proc_address(const char *procname)
>  
>  
>  
> -void st_init_driver_functions(struct dd_function_table *functions)
> +void st_init_driver_functions(struct dd_function_table *functions,
> +                              struct pipe_screen *screen)
>  {
>     _mesa_init_glsl_driver_functions(functions);
>  
> @@ -349,7 +350,8 @@ void st_init_driver_functions(struct dd_function_table *functions)
>     st_init_blit_functions(functions);
>  #endif
>     st_init_bufferobject_functions(functions);
> -   st_init_clear_functions(functions);
> +   st_init_clear_functions(functions,
> +                           screen->get_param(screen, PIPE_CAP_OPENGL_CLEAR));
>  #if FEATURE_drawpix
>     st_init_bitmap_functions(functions);
>     st_init_drawpixels_functions(functions);
> diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
> index f28d5aa..3ff4c45 100644
> --- a/src/mesa/state_tracker/st_context.h
> +++ b/src/mesa/state_tracker/st_context.h
> @@ -218,7 +218,8 @@ struct st_framebuffer
>  };
>  
>  
> -extern void st_init_driver_functions(struct dd_function_table *functions);
> +extern void st_init_driver_functions(struct dd_function_table *functions,
> +                                     struct pipe_screen *screen);
>  
>  void st_invalidate_state(GLcontext * ctx, GLuint new_state);
>  
> 
> 
> ------------------------------------------------------------------------
> 
> From b3ff968e57f58121e90515e2dafd5d7ec15e7792 Mon Sep 17 00:00:00 2001
> From: Christoph Bumiller <e0425955 at student.tuwien.ac.at>
> Date: Sat, 10 Apr 2010 19:41:52 +0200
> Subject: [PATCH 2/2] nv50: support PIPE_CAP_OPENGL_CLEAR
> 
> ---
>  src/gallium/drivers/nv50/nv50_clear.c  |   46 ++++++++++++++++++++-----------
>  src/gallium/drivers/nv50/nv50_screen.c |    6 ++++
>  2 files changed, 36 insertions(+), 16 deletions(-)
> 
> diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c
> index 5447904..d86f91b 100644
> --- a/src/gallium/drivers/nv50/nv50_clear.c
> +++ b/src/gallium/drivers/nv50/nv50_clear.c
> @@ -26,6 +26,10 @@
>  
>  #include "nv50_context.h"
>  
> +/* don't need NEW_BLEND, NV50TCL_COLOR_MASK doesn't affect CLEAR_BUFFERS */
> +#define NV50_CLEAR_BUFFERS_STATE \
> + (NV50_NEW_FRAMEBUFFER | NV50_NEW_RASTERIZER | NV50_NEW_ZSA | NV50_NEW_SCISSOR)
> +
>  void
>  nv50_clear(struct pipe_context *pipe, unsigned buffers,
>  	   const float *rgba, double depth, unsigned stencil)
> @@ -35,37 +39,47 @@ nv50_clear(struct pipe_context *pipe, unsigned buffers,
>  	struct nouveau_grobj *tesla = nv50->screen->tesla;
>  	struct pipe_framebuffer_state *fb = &nv50->framebuffer;
>  	unsigned mode = 0, i;
> -	const unsigned dirty = nv50->dirty;
> +	const unsigned dirty = nv50->dirty & ~NV50_CLEAR_BUFFERS_STATE;
>  
> -	/* don't need NEW_BLEND, NV50TCL_COLOR_MASK doesn't affect CLEAR_BUFFERS */
> -	nv50->dirty &= NV50_NEW_FRAMEBUFFER | NV50_NEW_SCISSOR;
> +	nv50->dirty &= NV50_CLEAR_BUFFERS_STATE;
>  	if (!nv50_state_validate(nv50, 64))
>  		return;
>  
> -	if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
> +	if (buffers & PIPE_CLEAR_COLOR) {
> +		assert(fb->nr_cbufs);
>  		BEGIN_RING(chan, tesla, NV50TCL_CLEAR_COLOR(0), 4);
> -		OUT_RING  (chan, fui(rgba[0]));
> -		OUT_RING  (chan, fui(rgba[1]));
> -		OUT_RING  (chan, fui(rgba[2]));
> -		OUT_RING  (chan, fui(rgba[3]));
> -		mode |= 0x3c;
> +		OUT_RINGf (chan, rgba[0]);
> +		OUT_RINGf (chan, rgba[1]);
> +		OUT_RINGf (chan, rgba[2]);
> +		OUT_RINGf (chan, rgba[3]);
>  	}
>  
> -	if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
> +	if (buffers & PIPE_CLEAR_DEPTH) {
>  		BEGIN_RING(chan, tesla, NV50TCL_CLEAR_DEPTH, 1);
> -		OUT_RING  (chan, fui(depth));
> +		OUT_RINGf (chan, depth);
> +		mode |= NV50TCL_CLEAR_BUFFERS_Z;
> +	}
> +	if (buffers & PIPE_CLEAR_STENCIL) {
>  		BEGIN_RING(chan, tesla, NV50TCL_CLEAR_STENCIL, 1);
>  		OUT_RING  (chan, stencil & 0xff);
> +		mode |= NV50TCL_CLEAR_BUFFERS_S;
> +	}
>  
> -		mode |= 0x03;
> +	if (!(buffers & PIPE_CLEAR_COLOR)) {
> +		BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1);
> +		OUT_RING  (chan, mode);
> +		nv50->dirty = dirty;
> +		return;
>  	}
>  
> -	BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1);
> -	OUT_RING  (chan, mode);
> +	for (i = 0; i < fb->nr_cbufs; ++i, mode = 0) {
> +		if (nv50->blend->pipe.independent_blend_enable)
> +			mode |= nv50->blend->pipe.rt[i].colormask << 2;
> +		else
> +			mode |= nv50->blend->pipe.rt[0].colormask << 2;
>  
> -	for (i = 1; i < fb->nr_cbufs; i++) {
>  		BEGIN_RING(chan, tesla, NV50TCL_CLEAR_BUFFERS, 1);
> -		OUT_RING  (chan, (i << 6) | 0x3c);
> +		OUT_RING  (chan, (i << 6) | mode);
>  	}
>  	nv50->dirty = dirty;
>  }
> diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
> index a0fe4c5..f7db403 100644
> --- a/src/gallium/drivers/nv50/nv50_screen.c
> +++ b/src/gallium/drivers/nv50/nv50_screen.c
> @@ -148,6 +148,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, int param)
>  	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
>  	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
>  		return 0;
> +	case PIPE_CAP_OPENGL_CLEAR:
> +		return 1;
>  	default:
>  		NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param);
>  		return 0;
> @@ -397,6 +399,10 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
>  	BEGIN_RING(chan, screen->tesla, NV50TCL_VP_REG_ALLOC_RESULT, 1);
>  	OUT_RING  (chan, 8);
>  
> +	/* let CLEAR_BUFFERS honour SCISSOR_* and STENCIL_FRONT_MASK */
> +	BEGIN_RING(chan, screen->tesla, 0x143c, 1);
> +	OUT_RING  (chan, 0x01);
> +
>  	/* constant buffers for immediates and VP/FP parameters */
>  	ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, (32 * 4) * 4,
>  			     &screen->constbuf_misc[0]);
> 
> 
> ------------------------------------------------------------------------
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-dev mailing list