Mesa (master): st/mesa: use shader stencil export to accelerate shader drawpixels.

Dave Airlie airlied at kemper.freedesktop.org
Tue Oct 12 23:39:39 UTC 2010


Module: Mesa
Branch: master
Commit: ef8bb7ada98f1ddc8e2554a7336af5d669cb1290
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=ef8bb7ada98f1ddc8e2554a7336af5d669cb1290

Author: Dave Airlie <airlied at redhat.com>
Date:   Wed Oct  6 09:35:07 2010 +1000

st/mesa: use shader stencil export to accelerate shader drawpixels.

If the pipe driver has shader stencil export we can accelerate DrawPixels
using it. It tries to pick an S8 texture and works its way to X24S8 and S8X24
if that isn't supported.

---

 src/mesa/state_tracker/st_cb_drawpixels.c |  185 ++++++++++++++++++++---------
 src/mesa/state_tracker/st_mesa_to_tgsi.c  |    7 +
 src/mesa/state_tracker/st_program.c       |    9 ++
 src/mesa/state_tracker/st_texture.h       |   14 ++
 4 files changed, 158 insertions(+), 57 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c
index 7e57917..74a95b9 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -168,7 +168,7 @@ combined_drawpix_fragment_program(GLcontext *ctx)
  * \return pointer to the Gallium driver fragment shader
  */
 static void *
-make_fragment_shader_z(struct st_context *st)
+make_fragment_shader_z(struct st_context *st, GLboolean write_depth, GLboolean write_stencil)
 {
    GLcontext *ctx = st->ctx;
    struct gl_program *p;
@@ -185,7 +185,8 @@ make_fragment_shader_z(struct st_context *st)
    if (!p)
       return NULL;
 
-   p->NumInstructions = 3;
+   p->NumInstructions = write_depth ? 2 : 1;
+   p->NumInstructions += write_stencil ? 1 : 0;
 
    p->Instructions = _mesa_alloc_instructions(p->NumInstructions);
    if (!p->Instructions) {
@@ -195,23 +196,29 @@ make_fragment_shader_z(struct st_context *st)
    _mesa_init_instructions(p->Instructions, p->NumInstructions);
 
    /* TEX result.depth, fragment.texcoord[0], texture[0], 2D; */
-   p->Instructions[ic].Opcode = OPCODE_TEX;
-   p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
-   p->Instructions[ic].DstReg.Index = FRAG_RESULT_DEPTH;
-   p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Z;
-   p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
-   p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
-   p->Instructions[ic].TexSrcUnit = 0;
-   p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
-   ic++;
-
-   /* MOV result.color, fragment.color */
-   p->Instructions[ic].Opcode = OPCODE_MOV;
-   p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
-   p->Instructions[ic].DstReg.Index = FRAG_RESULT_COLOR;
-   p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
-   p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_COL0;
-   ic++;
+   if (write_depth) {
+      p->Instructions[ic].Opcode = OPCODE_TEX;
+      p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
+      p->Instructions[ic].DstReg.Index = FRAG_RESULT_DEPTH;
+      p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Z;
+      p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
+      p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
+      p->Instructions[ic].TexSrcUnit = 0;
+      p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
+      ic++;
+   }
+
+   if (write_stencil) {
+      p->Instructions[ic].Opcode = OPCODE_TEX;
+      p->Instructions[ic].DstReg.File = PROGRAM_OUTPUT;
+      p->Instructions[ic].DstReg.Index = FRAG_RESULT_STENCIL;
+      p->Instructions[ic].DstReg.WriteMask = WRITEMASK_Y;
+      p->Instructions[ic].SrcReg[0].File = PROGRAM_INPUT;
+      p->Instructions[ic].SrcReg[0].Index = FRAG_ATTRIB_TEX0;
+      p->Instructions[ic].TexSrcUnit = 1;
+      p->Instructions[ic].TexSrcTarget = TEXTURE_2D_INDEX;
+      ic++;
+   }
 
    /* END; */
    p->Instructions[ic++].Opcode = OPCODE_END;
@@ -219,9 +226,15 @@ make_fragment_shader_z(struct st_context *st)
    assert(ic == p->NumInstructions);
 
    p->InputsRead = FRAG_BIT_TEX0 | FRAG_BIT_COL0;
-   p->OutputsWritten = (1 << FRAG_RESULT_COLOR) | (1 << FRAG_RESULT_DEPTH);
-   p->SamplersUsed = 0x1;  /* sampler 0 (bit 0) is used */
-
+   p->OutputsWritten = 0;
+   if (write_depth)
+      p->OutputsWritten |= (1 << FRAG_RESULT_DEPTH);
+   if (write_stencil)
+      p->OutputsWritten |= (1 << FRAG_RESULT_STENCIL);
+
+   p->SamplersUsed =  0x1;  /* sampler 0 (bit 0) is used */
+   if (write_stencil)
+      p->SamplersUsed |= 1 << 1;
    st->drawpix.z_shader = (struct st_fragment_program *) p;
    st_translate_fragment_program(st, st->drawpix.z_shader);
 
@@ -498,18 +511,19 @@ static void
 draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
                    GLsizei width, GLsizei height,
                    GLfloat zoomX, GLfloat zoomY,
-                   struct pipe_sampler_view *sv,
+                   struct pipe_sampler_view **sv,
+                   int num_sampler_view,
                    void *driver_vp,
                    void *driver_fp,
                    const GLfloat *color,
-                   GLboolean invertTex)
+                   GLboolean invertTex, GLboolean write_depth, GLboolean write_stencil)
 {
    struct st_context *st = st_context(ctx);
    struct pipe_context *pipe = st->pipe;
    struct cso_context *cso = st->cso_context;
    GLfloat x0, y0, x1, y1;
    GLsizei maxSize;
-   boolean normalized = sv->texture->target != PIPE_TEXTURE_RECT;
+   boolean normalized = sv[0]->texture->target != PIPE_TEXTURE_RECT;
 
    /* limit checks */
    /* XXX if DrawPixels image is larger than max texture size, break
@@ -519,6 +533,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
    assert(width <= maxSize);
    assert(height <= maxSize);
 
+   cso_save_depth_stencil_alpha(cso);
    cso_save_rasterizer(cso);
    cso_save_viewport(cso);
    cso_save_samplers(cso);
@@ -536,6 +551,24 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
       cso_set_rasterizer(cso, &rasterizer);
    }
 
+   if (write_depth || write_stencil)
+   {
+      struct pipe_depth_stencil_alpha_state dsa;
+      memset(&dsa, 0, sizeof(dsa));
+      if (write_depth) {
+	 dsa.depth.enabled = 1;
+	 dsa.depth.func = PIPE_FUNC_ALWAYS;
+	 dsa.depth.writemask = 1;
+      }
+      if (write_stencil) {
+	 dsa.stencil[0].enabled = 1;
+	 dsa.stencil[0].func = PIPE_FUNC_ALWAYS;
+	 dsa.stencil[0].writemask = 0xff;
+	 dsa.stencil[0].zpass_op = PIPE_STENCIL_OP_REPLACE;
+      }
+      cso_set_depth_stencil_alpha(cso, &dsa);
+   }
+
    /* fragment shader state: TEX lookup program */
    cso_set_fragment_shader_handle(cso, driver_fp);
 
@@ -556,7 +589,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
       sampler.normalized_coords = normalized;
 
       cso_single_sampler(cso, 0, &sampler);
-      if (st->pixel_xfer.pixelmap_enabled) {
+      if (num_sampler_view > 1) {
          cso_single_sampler(cso, 1, &sampler);
       }
       cso_single_sampler_done(cso);
@@ -581,15 +614,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
    cso_set_vertex_elements(cso, 3, st->velems_util_draw);
 
    /* texture state: */
-   if (st->pixel_xfer.pixelmap_enabled) {
-      struct pipe_sampler_view *sampler_views[2];
-      sampler_views[0] = sv;
-      sampler_views[1] = st->pixel_xfer.pixelmap_sampler_view;
-      cso_set_fragment_sampler_views(cso, 2, sampler_views);
-   }
-   else {
-      cso_set_fragment_sampler_views(cso, 1, &sv);
-   }
+   cso_set_fragment_sampler_views(cso, num_sampler_view, sv);
 
    /* Compute Gallium window coords (y=0=top) with pixel zoom.
     * Recall that these coords are transformed by the current
@@ -609,10 +634,11 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z,
    z = z * 2.0 - 1.0;
 
    draw_quad(ctx, x0, y0, z, x1, y1, color, invertTex,
-             normalized ? ((GLfloat) width / sv->texture->width0) : (GLfloat)width,
-             normalized ? ((GLfloat) height / sv->texture->height0) : (GLfloat)height);
+             normalized ? ((GLfloat) width / sv[0]->texture->width0) : (GLfloat)width,
+             normalized ? ((GLfloat) height / sv[0]->texture->height0) : (GLfloat)height);
 
    /* restore state */
+   cso_restore_depth_stencil_alpha(cso);
    cso_restore_rasterizer(cso);
    cso_restore_viewport(cso);
    cso_restore_samplers(cso);
@@ -780,12 +806,35 @@ st_DrawPixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
    void *driver_vp, *driver_fp;
    struct st_context *st = st_context(ctx);
    const GLfloat *color;
-
-   if (format == GL_STENCIL_INDEX ||
-       format == GL_DEPTH_STENCIL) {
-      draw_stencil_pixels(ctx, x, y, width, height, format, type,
-                          unpack, pixels);
-      return;
+   struct pipe_context *pipe = st->pipe;
+   GLboolean write_stencil = GL_FALSE, write_depth = GL_FALSE;
+   struct pipe_sampler_view *sv[2];
+   int num_sampler_view = 1;
+   enum pipe_format stencil_format = PIPE_FORMAT_NONE;
+
+   if (format == GL_DEPTH_STENCIL)
+      write_stencil = write_depth = GL_TRUE;
+   else if (format == GL_STENCIL_INDEX)
+      write_stencil = GL_TRUE;
+   else if (format == GL_DEPTH_COMPONENT)
+      write_depth = GL_TRUE;
+
+   if (write_stencil) {
+      enum pipe_format tex_format;
+      /* can we write to stencil if not fallback */
+      if (!pipe->screen->get_param(pipe->screen, PIPE_CAP_SHADER_STENCIL_EXPORT))
+	 goto stencil_fallback;
+      
+      tex_format = st_choose_format(st->pipe->screen, base_format(format), PIPE_TEXTURE_2D,
+				    0, PIPE_BIND_SAMPLER_VIEW);
+      if (tex_format == PIPE_FORMAT_Z24_UNORM_S8_USCALED)
+	 stencil_format = PIPE_FORMAT_X24S8_USCALED;
+      else if (tex_format == PIPE_FORMAT_S8_USCALED_Z24_UNORM)
+	 stencil_format = PIPE_FORMAT_S8X24_USCALED;
+      else
+	 stencil_format = PIPE_FORMAT_S8_USCALED;
+      if (stencil_format == PIPE_FORMAT_NONE)
+	 goto stencil_fallback;
    }
 
    /* Mesa state should be up to date by now */
@@ -793,8 +842,8 @@ st_DrawPixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
 
    st_validate_state(st);
 
-   if (format == GL_DEPTH_COMPONENT) {
-      driver_fp = make_fragment_shader_z(st);
+   if (write_depth || write_stencil) {
+      driver_fp = make_fragment_shader_z(st, write_depth, write_stencil);
       driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
       color = ctx->Current.RasterColor;
    }
@@ -802,6 +851,10 @@ st_DrawPixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
       driver_fp = combined_drawpix_fragment_program(ctx);
       driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
       color = NULL;
+      if (st->pixel_xfer.pixelmap_enabled) {
+	  sv[1] = st->pixel_xfer.pixelmap_sampler_view;
+	  num_sampler_view++;
+      }
    }
 
    /* draw with textured quad */
@@ -809,20 +862,32 @@ st_DrawPixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height,
       struct pipe_resource *pt
          = make_texture(st, width, height, format, type, unpack, pixels);
       if (pt) {
-         struct pipe_sampler_view *sv = st_create_texture_sampler_view(st->pipe, pt);
+         sv[0] = st_create_texture_sampler_view(st->pipe, pt);
+
+         if (sv[0]) {
+	    if (write_stencil) {
+	       sv[1] = st_create_texture_sampler_view_format(st->pipe, pt, stencil_format);
+	       num_sampler_view++;
+	    }
 
-         if (sv) {
             draw_textured_quad(ctx, x, y, ctx->Current.RasterPos[2],
                                width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
                                sv,
-                               driver_vp, 
+                               num_sampler_view,
+                               driver_vp,
                                driver_fp,
-                               color, GL_FALSE);
-            pipe_sampler_view_reference(&sv, NULL);
+                               color, GL_FALSE, write_depth, write_stencil);
+            pipe_sampler_view_reference(&sv[0], NULL);
+            if (num_sampler_view > 1)
+               pipe_sampler_view_reference(&sv[1], NULL);
          }
          pipe_resource_reference(&pt, NULL);
       }
    }
+   return;
+stencil_fallback:
+   draw_stencil_pixels(ctx, x, y, width, height, format, type,
+		       unpack, pixels);
 }
 
 
@@ -945,7 +1010,8 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
    struct st_renderbuffer *rbRead;
    void *driver_vp, *driver_fp;
    struct pipe_resource *pt;
-   struct pipe_sampler_view *sv;
+   struct pipe_sampler_view *sv[2];
+   int num_sampler_view = 1;
    GLfloat *color;
    enum pipe_format srcFormat, texFormat;
    GLboolean invertTex = GL_FALSE;
@@ -966,12 +1032,16 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
       color = NULL;
       driver_fp = combined_drawpix_fragment_program(ctx);
       driver_vp = make_passthrough_vertex_shader(st, GL_FALSE);
+      if (st->pixel_xfer.pixelmap_enabled) {
+	  sv[1] = st->pixel_xfer.pixelmap_sampler_view;
+	  num_sampler_view++;
+      }
    }
    else {
       assert(type == GL_DEPTH);
       rbRead = st_renderbuffer(ctx->ReadBuffer->_DepthBuffer);
       color = ctx->Current.Attrib[VERT_ATTRIB_COLOR0];
-      driver_fp = make_fragment_shader_z(st);
+      driver_fp = make_fragment_shader_z(st, GL_TRUE, GL_FALSE);
       driver_vp = make_passthrough_vertex_shader(st, GL_TRUE);
    }
 
@@ -1031,8 +1101,8 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
    if (!pt)
       return;
 
-   sv = st_create_texture_sampler_view(st->pipe, pt);
-   if (!sv) {
+   sv[0] = st_create_texture_sampler_view(st->pipe, pt);
+   if (!sv[0]) {
       pipe_resource_reference(&pt, NULL);
       return;
    }
@@ -1102,13 +1172,14 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy,
     */
    draw_textured_quad(ctx, dstx, dsty, ctx->Current.RasterPos[2],
                       width, height, ctx->Pixel.ZoomX, ctx->Pixel.ZoomY,
-                      sv, 
+                      sv,
+                      num_sampler_view,
                       driver_vp, 
                       driver_fp,
-                      color, invertTex);
+                      color, invertTex, GL_FALSE, GL_FALSE);
 
    pipe_resource_reference(&pt, NULL);
-   pipe_sampler_view_reference(&sv, NULL);
+   pipe_sampler_view_reference(&sv[0], NULL);
 }
 
 
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index 0ed822b..582ca6f 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -1002,6 +1002,13 @@ st_translate_mesa_program(
             t->outputs[i] = ureg_writemask( t->outputs[i],
                                             TGSI_WRITEMASK_Z );
             break;
+         case TGSI_SEMANTIC_STENCIL:
+            t->outputs[i] = ureg_DECL_output( ureg,
+                                              TGSI_SEMANTIC_STENCIL, /* Stencil */
+                                              outputSemanticIndex[i] );
+            t->outputs[i] = ureg_writemask( t->outputs[i],
+                                            TGSI_WRITEMASK_Y );
+            break;
          case TGSI_SEMANTIC_COLOR:
             t->outputs[i] = ureg_DECL_output( ureg,
                                               TGSI_SEMANTIC_COLOR,
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index 18a7bbe..733cdd0 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -398,11 +398,20 @@ st_translate_fragment_program(struct st_context *st,
          outputsWritten &= ~(1 << FRAG_RESULT_DEPTH);
       }
 
+      if (outputsWritten & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
+         fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_STENCIL;
+         fs_output_semantic_index[fs_num_outputs] = 0;
+         outputMapping[FRAG_RESULT_STENCIL] = fs_num_outputs;
+         fs_num_outputs++;
+         outputsWritten &= ~(1 << FRAG_RESULT_STENCIL);
+      }
+
       /* handle remaning outputs (color) */
       for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
          if (outputsWritten & BITFIELD64_BIT(attr)) {
             switch (attr) {
             case FRAG_RESULT_DEPTH:
+            case FRAG_RESULT_STENCIL:
                /* handled above */
                assert(0);
                break;
diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h
index ed5d271..c78901c 100644
--- a/src/mesa/state_tracker/st_texture.h
+++ b/src/mesa/state_tracker/st_texture.h
@@ -134,6 +134,20 @@ st_create_texture_sampler_view(struct pipe_context *pipe,
 
 
 static INLINE struct pipe_sampler_view *
+st_create_texture_sampler_view_format(struct pipe_context *pipe,
+                                      struct pipe_resource *texture,
+                                      enum pipe_format format)
+{
+   struct pipe_sampler_view templ;
+
+   u_sampler_view_default_template(&templ,
+                                   texture,
+                                   format);
+
+   return pipe->create_sampler_view(pipe, texture, &templ);
+}
+
+static INLINE struct pipe_sampler_view *
 st_get_texture_sampler_view(struct st_texture_object *stObj,
                             struct pipe_context *pipe)
 




More information about the mesa-commit mailing list