[Mesa-dev] [PATCH 21/22] meta/blit: Use a single, master shader template for MSAA-SS blits

Ian Romanick idr at freedesktop.org
Thu Feb 18 01:58:14 UTC 2016


From: Ian Romanick <ian.d.romanick at intel.com>

This changes the text of the shader quite significantly, but it should
produce the same (or nearly the same) code.  The shader is now much
easier to understand because its code is all in one place instead of
being scattered about the C code.  That, in turn, makes it much easier
to implement the optimization in the next patch.

NOTE: The current code is limited to 32 samples.  Almost everything in
OpenGL uses an integer mask for samples, so a lot of code will need to
change to support more than 32 samples.

Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
---
 src/mesa/drivers/common/meta_blit.c | 67 ++++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 30 deletions(-)

diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c
index 355c937..28aabd3 100644
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -518,10 +518,6 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
                                      tex_coords);
 
       } else {
-         char *sample_resolve;
-         int i;
-         int step;
-
          /* We're assuming power of two samples for this resolution procedure.
           *
           * To avoid losing any floating point precision if the samples all
@@ -530,26 +526,7 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
           * doing a naive sum and dividing.
           */
          assert(_mesa_is_pow_two(samples));
-         /* Fetch each individual sample. */
-         sample_resolve = rzalloc_size(mem_ctx, 1);
-         for (i = 0; i < samples; i++) {
-            ralloc_asprintf_append(&sample_resolve,
-                                   "   gvec4 sample_1_%d = texelFetch(texSampler, i%s(texCoords), %d);\n",
-                                   i, texcoord_type, i);
-         }
-         /* Now, merge each pair of samples, then merge each pair of those,
-          * etc.
-          */
-         for (step = 2; step <= samples; step *= 2) {
-            for (i = 0; i < samples; i += step) {
-               ralloc_asprintf_append(&sample_resolve,
-                                      "   gvec4 sample_%d_%d = merge(sample_%d_%d, sample_%d_%d);\n",
-                                      step, i,
-                                      step / 2, i,
-                                      step / 2, i + step / 2);
-            }
-         }
-
+         assert(samples <= 32);
          fs_source = ralloc_asprintf(mem_ctx,
                                      "#version 130\n"
                                      "#extension GL_ARB_texture_multisample: require\n"
@@ -557,30 +534,60 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
                                      "uniform %ssampler2DMS%s texSampler;\n"
                                      "in %s texCoords;\n"
                                      "out gvec4 out_color[%u];\n"
+                                     "#define SAMPLES %d\n"
+                                     "#define S s%d\n"
                                      "\n"
                                      "uvec4 merge(uvec4 a, uvec4 b) { return (a >> 1) + (b >> 1) + (a & b & 1u); }\n"
                                      "ivec4 merge(ivec4 a, ivec4 b) { return (a >> 1) + (b >> 1) + (a & b & 1); }\n"
                                      /* The divide will happen at the end for floats. */
                                      "vec4 merge(vec4 a, vec4 b) { return a + b; }\n"
+
+                                     /* Reduce from N samples to N/2 samples.
+                                      *
+                                      * NOTE: The \n characters are intentionally
+                                      * missing from each line of the macro
+                                      * definition; a #define must be one line!
+                                      */
+                                     "#define REDUCE(dst, src)"
+                                     "   do {"
+                                     "      if (src.length() <= SAMPLES) {"
+                                     "         for (i = 0; i < dst.length(); i++)"
+                                     "            dst[i] = merge(src[i*2], src[i*2+1]);"
+                                     "	    }"
+                                     "   } while (false)\n"
+                                     "\n"
                                      "void emit2(gvec4 s) { for (int i = 0; i < out_color.length(); i++) out_color[i] = s; }\n"
                                      "void emit(ivec4 s) { emit2(gvec4(s)); }\n"
                                      "void emit(uvec4 s) { emit2(gvec4(s)); }\n"
                                      /* Scale the final result. */
-                                     "void emit(vec4 s) { emit2(gvec4(s / %f)); }\n"
+                                     "void emit(vec4 s) { emit2(gvec4(s / float(SAMPLES))); }\n"
                                      "\n"
                                      "void main()\n"
                                      "{\n"
-                                     "%s\n" /* sample_resolve */
-                                     "    emit(sample_%d_0);\n"
+                                     "   gvec4 s32[32], s16[16], s8[8];\n"
+                                     "   gvec4 s4[4], s2[2], s1[1];\n"
+                                     "   i%s tc = i%s(texCoords);\n"
+                                     "   int i;\n"
+                                     "\n"
+                                     "   for (i = 0; i < SAMPLES; i++)\n"
+                                     "      S[i] = texelFetch(texSampler, tc, i);\n"
+                                     "\n"
+                                     "   REDUCE(s16, s32);\n"
+                                     "   REDUCE(s8, s16);\n"
+                                     "   REDUCE(s4, s8);\n"
+                                     "   REDUCE(s2, s4);\n"
+                                     "   REDUCE(s1, s2);\n"
+                                     "   emit(s1[0]);\n"
                                      "}\n",
                                      vec4_prefix,
                                      vec4_prefix,
                                      sampler_array_suffix,
                                      texcoord_type,
                                      drawFb->_NumColorDrawBuffers,
-                                     (float) samples,
-                                     sample_resolve,
-                                     samples);
+                                     samples,
+                                     samples,
+                                     texcoord_type,
+                                     texcoord_type);
       }
 
       vs_source = ralloc_asprintf(mem_ctx,
-- 
2.5.0



More information about the mesa-dev mailing list