[Mesa-dev] [PATCH 1/2] meta: Implement ext_framebuffer_multisample_blit_scaled extension

Anuj Phogat anuj.phogat at gmail.com
Mon Sep 8 11:15:00 PDT 2014


This extension enables a multisample buffer resolve and buffer
scaling in a single glBlitFramebuffer() call. Currently, we
have this extension implemented in BLORP, which is only used by
SNB and IVB. This patch implements the extension in the meta path,
which makes it available to Broadwell.

Implementation features:
 - Supports scaled resolves of 2X, 4X and 8X multisample buffers.

 - Avoids unnecessary shader compilations by storing the precompiled
   shaders for each supported sample count.

 - Uses bilinear filtering for both GL_SCALED_RESOLVE_FASTEST_EXT and
   GL_SCALED_RESOLVE_NICEST_EXT filter options. Although the extension
   allows this behavior, it would be nice to use trilinear or anisotropic
   filtering for the GL_SCALED_RESOLVE_NICEST_EXT filter. I'll work on it
   after pushing this series.

Signed-off-by: Anuj Phogat <anuj.phogat at gmail.com>
---
 src/mesa/drivers/common/meta.h      |   6 +
 src/mesa/drivers/common/meta_blit.c | 213 ++++++++++++++++++++++++++++++++++--
 2 files changed, 208 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/common/meta.h b/src/mesa/drivers/common/meta.h
index edc3e8c..2c9517b 100644
--- a/src/mesa/drivers/common/meta.h
+++ b/src/mesa/drivers/common/meta.h
@@ -279,6 +279,12 @@ enum blit_msaa_shader {
    BLIT_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_COPY_UINT,
    BLIT_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_DEPTH_RESOLVE,
    BLIT_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_DEPTH_COPY,
+   BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE,
+   BLIT_4X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE,
+   BLIT_8X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE,
+   BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE,
+   BLIT_4X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE,
+   BLIT_8X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE,
    BLIT_MSAA_SHADER_COUNT,
 };
 
diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c
index fc9848a..0fe4410 100644
--- a/src/mesa/drivers/common/meta_blit.c
+++ b/src/mesa/drivers/common/meta_blit.c
@@ -55,6 +55,194 @@
 #define OFFSET(FIELD) ((void *) offsetof(struct vertex, FIELD))
 
 static void
+setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
+                                   struct blit_state *blit,
+                                   struct gl_renderbuffer *src_rb,
+                                   GLenum target, GLenum filter)
+{
+   GLint loc_src_width, loc_src_height;
+   int shader_offset = 0;
+   void *mem_ctx = ralloc_context(NULL);
+   char *fs_source;
+   char *name, *sample_number;
+   char* sample_map = "";
+   char *texel_fetch_macro = rzalloc_size(mem_ctx, 1);;
+   const char *vs_source;
+   const char *sampler_array_suffix = "";
+   const char *texcoord_type = "vec2";
+   const int samples = MAX2(src_rb->NumSamples, 1);
+   const float y_scale = samples * 0.5;
+   enum blit_msaa_shader shader_index;
+
+   /* We expect only power of 2 samples in source multisample buffer. */
+   assert((samples & (samples - 1)) == 0);
+   while (samples >> (shader_offset + 1)) {
+      shader_offset++;
+   }
+   /* Update the assert if we plan to support more than 8X MSAA. */
+   assert(shader_offset > 0 && shader_offset < 4);
+
+   assert(target == GL_TEXTURE_2D_MULTISAMPLE ||
+          target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY);
+
+   shader_index = BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE +
+                  shader_offset - 1;
+
+   if (target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
+      shader_index += BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE -
+                      BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE;
+      sampler_array_suffix = "Array";
+      texcoord_type = "vec3";
+   }
+
+   if (blit->msaa_shaders[shader_index]) {
+      _mesa_UseProgram(blit->msaa_shaders[shader_index]);
+      /* Update the uniform values. */
+      loc_src_width =
+         glGetUniformLocation(blit->msaa_shaders[shader_index], "src_width");
+      loc_src_height =
+         glGetUniformLocation(blit->msaa_shaders[shader_index], "src_height");
+      glUniform1f(loc_src_width, src_rb->Width);
+      glUniform1f(loc_src_height, src_rb->Height);
+      return;
+   }
+
+   name = ralloc_asprintf(mem_ctx, "vec4 MSAA scaled resolve");
+
+  /* Below switch is used to setup the shader expression, which computes
+   * sample index and map it to to a sample number on Intel hardware.
+   * Sample index layout shows the numbering of slots in a rectangular
+   * grid of samples with in a pixel. Sample number layout shows the
+   * rectangular grid of samples roughly corresponding to the real sample
+   * locations with in a pixel. Sample number layout matches the sample
+   * index layout in case of 2X and 4x MSAA, but they are different in
+   * case of 8X MSAA.
+   *
+   * 2X MSAA sample index / number layout
+   *           ---------
+   *           | 0 | 1 |
+   *           ---------
+   *
+   * 4X MSAA sample index / number layout
+   *           ---------
+   *           | 0 | 1 |
+   *           ---------
+   *           | 2 | 3 |
+   *           ---------
+   *
+   * 8X MSAA sample index layout    8x MSAA sample number layout
+   *           ---------                      ---------
+   *           | 0 | 1 |                      | 5 | 2 |
+   *           ---------                      ---------
+   *           | 2 | 3 |                      | 4 | 6 |
+   *           ---------                      ---------
+   *           | 4 | 5 |                      | 0 | 3 |
+   *           ---------                      ---------
+   *           | 6 | 7 |                      | 7 | 1 |
+   *           ---------                      ---------
+   */
+   switch(samples) {
+   case 2:
+      sample_number =  "int(2 * fract(coord.x))";
+      break;
+   case 4:
+      sample_number =  "int(2 * fract(coord.x) + 4 * fract(coord.y))";
+      break;
+   case 8:
+      sample_map = "   const int sample_map[8] = int[8](5 , 2, 4, 6, 0, 3, 7, 1);\n";
+      sample_number =  "sample_map[int(2 * fract(coord.x) + 8 * fract(coord.y))]";
+      break;
+   default:
+      _mesa_problem(ctx, "Unsupported sample count %d\n", samples);
+   }
+
+   ralloc_asprintf_append(&texel_fetch_macro,
+                          "#define TEXEL_FETCH(coord) texelFetch(texSampler, i%s(coord), %s);\n",
+                          texcoord_type, sample_number);
+
+   vs_source = ralloc_asprintf(mem_ctx,
+                               "#version 130\n"
+                               "in vec2 position;\n"
+                               "in %s textureCoords;\n"
+                               "out %s texCoords;\n"
+                               "void main()\n"
+                               "{\n"
+                               "   texCoords = textureCoords;\n"
+                               "   gl_Position = vec4(position, 0.0, 1.0);\n"
+                               "}\n",
+                               texcoord_type,
+                               texcoord_type);
+   fs_source = ralloc_asprintf(mem_ctx,
+                               "#version 130\n"
+                               "#extension GL_ARB_texture_multisample : enable\n"
+                               "uniform sampler2DMS%s texSampler;\n"
+                               "uniform float src_width, src_height;\n"
+                               "in %s texCoords;\n"
+                               "out vec4 out_color;\n"
+                               "\n"
+                               "void main()\n"
+                               "{\n"
+                               "%s"
+                               "   const float x_scale = 2.0f, x_scale_inv = 0.5f;\n"
+                               "   const float y_scale = %ff, y_scale_inv = %ff;\n"
+                               "   const float x_offset = 0.25f, y_offset = %ff;\n"
+                               "   vec2 s_0_coord, s_1_coord, s_2_coord, s_3_coord;\n"
+                               "   vec4 s_0_color, s_1_color, s_2_color, s_3_color;\n"
+                               "   vec4 x_0_color, x_1_color;\n"
+                               "   float x_f, y_f;\n"
+                               "\n"
+                               "   vec2 tex_coord = vec2(texCoords.x - x_offset, texCoords.y - y_offset);\n"
+                               "   tex_coord = vec2(x_scale * tex_coord.x, y_scale * tex_coord.y);\n"
+                               "\n"
+                               "   clamp(tex_coord.x, 0.0f, x_scale * src_width - 1.0f);\n"
+                               "   clamp(tex_coord.y, 0.0f, y_scale * src_height - 1.0f);\n"
+                               "\n"
+                               "   x_f = fract(tex_coord.x);\n"
+                               "   y_f = fract(tex_coord.y);\n"
+                               "\n"
+                               "   tex_coord.x = int(tex_coord.x) * x_scale_inv;\n"
+                               "   tex_coord.y = int(tex_coord.y) * y_scale_inv;\n"
+                               "\n"
+                               "   /* Compute the sample coordinates used for filtering. */\n"
+                               "   s_0_coord = tex_coord;\n"
+                               "   s_1_coord = tex_coord + vec2(x_scale_inv, 0.0f);\n"
+                               "   s_2_coord = tex_coord + vec2(0.0f, y_scale_inv);\n"
+                               "   s_3_coord = tex_coord + vec2(x_scale_inv, y_scale_inv);\n"
+                               "\n"
+                               "   /* Fetch sample color values. */\n"
+                               "%s"
+                               "   s_0_color = TEXEL_FETCH(s_0_coord)\n"
+                               "   s_1_color = TEXEL_FETCH(s_1_coord)\n"
+                               "   s_2_color = TEXEL_FETCH(s_2_coord)\n"
+                               "   s_3_color = TEXEL_FETCH(s_3_coord)\n"
+                               "#undef TEXEL_FETCH\n"
+                               "\n"
+                               "   /* Do bilinear filtering on sample colors. */\n"
+                               "   x_0_color =  mix(s_0_color, s_1_color, x_f);\n"
+                               "   x_1_color =  mix(s_2_color, s_3_color, x_f);\n"
+                               "   out_color = mix(x_0_color, x_1_color, y_f);\n"
+                               "}\n",
+                               sampler_array_suffix,
+                               texcoord_type,
+                               sample_map,
+                               y_scale,
+                               1.0f / y_scale,
+                               1.0f / samples,
+                               texel_fetch_macro);
+
+   _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source, name,
+                                       &blit->msaa_shaders[shader_index]);
+   loc_src_width =
+      glGetUniformLocation(blit->msaa_shaders[shader_index], "src_width");
+   loc_src_height =
+      glGetUniformLocation(blit->msaa_shaders[shader_index], "src_height");
+   glUniform1f(loc_src_width, src_rb->Width);
+   glUniform1f(loc_src_height, src_rb->Height);
+
+   ralloc_free(mem_ctx);
+}
+
+static void
 setup_glsl_msaa_blit_shader(struct gl_context *ctx,
                             struct blit_state *blit,
                             struct gl_renderbuffer *src_rb,
@@ -332,9 +520,13 @@ static void
 setup_glsl_blit_framebuffer(struct gl_context *ctx,
                             struct blit_state *blit,
                             struct gl_renderbuffer *src_rb,
-                            GLenum target)
+                            GLenum target, GLenum filter)
 {
    unsigned texcoord_size;
+   bool is_target_multisample = target == GL_TEXTURE_2D_MULTISAMPLE ||
+                                target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY;
+   bool is_filter_scaled_resolve = filter == GL_SCALED_RESOLVE_FASTEST_EXT ||
+                                   filter == GL_SCALED_RESOLVE_NICEST_EXT;
 
    /* target = GL_TEXTURE_RECTANGLE is not supported in GLES 3.0 */
    assert(_mesa_is_desktop_gl(ctx) || target == GL_TEXTURE_2D);
@@ -344,8 +536,9 @@ setup_glsl_blit_framebuffer(struct gl_context *ctx,
    _mesa_meta_setup_vertex_objects(&blit->VAO, &blit->VBO, true,
                                    2, texcoord_size, 0);
 
-   if (target == GL_TEXTURE_2D_MULTISAMPLE ||
-       target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
+   if (is_target_multisample && is_filter_scaled_resolve) {
+      setup_glsl_msaa_blit_scaled_shader(ctx, blit, src_rb, target, filter);
+   } else if (is_target_multisample) {
       setup_glsl_msaa_blit_shader(ctx, blit, src_rb, target);
    } else {
       _mesa_meta_setup_blit_shader(ctx, target, &blit->shaders);
@@ -384,11 +577,6 @@ blitframebuffer_texture(struct gl_context *ctx,
    if (rb->NumSamples && !ctx->Extensions.ARB_texture_multisample)
       return false;
 
-   if (filter == GL_SCALED_RESOLVE_FASTEST_EXT ||
-       filter == GL_SCALED_RESOLVE_NICEST_EXT) {
-      filter = GL_LINEAR;
-   }
-
    _mesa_meta_fb_tex_blit_begin(ctx, &fb_tex_blit);
 
    if (readAtt->Texture &&
@@ -461,7 +649,7 @@ blitframebuffer_texture(struct gl_context *ctx,
    fb_tex_blit.stencilSamplingSave = texObj->StencilSampling;
 
    if (glsl_version) {
-      setup_glsl_blit_framebuffer(ctx, blit, rb, target);
+      setup_glsl_blit_framebuffer(ctx, blit, rb, target, filter);
    }
    else {
       _mesa_meta_setup_ff_tnl_for_blit(&ctx->Meta->Blit.VAO,
@@ -654,14 +842,17 @@ _mesa_meta_setup_sampler(struct gl_context *ctx,
                          GLenum target, GLenum filter, GLuint srcLevel)
 {
    GLuint sampler;
+   GLenum tex_filter = (filter == GL_SCALED_RESOLVE_FASTEST_EXT ||
+                        filter == GL_SCALED_RESOLVE_NICEST_EXT) ?
+                       GL_NEAREST : filter;
 
    _mesa_GenSamplers(1, &sampler);
    _mesa_BindSampler(ctx->Texture.CurrentUnit, sampler);
 
    /* Prepare src texture state */
    _mesa_BindTexture(target, texObj->Name);
-   _mesa_SamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, filter);
-   _mesa_SamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, filter);
+   _mesa_SamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, tex_filter);
+   _mesa_SamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, tex_filter);
    if (target != GL_TEXTURE_RECTANGLE_ARB) {
       _mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, srcLevel);
       _mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, srcLevel);
-- 
1.9.3



More information about the mesa-dev mailing list