[virglrenderer-devel] [PATCH 3/9] shaders: add compute shader support.

Dave Airlie airlied at gmail.com
Tue Jul 31 22:28:15 UTC 2018


From: Dave Airlie <airlied at redhat.com>

This mostly consists of adding local memory support, and
support for the compute system values.

The req_local_mem value needs to be obtained from the host side
and passed into the shader compile so we can size the
shared memory correctly.
---
 src/vrend_renderer.c |  2 +-
 src/vrend_shader.c   | 96 ++++++++++++++++++++++++++++++++++++++++++----------
 src/vrend_shader.h   |  1 +
 3 files changed, 80 insertions(+), 19 deletions(-)

diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c
index fc82ee1..576cecb 100644
--- a/src/vrend_renderer.c
+++ b/src/vrend_renderer.c
@@ -2718,7 +2718,7 @@ static int vrend_shader_create(struct vrend_context *ctx,
 
    shader->id = glCreateShader(conv_shader_type(shader->sel->type));
    shader->compiled_fs_id = 0;
-   shader->glsl_prog = vrend_convert_shader(&ctx->shader_cfg, shader->sel->tokens, &key, &shader->sel->sinfo);
+   shader->glsl_prog = vrend_convert_shader(&ctx->shader_cfg, shader->sel->tokens, 0, &key, &shader->sel->sinfo);
    if (!shader->glsl_prog) {
       report_context_error(ctx, VIRGL_ERROR_CTX_ILLEGAL_SHADER, 0);
       glDeleteShader(shader->id);
diff --git a/src/vrend_shader.c b/src/vrend_shader.c
index 42ebe11..0c3a624 100644
--- a/src/vrend_shader.c
+++ b/src/vrend_shader.c
@@ -167,6 +167,8 @@ struct dump_ctx {
    struct immed imm[MAX_IMMEDIATE];
    unsigned fragcoord_input;
 
+   uint32_t req_local_mem;
+
    uint32_t num_ubo;
    int ubo_idx[32];
    int ubo_sizes[32];
@@ -208,6 +210,8 @@ struct dump_ctx {
    int tes_spacing;
    int tes_vertex_order;
    int tes_point_mode;
+
+   uint16_t local_cs_block_size[3];
 };
 
 static const struct vrend_shader_table shader_req_table[] = {
@@ -267,6 +271,7 @@ struct source_info {
    uint32_t sreg_index;
    bool tg4_has_component;
    bool override_no_wm[3];
+   bool override_no_cast[3];
 };
 
 static const struct vrend_shader_table conversion_table[] =
@@ -331,6 +336,7 @@ static inline const char *tgsi_proc_to_prefix(int shader_type)
    case TGSI_PROCESSOR_GEOMETRY: return "gs";
    case TGSI_PROCESSOR_TESS_CTRL: return "tc";
    case TGSI_PROCESSOR_TESS_EVAL: return "te";
+   case TGSI_PROCESSOR_COMPUTE: return "cs";
    default:
       return NULL;
    };
@@ -1240,12 +1246,23 @@ iter_declaration(struct tgsi_iterate_context *iter,
          name_prefix = "gl_TessLevelOuter";
       } else if (decl->Semantic.Name == TGSI_SEMANTIC_TESSINNER) {
          name_prefix = "gl_TessLevelInner";
+      } else if (decl->Semantic.Name == TGSI_SEMANTIC_THREAD_ID) {
+         name_prefix = "gl_LocalInvocationID";
+         ctx->system_values[i].override_no_wm = false;
+      } else if (decl->Semantic.Name == TGSI_SEMANTIC_BLOCK_ID) {
+         name_prefix = "gl_WorkGroupID";
+         ctx->system_values[i].override_no_wm = false;
+      } else if (decl->Semantic.Name == TGSI_SEMANTIC_GRID_SIZE) {
+         name_prefix = "gl_NumWorkGroups";
+         ctx->system_values[i].override_no_wm = false;
       } else {
          fprintf(stderr, "unsupported system value %d\n", decl->Semantic.Name);
          name_prefix = "unknown";
       }
       snprintf(ctx->system_values[i].glsl_name, 64, "%s", name_prefix);
       break;
+   case TGSI_FILE_MEMORY:
+      break;
    default:
       fprintf(stderr,"unsupported file %d declaration\n", decl->Declaration.File);
       break;
@@ -1323,6 +1340,12 @@ iter_property(struct tgsi_iterate_context *iter,
          ctx->shader_req_bits |= SHADER_REQ_IMAGE_LOAD_STORE;
    }
 
+   if (prop->Property.PropertyName == TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH)
+      ctx->local_cs_block_size[0] = prop->u[0].Data;
+   if (prop->Property.PropertyName == TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT)
+      ctx->local_cs_block_size[1] = prop->u[0].Data;
+   if (prop->Property.PropertyName == TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH)
+      ctx->local_cs_block_size[2] = prop->u[0].Data;
    return TRUE;
 }
 
@@ -2318,6 +2341,7 @@ static enum vrend_type_qualifier get_coord_prefix(int resource, bool *is_ms)
 static int
 translate_store(struct dump_ctx *ctx,
                 struct tgsi_full_instruction *inst,
+                struct source_info *sinfo,
                 char srcs[4][255],
                 char dsts[3][255])
 {
@@ -2346,8 +2370,8 @@ translate_store(struct dump_ctx *ctx,
       }
       snprintf(buf, 512, "imageStore(%s,%s(floatBitsToInt(%s)),%s%s(%s));\n", dsts[0], get_string(coord_prefix), srcs[0], ms_str, get_string(stypeprefix), srcs[1]);
       EMIT_BUF_WITH_RET(ctx, buf);
-   } else if (dst->Register.File == TGSI_FILE_BUFFER) {
-      const char *conversion = get_string(FLOAT_BITS_TO_UINT);
+   } else if (dst->Register.File == TGSI_FILE_BUFFER || dst->Register.File == TGSI_FILE_MEMORY) {
+      const char *conversion = sinfo->override_no_cast[1] ? "" : get_string(FLOAT_BITS_TO_UINT);
       if (inst->Dst[0].Register.WriteMask & 0x1) {
          snprintf(buf, 255, "%s[uint(floatBitsToUint(%s))>>2] = %s(%s).x;\n", dsts[0], srcs[0], conversion, srcs[1]);
          EMIT_BUF_WITH_RET(ctx, buf);
@@ -2402,7 +2426,8 @@ translate_load(struct dump_ctx *ctx,
       }
       snprintf(buf, 512, "%s = %s(imageLoad(%s, %s(floatBitsToInt(%s))%s)%s);\n", dsts[0], get_string(dtypeprefix), srcs[0], get_string(coord_prefix), srcs[1], ms_str, wm);
       EMIT_BUF_WITH_RET(ctx, buf);
-   } else if (src->Register.File == TGSI_FILE_BUFFER) {
+   } else if (src->Register.File == TGSI_FILE_BUFFER ||
+              src->Register.File == TGSI_FILE_MEMORY) {
       char mydst[255], atomic_op[9], atomic_src[10];
       strcpy(mydst, dsts[0]);
       char *wmp = strchr(mydst, '.');
@@ -2566,7 +2591,8 @@ translate_atomic(struct dump_ctx *ctx,
       snprintf(buf, 512, "%s = %s(imageAtomic%s(%s, %s(floatBitsToInt(%s))%s, %s(%s(%s))%s));\n", dsts[0], get_string(dtypeprefix), opname, srcs[0], get_string(coord_prefix), srcs[1], ms_str, get_string(stypecast), get_string(stypeprefix), srcs[2], cas_str);
       EMIT_BUF_WITH_RET(ctx, buf);
    }
-   if (src->Register.File == TGSI_FILE_BUFFER) {
+   if (src->Register.File == TGSI_FILE_BUFFER ||
+       src->Register.File == TGSI_FILE_MEMORY) {
       snprintf(buf, 512, "%s = %s(atomic%s(%s[int(floatBitsToInt(%s)) >> 2], uint(%s(%s).x)%s));\n", dsts[0], get_string(dtypeprefix), opname, srcs[0], srcs[1], get_string(stypeprefix), srcs[2], cas_str);
       EMIT_BUF_WITH_RET(ctx, buf);
    }
@@ -2758,6 +2784,8 @@ get_destination_info(struct dump_ctx *ctx,
                snprintf(dsts[i], 255, "%sssboarr%s[%d].%sssbocontents%d", cname, atomic_str, dst_reg->Register.Index - base, cname, base);
          } else
             snprintf(dsts[i], 255, "%sssbocontents%d", cname, dst_reg->Register.Index);
+      } else if (dst_reg->Register.File == TGSI_FILE_MEMORY) {
+         snprintf(dsts[i], 255, "values");
       } else if (dst_reg->Register.File == TGSI_FILE_ADDRESS) {
          snprintf(dsts[i], 255, "addr%d", dst_reg->Register.Index);
       }
@@ -2836,6 +2864,7 @@ get_source_info(struct dump_ctx *ctx,
       boolean isfloatabsolute = src->Register.Absolute && stype != TGSI_TYPE_DOUBLE;
 
       sinfo->override_no_wm[i] = false;
+      sinfo->override_no_cast[i] = false;
       if (isfloatabsolute)
          swizzle[swz_idx++] = ')';
 
@@ -3040,6 +3069,9 @@ get_source_info(struct dump_ctx *ctx,
             snprintf(srcs[i], 255, "%sssbocontents%d%s", cname, src->Register.Index, swizzle);
          }
          sinfo->sreg_index = src->Register.Index;
+      } else if (src->Register.File == TGSI_FILE_MEMORY) {
+         snprintf(srcs[i], 255, "values");
+         sinfo->sreg_index = src->Register.Index;
       } else if (src->Register.File == TGSI_FILE_IMMEDIATE) {
          if (src->Register.Index >= (int)ARRAY_SIZE(ctx->imm)) {
             fprintf(stderr, "Immediate exceeded, max is %lu\n", ARRAY_SIZE(ctx->imm));
@@ -3159,6 +3191,15 @@ get_source_info(struct dump_ctx *ctx,
                            ctx->system_values[j].glsl_name, get_swiz_char(src->Register.SwizzleY),
                            ctx->system_values[j].glsl_name, get_swiz_char(src->Register.SwizzleZ),
                            ctx->system_values[j].glsl_name, get_swiz_char(src->Register.SwizzleW));
+               } else if (ctx->system_values[j].name == TGSI_SEMANTIC_GRID_SIZE ||
+                          ctx->system_values[j].name == TGSI_SEMANTIC_THREAD_ID ||
+                          ctx->system_values[j].name == TGSI_SEMANTIC_BLOCK_ID) {
+                  snprintf(srcs[i], 255, "uvec4(%s.%c, %s.%c, %s.%c, %s.%c)",
+                           ctx->system_values[j].glsl_name, get_swiz_char(src->Register.SwizzleX),
+                           ctx->system_values[j].glsl_name, get_swiz_char(src->Register.SwizzleY),
+                           ctx->system_values[j].glsl_name, get_swiz_char(src->Register.SwizzleZ),
+                           ctx->system_values[j].glsl_name, get_swiz_char(src->Register.SwizzleW));
+                  sinfo->override_no_cast[i] = true;
                } else if (ctx->system_values[j].name == TGSI_SEMANTIC_SAMPLEMASK) {
                   snprintf(srcs[i], 255, "ivec4(%s, %s, %s, %s)",
                      src->Register.SwizzleX == TGSI_SWIZZLE_X ? ctx->system_values[j].glsl_name : "0",
@@ -3813,7 +3854,7 @@ iter_instruction(struct tgsi_iterate_context *iter,
       break;
    }
    case TGSI_OPCODE_STORE:
-      ret = translate_store(ctx, inst, srcs, dsts);
+      ret = translate_store(ctx, inst, &sinfo, srcs, dsts);
       if (ret)
          return FALSE;
       break;
@@ -3893,19 +3934,24 @@ static char *emit_header(struct dump_ctx *ctx, char *glsl_hdr)
       STRCAT_WITH_RET(glsl_hdr, "precision highp int;\n");
    } else {
       char buf[128];
-      if (ctx->prog_type == TGSI_PROCESSOR_GEOMETRY ||
-          ctx->prog_type == TGSI_PROCESSOR_TESS_EVAL ||
-          ctx->prog_type == TGSI_PROCESSOR_TESS_CTRL ||
-          ctx->glsl_ver_required == 150)
-         STRCAT_WITH_RET(glsl_hdr, "#version 150\n");
-      else if (ctx->glsl_ver_required == 140)
-         STRCAT_WITH_RET(glsl_hdr, "#version 140\n");
-      else
-         STRCAT_WITH_RET(glsl_hdr, "#version 130\n");
-      if (ctx->prog_type == TGSI_PROCESSOR_VERTEX ||
-          ctx->prog_type == TGSI_PROCESSOR_GEOMETRY ||
-          ctx->prog_type == TGSI_PROCESSOR_TESS_EVAL)
-         PAD_GPU_SHADER5(glsl_hdr);
+      if (ctx->prog_type == TGSI_PROCESSOR_COMPUTE) {
+         STRCAT_WITH_RET(glsl_hdr, "#version 330\n");
+         STRCAT_WITH_RET(glsl_hdr, "#extension GL_ARB_compute_shader : require\n");
+      } else {
+         if (ctx->prog_type == TGSI_PROCESSOR_GEOMETRY ||
+             ctx->prog_type == TGSI_PROCESSOR_TESS_EVAL ||
+             ctx->prog_type == TGSI_PROCESSOR_TESS_CTRL ||
+             ctx->glsl_ver_required == 150)
+            STRCAT_WITH_RET(glsl_hdr, "#version 150\n");
+         else if (ctx->glsl_ver_required == 140)
+            STRCAT_WITH_RET(glsl_hdr, "#version 140\n");
+         else
+            STRCAT_WITH_RET(glsl_hdr, "#version 130\n");
+         if (ctx->prog_type == TGSI_PROCESSOR_VERTEX ||
+             ctx->prog_type == TGSI_PROCESSOR_GEOMETRY ||
+             ctx->prog_type == TGSI_PROCESSOR_TESS_EVAL)
+            PAD_GPU_SHADER5(glsl_hdr);
+      }
 
       if (ctx->prog_type == TGSI_PROCESSOR_TESS_CTRL ||
           ctx->prog_type == TGSI_PROCESSOR_TESS_EVAL)
@@ -4209,6 +4255,18 @@ static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
          STRCAT_WITH_RET(glsl_hdr, buf);
       }
    }
+
+   if (ctx->prog_type == TGSI_PROCESSOR_COMPUTE) {
+      snprintf(buf, 255, "layout (local_size_x = %d, local_size_y = %d, local_size_z = %d) in;\n",
+               ctx->local_cs_block_size[0], ctx->local_cs_block_size[1], ctx->local_cs_block_size[2]);
+      STRCAT_WITH_RET(glsl_hdr, buf);
+
+      if (ctx->req_local_mem) {
+         snprintf(buf, 255, "shared uint values[%d];\n", ctx->req_local_mem / 4);
+         STRCAT_WITH_RET(glsl_hdr, buf);
+      }
+   }
+
    if (ctx->prog_type == TGSI_PROCESSOR_GEOMETRY) {
       char invocbuf[25];
 
@@ -4709,6 +4767,7 @@ static boolean fill_interpolants(struct dump_ctx *ctx, struct vrend_shader_info
 
 char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
                            const struct tgsi_token *tokens,
+                           uint32_t req_local_mem,
                            struct vrend_shader_key *key,
                            struct vrend_shader_info *sinfo)
 {
@@ -4734,6 +4793,7 @@ char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
    ctx.ssbo_array_base = 0xffffffff;
    ctx.ssbo_atomic_array_base = 0xffffffff;
    ctx.has_sample_input = false;
+   ctx.req_local_mem = req_local_mem;
    tgsi_scan_shader(tokens, &ctx.info);
    /* if we are in core profile mode we should use GLSL 1.40 */
    if (cfg->use_core_profile && cfg->glsl_version >= 140)
diff --git a/src/vrend_shader.h b/src/vrend_shader.h
index 04e92e6..bcca3e3 100644
--- a/src/vrend_shader.h
+++ b/src/vrend_shader.h
@@ -119,6 +119,7 @@ bool vrend_patch_vertex_shader_interpolants(struct vrend_shader_cfg *cfg,
 
 char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
                            const struct tgsi_token *tokens,
+                           uint32_t req_local_mem,
                            struct vrend_shader_key *key,
                            struct vrend_shader_info *sinfo);
 const char *vrend_shader_samplertypeconv(int sampler_type, int *is_shad);
-- 
2.14.3



More information about the virglrenderer-devel mailing list