[virglrenderer-devel] [PATCH 2/3] shader: add basic shader_storage_buffer_object parsing.

Dave Airlie airlied at gmail.com
Tue Jul 17 07:48:09 UTC 2018


From: Dave Airlie <airlied at redhat.com>

This adds basic shader parsing for the SSBO (shader storage buffer object) extension.

TODO:
Add handling for indirectly addressed buffers.
---
 src/vrend_shader.c | 304 +++++++++++++++++++++++++++++++++++++++++----
 src/vrend_shader.h |   1 +
 2 files changed, 281 insertions(+), 24 deletions(-)

diff --git a/src/vrend_shader.c b/src/vrend_shader.c
index cbbfbbc..ae3f028 100644
--- a/src/vrend_shader.c
+++ b/src/vrend_shader.c
@@ -108,6 +108,34 @@ struct vrend_io_range {
    bool used;
 };
 
+enum vrend_type_qualifier {   /* selectors passed to get_string() (not shown here); its result is emitted as a call prefix in generated GLSL */
+   TYPE_CONVERSION_NONE = 0,
+   FLOAT = 1,
+   VEC2 = 2,
+   VEC3 = 3,
+   VEC4 = 4,
+   INT = 5,
+   IVEC2 = 6,
+   IVEC3 = 7,
+   IVEC4 = 8,
+   UINT = 9,
+   UVEC2 = 10,
+   UVEC3 = 11,
+   UVEC4 = 12,
+   FLOAT_BITS_TO_UINT = 13,   /* presumably GLSL floatBitsToUint(); confirm in get_string() */
+   UINT_BITS_TO_FLOAT = 14,   /* presumably GLSL uintBitsToFloat(); confirm in get_string() */
+   FLOAT_BITS_TO_INT = 15,    /* presumably GLSL floatBitsToInt(); confirm in get_string() */
+   INT_BITS_TO_FLOAT = 16,    /* presumably GLSL intBitsToFloat(); confirm in get_string() */
+   DOUBLE = 17,
+   DVEC2 = 18,
+};
+
+struct vrend_ssbo {                  /* one TGSI_FILE_BUFFER declaration seen while iterating the shader */
+   uint32_t id;                      /* decl->Range.First of that declaration */
+   unsigned atomic;                  /* decl->Declaration.Atomic; nonzero makes LOAD read through atomicOr */
+   enum vrend_type_qualifier type;   /* NOTE(review): not assigned in any hunk shown here - confirm intended use */
+};
+
 struct dump_ctx {
    struct tgsi_iterate_context iter;
    struct vrend_shader_cfg *cfg;
@@ -137,6 +165,10 @@ struct dump_ctx {
    struct vrend_shader_sampler samplers[32];
    uint32_t samplers_used;
    bool sviews_used;
+
+   uint32_t num_ssbo;
+   struct vrend_ssbo ssbos[32];
+
    struct vrend_sampler_array *sampler_arrays;
    uint32_t num_sampler_arrays;
    int last_sampler_array_idx;
@@ -206,28 +238,6 @@ static const struct vrend_shader_table shader_req_table[] = {
     { SHADER_REQ_FP64, "GL_ARB_gpu_shader_fp64" },
 };
 
-enum vrend_type_qualifier {
-   TYPE_CONVERSION_NONE = 0,
-   FLOAT = 1,
-   VEC2 = 2,
-   VEC3 = 3,
-   VEC4 = 4,
-   INT = 5,
-   IVEC2 = 6,
-   IVEC3 = 7,
-   IVEC4 = 8,
-   UINT = 9,
-   UVEC2 = 10,
-   UVEC3 = 11,
-   UVEC4 = 12,
-   FLOAT_BITS_TO_UINT = 13,
-   UINT_BITS_TO_FLOAT = 14,
-   FLOAT_BITS_TO_INT = 15,
-   INT_BITS_TO_FLOAT = 16,
-   DOUBLE = 17,
-   DVEC2 = 18,
-};
-
 struct dest_info {
   enum vrend_type_qualifier dtypeprefix;
   enum vrend_type_qualifier dstconv;
@@ -1043,6 +1053,15 @@ iter_declaration(struct tgsi_iterate_context *iter,
       } else
       ctx->sviews_used = true;
       break;
+   case TGSI_FILE_BUFFER:
+      if (ctx->num_ssbo >= ARRAY_SIZE(ctx->ssbos)) {
+         fprintf(stderr, "Buffer view exceeded, max is %lu\n", ARRAY_SIZE(ctx->ssbos));
+         return FALSE;
+      }
+      ctx->ssbos[ctx->num_ssbo].id = decl->Range.First;
+      ctx->ssbos[ctx->num_ssbo].atomic = decl->Declaration.Atomic;
+      ctx->num_ssbo++;
+      break;
    case TGSI_FILE_CONSTANT:
       if (decl->Declaration.Dimension && decl->Dim.Index2D != 0) {
          if (ctx->num_ubo >= ARRAY_SIZE(ctx->ubo_idx)) {
@@ -2123,6 +2142,177 @@ create_swizzled_clipdist(struct dump_ctx *ctx,
    snprintf(result, 255, "%s(vec4(%s,%s,%s,%s))", stypeprefix, clipdistvec[0], clipdistvec[1], clipdistvec[2], clipdistvec[3]);
 }
 
+/* Emit GLSL for a TGSI STORE to an SSBO, one uint element per enabled writemask bit; sinfo is unused. */
+static int
+translate_store(struct dump_ctx *ctx,
+                struct tgsi_full_instruction *inst,
+                struct source_info *sinfo,
+                char srcs[4][255],
+                char dsts[3][255])
+{
+   char buf[512];
+   const unsigned mask = inst->Dst[0].Register.WriteMask;
+
+   if (inst->Dst[0].Register.File != TGSI_FILE_BUFFER)   /* other register files emit nothing */
+      return 0;
+   const char *cast = get_string(FLOAT_BITS_TO_UINT);    /* applied to the data operand srcs[1] */
+   if (mask & 0x1) {
+      snprintf(buf, 255, "%s[uint(floatBitsToUint(%s))>>2] = %s(%s).x;\n", dsts[0], srcs[0], cast, srcs[1]);
+      EMIT_BUF_WITH_RET(ctx, buf);
+   }
+   if (mask & 0x2) {
+      snprintf(buf, 255, "%s[(uint(floatBitsToUint(%s))>>2)+1u] = %s(%s).y;\n", dsts[0], srcs[0], cast, srcs[1]);
+      EMIT_BUF_WITH_RET(ctx, buf);
+   }
+   if (mask & 0x4) {
+      snprintf(buf, 255, "%s[(uint(floatBitsToUint(%s))>>2)+2u] = %s(%s).z;\n", dsts[0], srcs[0], cast, srcs[1]);
+      EMIT_BUF_WITH_RET(ctx, buf);
+   }
+   if (mask & 0x8) {
+      snprintf(buf, 255, "%s[(uint(floatBitsToUint(%s))>>2)+3u] = %s(%s).w;\n", dsts[0], srcs[0], cast, srcs[1]);
+      EMIT_BUF_WITH_RET(ctx, buf);
+   }
+   return 0;
+}
+
+/* Emit GLSL for a TGSI LOAD from an SSBO: one uint element is read per enabled writemask bit. */
+static int
+translate_load(struct dump_ctx *ctx,
+               struct tgsi_full_instruction *inst,
+               char srcs[4][255],
+               char dsts[3][255])
+{
+   char buf[512];
+   char target[255];
+   const char *wrap_open = "", *wrap_tail = "";   /* atomic wrapper; both stay empty for plain buffers */
+   const unsigned mask = inst->Dst[0].Register.WriteMask;
+
+   if (inst->Src[0].Register.File != TGSI_FILE_BUFFER)   /* other register files emit nothing */
+      return 0;
+   /* Cut dsts[0] at its first '.' so each component can be assigned separately below. */
+   strcpy(target, dsts[0]);
+   char *dot = strchr(target, '.');
+   if (dot != NULL)
+      *dot = '\0';
+   snprintf(buf, 255, "ssbo_addr_temp = uint(floatBitsToUint(%s)) >> 2;\n", srcs[1]);
+   EMIT_BUF_WITH_RET(ctx, buf);
+   /* A buffer declared atomic is read as atomicOr(elem, uint(0)) to emulate atomicCounter. */
+   for (uint32_t n = 0; n < ctx->num_ssbo; n++) {
+      if (ctx->ssbos[n].atomic && ctx->ssbos[n].id == (uint32_t)inst->Src[0].Register.Index) {
+         wrap_open = "atomicOr";
+         wrap_tail = ", uint(0)";
+         break;
+      }
+   }
+   if (mask & 0x1) {
+      snprintf(buf, 255, "%s.x = (uintBitsToFloat(%s(%s[ssbo_addr_temp]%s)));\n", target, wrap_open, srcs[0], wrap_tail);
+      EMIT_BUF_WITH_RET(ctx, buf);
+   }
+   if (mask & 0x2) {
+      snprintf(buf, 255, "%s.y = (uintBitsToFloat(%s(%s[ssbo_addr_temp + 1u]%s)));\n", target, wrap_open, srcs[0], wrap_tail);
+      EMIT_BUF_WITH_RET(ctx, buf);
+   }
+   if (mask & 0x4) {
+      snprintf(buf, 255, "%s.z = (uintBitsToFloat(%s(%s[ssbo_addr_temp + 2u]%s)));\n", target, wrap_open, srcs[0], wrap_tail);
+      EMIT_BUF_WITH_RET(ctx, buf);
+   }
+   if (mask & 0x8) {
+      snprintf(buf, 255, "%s.w = (uintBitsToFloat(%s(%s[ssbo_addr_temp + 3u]%s)));\n", target, wrap_open, srcs[0], wrap_tail);
+      EMIT_BUF_WITH_RET(ctx, buf);
+   }
+   return 0;
+}
+
+/*
+ * Map a TGSI atomic opcode to the name suffix that follows "atomic" in the GLSL call.
+ *
+ * *is_cas is written on every call: true for TGSI_OPCODE_ATOMCAS, false otherwise.
+ * Returns NULL, after logging to stderr, for any opcode not listed below.
+ * The signed and unsigned min/max opcodes map to the same suffix, so the
+ * signedness of the operation is not carried by the returned name.
+ */
+static const char *get_atomic_opname(int tgsi_opcode, bool *is_cas)
+{
+   *is_cas = false;
+
+   switch (tgsi_opcode) {
+   case TGSI_OPCODE_ATOMUADD:
+      return "Add";
+   case TGSI_OPCODE_ATOMXCHG:
+      return "Exchange";
+   case TGSI_OPCODE_ATOMCAS:
+      /* The only opcode for which the caller must append an extra argument. */
+      *is_cas = true;
+      return "CompSwap";
+   case TGSI_OPCODE_ATOMAND:
+      return "And";
+   case TGSI_OPCODE_ATOMOR:
+      return "Or";
+   case TGSI_OPCODE_ATOMXOR:
+      return "Xor";
+
+   /* Unsigned and signed variants share one suffix. */
+   case TGSI_OPCODE_ATOMUMIN:
+   case TGSI_OPCODE_ATOMIMIN:
+      return "Min";
+   case TGSI_OPCODE_ATOMUMAX:
+   case TGSI_OPCODE_ATOMIMAX:
+      return "Max";
+
+   default:
+      /* Name the opcode and end the line so the message does not run into the next one. */
+      fprintf(stderr, "illegal atomic opcode %d\n", tgsi_opcode);
+      return NULL;
+   }
+}
+
+/* Emit GLSL for a TGSI RESQ on an SSBO: dsts[0] = bit-cast of (element count << 2). */
+static int
+translate_resq(struct dump_ctx *ctx, struct tgsi_full_instruction *inst,
+               char srcs[4][255], char dsts[3][255])
+{
+   char buf[512];
+
+   /* Nothing is formatted for other register files; emitting buf then would read uninitialized memory. */
+   if (inst->Src[0].Register.File != TGSI_FILE_BUFFER)
+      return 0;
+   snprintf(buf, sizeof(buf), "%s = %s(int(%s.length()) << 2);\n", dsts[0], get_string(INT_BITS_TO_FLOAT), srcs[0]);
+   EMIT_BUF_WITH_RET(ctx, buf);
+   return 0;
+}
+
+/* Emit GLSL for a TGSI atomic opcode on an SSBO; the GLSL result is bit-cast and
+ * assigned to dsts[0].  Returns -1 for an opcode get_atomic_opname() rejects. */
+static int
+translate_atomic(struct dump_ctx *ctx,
+                 struct tgsi_full_instruction *inst,
+                 char srcs[4][255],
+                 char dsts[3][255])
+{
+   char buf[512];
+   char cas_str[64] = "";   /* extra trailing argument, filled in only for compare-and-swap */
+   const enum vrend_type_qualifier stypeprefix = FLOAT_BITS_TO_UINT;
+   const enum vrend_type_qualifier dtypeprefix = UINT_BITS_TO_FLOAT;
+   const enum vrend_type_qualifier stypecast = INT;
+   bool is_cas;
+   const char *opname = get_atomic_opname(inst->Instruction.Opcode, &is_cas);
+
+   if (!opname)
+      return -1;
+
+   /* Nothing is formatted for other register files; emitting buf then would read uninitialized memory. */
+   if (inst->Src[0].Register.File != TGSI_FILE_BUFFER)
+      return 0;
+
+   if (is_cas)
+      snprintf(cas_str, sizeof(cas_str), ", %s(%s(%s))", get_string(stypecast), get_string(stypeprefix), srcs[3]);
+
+   /* srcs[1] >> 2 indexes the uint array named by srcs[0]; srcs[2] supplies the operand value. */
+   snprintf(buf, sizeof(buf), "%s = %s(atomic%s(%s[int(floatBitsToInt(%s)) >> 2], uint(%s(%s).x)%s));\n", dsts[0], get_string(dtypeprefix), opname, srcs[0], srcs[1], get_string(stypeprefix), srcs[2], cas_str);
+   EMIT_BUF_WITH_RET(ctx, buf);
+   return 0;
+}
+
 static int
 get_destination_info(struct dump_ctx *ctx,
                      const struct tgsi_full_instruction *inst,
@@ -2278,8 +2468,14 @@ get_destination_info(struct dump_ctx *ctx,
             snprintf(dsts[i], 255, "temp%d[addr0 + %d]%s", range->first, dst_reg->Register.Index - range->first, writemask);
          } else
             snprintf(dsts[i], 255, "temp%d[%d]%s", range->first, dst_reg->Register.Index - range->first, writemask);
-      }
-      else if (dst_reg->Register.File == TGSI_FILE_ADDRESS) {
+      } else if (dst_reg->Register.File == TGSI_FILE_BUFFER) {
+         const char *cname = tgsi_proc_to_prefix(ctx->prog_type);
+         if (dst_reg->Register.Indirect) {
+            assert(dst_reg->Indirect.File == TGSI_FILE_ADDRESS);
+            snprintf(dsts[i], 255, "%sssbocontents[%d]", cname, dst_reg->Register.Index);
+         } else
+            snprintf(dsts[i], 255, "%sssbocontents%d", cname, dst_reg->Register.Index);
+      } else if (dst_reg->Register.File == TGSI_FILE_ADDRESS) {
          snprintf(dsts[i], 255, "addr%d", dst_reg->Register.Index);
       }
 
@@ -2535,6 +2731,14 @@ get_source_info(struct dump_ctx *ctx,
             snprintf(srcs[i], 255, "%ssamp%d%s", cname, src->Register.Index, swizzle);
          }
          sinfo->sreg_index = src->Register.Index;
+      } else if (src->Register.File == TGSI_FILE_BUFFER) {
+         const char *cname = tgsi_proc_to_prefix(ctx->prog_type);
+         if (ctx->info.indirect_files & (1 << TGSI_FILE_BUFFER)) {
+            fprintf(stderr, "TODO\n");
+         } else {
+            snprintf(srcs[i], 255, "%sssbocontents%d%s", cname, src->Register.Index, swizzle);
+         }
+         sinfo->sreg_index = src->Register.Index;
       } else if (src->Register.File == TGSI_FILE_IMMEDIATE) {
          if (src->Register.Index >= (int)ARRAY_SIZE(ctx->imm)) {
             fprintf(stderr, "Immediate exceeded, max is %lu\n", ARRAY_SIZE(ctx->imm));
@@ -3264,6 +3468,35 @@ iter_instruction(struct tgsi_iterate_context *iter,
       snprintf(buf, 255, "barrier();\n");
       EMIT_BUF_WITH_RET(ctx, buf);
       break;
+   case TGSI_OPCODE_STORE:
+      ret = translate_store(ctx, inst, &sinfo, srcs, dsts);
+      if (ret)
+         return FALSE;
+      break;
+   case TGSI_OPCODE_LOAD:
+      ret = translate_load(ctx, inst, srcs, dsts);
+      if (ret)
+         return FALSE;
+      break;
+   case TGSI_OPCODE_ATOMUADD:
+   case TGSI_OPCODE_ATOMXCHG:
+   case TGSI_OPCODE_ATOMCAS:
+   case TGSI_OPCODE_ATOMAND:
+   case TGSI_OPCODE_ATOMOR:
+   case TGSI_OPCODE_ATOMXOR:
+   case TGSI_OPCODE_ATOMUMIN:
+   case TGSI_OPCODE_ATOMUMAX:
+   case TGSI_OPCODE_ATOMIMIN:
+   case TGSI_OPCODE_ATOMIMAX:
+      ret = translate_atomic(ctx, inst, srcs, dsts);
+      if (ret)
+         return FALSE;
+      break;
+   case TGSI_OPCODE_RESQ:
+      ret = translate_resq(ctx, inst, srcs, dsts);
+      if (ret)
+         return FALSE;
+      break;
    default:
       fprintf(stderr,"failed to convert opcode %d\n", inst->Instruction.Opcode);
       break;
@@ -3344,6 +3577,8 @@ static char *emit_header(struct dump_ctx *ctx, char *glsl_hdr)
 
       if (ctx->num_cull_dist_prop || ctx->key->prev_stage_num_cull_out)
          STRCAT_WITH_RET(glsl_hdr, "#extension GL_ARB_cull_distance : require\n");
+      if (ctx->num_ssbo)
+         STRCAT_WITH_RET(glsl_hdr, "#extension GL_ARB_shader_storage_buffer_object : require\n");
 
       for (uint32_t i = 0; i < ARRAY_SIZE(shader_req_table); i++) {
          if (shader_req_table[i].key == SHADER_REQ_SAMPLER_RECT && ctx->glsl_ver_required >= 140)
@@ -3801,6 +4036,11 @@ static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
       STRCAT_WITH_RET(glsl_hdr, buf);
    }
 
+   if (ctx->num_ssbo) {
+     snprintf(buf, 255, "uint ssbo_addr_temp;\n");
+     STRCAT_WITH_RET(glsl_hdr, buf);
+   }
+
    if (ctx->shader_req_bits & SHADER_REQ_FP64) {
       snprintf(buf, 255, "dvec2 fp64_dst[3];\n");
       STRCAT_WITH_RET(glsl_hdr, buf);
@@ -3864,6 +4104,18 @@ static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
             return NULL;
       }
    }
+
+   if (ctx->info.indirect_files & (1 << TGSI_FILE_BUFFER)) {
+
+   } else {
+      for (i = 0; i < ctx->num_ssbo; i++){
+         uint32_t id = ctx->ssbos[i].id;
+         sname = tgsi_proc_to_prefix(ctx->prog_type);
+         snprintf(buf, 255, "layout (binding = %d, std430) buffer %sssbo%d { uint %sssbocontents%d[]; };\n", id, sname, id, sname, id);
+         STRCAT_WITH_RET(glsl_hdr, buf);
+      }
+   }
+
    if (ctx->prog_type == TGSI_PROCESSOR_FRAGMENT &&
        ctx->key->pstipple_tex == true) {
       snprintf(buf, 255, "uniform sampler2D pstipple_sampler;\nfloat stip_temp;\n");
@@ -4012,6 +4264,10 @@ char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
    sinfo->num_consts = ctx.num_consts;
    sinfo->num_ubos = ctx.num_ubo;
    memcpy(sinfo->ubo_idx, ctx.ubo_idx, ctx.num_ubo * sizeof(*ctx.ubo_idx));
+
+   for (uint32_t i = 0; i < ctx.num_ssbo; i++)
+      sinfo->ssbo_used_mask |= (1 << ctx.ssbos[i].id);
+
    sinfo->ubo_indirect = ctx.info.dimension_indirect_files & (1 << TGSI_FILE_CONSTANT);
    if (ctx_indirect_inputs(&ctx)) {
       if (ctx.generic_input_range.used)
diff --git a/src/vrend_shader.h b/src/vrend_shader.h
index db6f135..a19da20 100644
--- a/src/vrend_shader.h
+++ b/src/vrend_shader.h
@@ -46,6 +46,7 @@ struct vrend_sampler_array {
 
 struct vrend_shader_info {
    uint32_t samplers_used_mask;
+   uint32_t ssbo_used_mask;
    int num_consts;
    int num_inputs;
    int num_interps;
-- 
2.17.1



More information about the virglrenderer-devel mailing list