[virglrenderer-devel] [PATCH 3/9] shaders: add compute shader support.
Gurchetan Singh
gurchetansingh at chromium.org
Wed Aug 1 00:58:11 UTC 2018
On Tue, Jul 31, 2018 at 3:29 PM Dave Airlie <airlied at gmail.com> wrote:
>
> From: Dave Airlie <airlied at redhat.com>
>
> This mostly consists of adding local memory support, and
> support for the compute system values.
>
> The req_local_mem needs to be obtained from the host side
nit: don't you mean the guest side?
> and passed into the shader compile so we can size the
> shared memory correctly.
> ---
> src/vrend_renderer.c | 2 +-
> src/vrend_shader.c | 96 ++++++++++++++++++++++++++++++++++++++++++----------
> src/vrend_shader.h | 1 +
> 3 files changed, 80 insertions(+), 19 deletions(-)
>
> diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c
> index fc82ee1..576cecb 100644
> --- a/src/vrend_renderer.c
> +++ b/src/vrend_renderer.c
> @@ -2718,7 +2718,7 @@ static int vrend_shader_create(struct vrend_context *ctx,
>
> shader->id = glCreateShader(conv_shader_type(shader->sel->type));
> shader->compiled_fs_id = 0;
> - shader->glsl_prog = vrend_convert_shader(&ctx->shader_cfg, shader->sel->tokens, &key, &shader->sel->sinfo);
> + shader->glsl_prog = vrend_convert_shader(&ctx->shader_cfg, shader->sel->tokens, 0, &key, &shader->sel->sinfo);
> if (!shader->glsl_prog) {
> report_context_error(ctx, VIRGL_ERROR_CTX_ILLEGAL_SHADER, 0);
> glDeleteShader(shader->id);
> diff --git a/src/vrend_shader.c b/src/vrend_shader.c
> index 42ebe11..0c3a624 100644
> --- a/src/vrend_shader.c
> +++ b/src/vrend_shader.c
> @@ -167,6 +167,8 @@ struct dump_ctx {
> struct immed imm[MAX_IMMEDIATE];
> unsigned fragcoord_input;
>
> + uint32_t req_local_mem;
> +
> uint32_t num_ubo;
> int ubo_idx[32];
> int ubo_sizes[32];
> @@ -208,6 +210,8 @@ struct dump_ctx {
> int tes_spacing;
> int tes_vertex_order;
> int tes_point_mode;
> +
> + uint16_t local_cs_block_size[3];
> };
>
> static const struct vrend_shader_table shader_req_table[] = {
> @@ -267,6 +271,7 @@ struct source_info {
> uint32_t sreg_index;
> bool tg4_has_component;
> bool override_no_wm[3];
> + bool override_no_cast[3];
> };
>
> static const struct vrend_shader_table conversion_table[] =
> @@ -331,6 +336,7 @@ static inline const char *tgsi_proc_to_prefix(int shader_type)
> case TGSI_PROCESSOR_GEOMETRY: return "gs";
> case TGSI_PROCESSOR_TESS_CTRL: return "tc";
> case TGSI_PROCESSOR_TESS_EVAL: return "te";
> + case TGSI_PROCESSOR_COMPUTE: return "cs";
> default:
> return NULL;
> };
> @@ -1240,12 +1246,23 @@ iter_declaration(struct tgsi_iterate_context *iter,
> name_prefix = "gl_TessLevelOuter";
> } else if (decl->Semantic.Name == TGSI_SEMANTIC_TESSINNER) {
> name_prefix = "gl_TessLevelInner";
> + } else if (decl->Semantic.Name == TGSI_SEMANTIC_THREAD_ID) {
> + name_prefix = "gl_LocalInvocationID";
> + ctx->system_values[i].override_no_wm = false;
> + } else if (decl->Semantic.Name == TGSI_SEMANTIC_BLOCK_ID) {
> + name_prefix = "gl_WorkGroupID";
> + ctx->system_values[i].override_no_wm = false;
> + } else if (decl->Semantic.Name == TGSI_SEMANTIC_GRID_SIZE) {
> + name_prefix = "gl_NumWorkGroups";
> + ctx->system_values[i].override_no_wm = false;
> } else {
> fprintf(stderr, "unsupported system value %d\n", decl->Semantic.Name);
> name_prefix = "unknown";
> }
> snprintf(ctx->system_values[i].glsl_name, 64, "%s", name_prefix);
> break;
> + case TGSI_FILE_MEMORY:
> + break;
> default:
> fprintf(stderr,"unsupported file %d declaration\n", decl->Declaration.File);
> break;
> @@ -1323,6 +1340,12 @@ iter_property(struct tgsi_iterate_context *iter,
> ctx->shader_req_bits |= SHADER_REQ_IMAGE_LOAD_STORE;
> }
>
> + if (prop->Property.PropertyName == TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH)
> + ctx->local_cs_block_size[0] = prop->u[0].Data;
> + if (prop->Property.PropertyName == TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT)
> + ctx->local_cs_block_size[1] = prop->u[0].Data;
> + if (prop->Property.PropertyName == TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH)
> + ctx->local_cs_block_size[2] = prop->u[0].Data;
> return TRUE;
> }
>
> @@ -2318,6 +2341,7 @@ static enum vrend_type_qualifier get_coord_prefix(int resource, bool *is_ms)
> static int
> translate_store(struct dump_ctx *ctx,
> struct tgsi_full_instruction *inst,
> + struct source_info *sinfo,
> char srcs[4][255],
> char dsts[3][255])
> {
> @@ -2346,8 +2370,8 @@ translate_store(struct dump_ctx *ctx,
> }
> snprintf(buf, 512, "imageStore(%s,%s(floatBitsToInt(%s)),%s%s(%s));\n", dsts[0], get_string(coord_prefix), srcs[0], ms_str, get_string(stypeprefix), srcs[1]);
> EMIT_BUF_WITH_RET(ctx, buf);
> - } else if (dst->Register.File == TGSI_FILE_BUFFER) {
> - const char *conversion = get_string(FLOAT_BITS_TO_UINT);
> + } else if (dst->Register.File == TGSI_FILE_BUFFER || dst->Register.File == TGSI_FILE_MEMORY) {
> + const char *conversion = sinfo->override_no_cast[1] ? "" : get_string(FLOAT_BITS_TO_UINT);
> if (inst->Dst[0].Register.WriteMask & 0x1) {
> snprintf(buf, 255, "%s[uint(floatBitsToUint(%s))>>2] = %s(%s).x;\n", dsts[0], srcs[0], conversion, srcs[1]);
> EMIT_BUF_WITH_RET(ctx, buf);
> @@ -2402,7 +2426,8 @@ translate_load(struct dump_ctx *ctx,
> }
> snprintf(buf, 512, "%s = %s(imageLoad(%s, %s(floatBitsToInt(%s))%s)%s);\n", dsts[0], get_string(dtypeprefix), srcs[0], get_string(coord_prefix), srcs[1], ms_str, wm);
> EMIT_BUF_WITH_RET(ctx, buf);
> - } else if (src->Register.File == TGSI_FILE_BUFFER) {
> + } else if (src->Register.File == TGSI_FILE_BUFFER ||
> + src->Register.File == TGSI_FILE_MEMORY) {
> char mydst[255], atomic_op[9], atomic_src[10];
> strcpy(mydst, dsts[0]);
> char *wmp = strchr(mydst, '.');
> @@ -2566,7 +2591,8 @@ translate_atomic(struct dump_ctx *ctx,
> snprintf(buf, 512, "%s = %s(imageAtomic%s(%s, %s(floatBitsToInt(%s))%s, %s(%s(%s))%s));\n", dsts[0], get_string(dtypeprefix), opname, srcs[0], get_string(coord_prefix), srcs[1], ms_str, get_string(stypecast), get_string(stypeprefix), srcs[2], cas_str);
> EMIT_BUF_WITH_RET(ctx, buf);
> }
> - if (src->Register.File == TGSI_FILE_BUFFER) {
> + if (src->Register.File == TGSI_FILE_BUFFER ||
> + src->Register.File == TGSI_FILE_MEMORY) {
> snprintf(buf, 512, "%s = %s(atomic%s(%s[int(floatBitsToInt(%s)) >> 2], uint(%s(%s).x)%s));\n", dsts[0], get_string(dtypeprefix), opname, srcs[0], srcs[1], get_string(stypeprefix), srcs[2], cas_str);
> EMIT_BUF_WITH_RET(ctx, buf);
> }
> @@ -2758,6 +2784,8 @@ get_destination_info(struct dump_ctx *ctx,
> snprintf(dsts[i], 255, "%sssboarr%s[%d].%sssbocontents%d", cname, atomic_str, dst_reg->Register.Index - base, cname, base);
> } else
> snprintf(dsts[i], 255, "%sssbocontents%d", cname, dst_reg->Register.Index);
> + } else if (dst_reg->Register.File == TGSI_FILE_MEMORY) {
> + snprintf(dsts[i], 255, "values");
> } else if (dst_reg->Register.File == TGSI_FILE_ADDRESS) {
> snprintf(dsts[i], 255, "addr%d", dst_reg->Register.Index);
> }
> @@ -2836,6 +2864,7 @@ get_source_info(struct dump_ctx *ctx,
> boolean isfloatabsolute = src->Register.Absolute && stype != TGSI_TYPE_DOUBLE;
>
> sinfo->override_no_wm[i] = false;
> + sinfo->override_no_cast[i] = false;
> if (isfloatabsolute)
> swizzle[swz_idx++] = ')';
>
> @@ -3040,6 +3069,9 @@ get_source_info(struct dump_ctx *ctx,
> snprintf(srcs[i], 255, "%sssbocontents%d%s", cname, src->Register.Index, swizzle);
> }
> sinfo->sreg_index = src->Register.Index;
> + } else if (src->Register.File == TGSI_FILE_MEMORY) {
> + snprintf(srcs[i], 255, "values");
> + sinfo->sreg_index = src->Register.Index;
> } else if (src->Register.File == TGSI_FILE_IMMEDIATE) {
> if (src->Register.Index >= (int)ARRAY_SIZE(ctx->imm)) {
> fprintf(stderr, "Immediate exceeded, max is %lu\n", ARRAY_SIZE(ctx->imm));
> @@ -3159,6 +3191,15 @@ get_source_info(struct dump_ctx *ctx,
> ctx->system_values[j].glsl_name, get_swiz_char(src->Register.SwizzleY),
> ctx->system_values[j].glsl_name, get_swiz_char(src->Register.SwizzleZ),
> ctx->system_values[j].glsl_name, get_swiz_char(src->Register.SwizzleW));
> + } else if (ctx->system_values[j].name == TGSI_SEMANTIC_GRID_SIZE ||
> + ctx->system_values[j].name == TGSI_SEMANTIC_THREAD_ID ||
> + ctx->system_values[j].name == TGSI_SEMANTIC_BLOCK_ID) {
> + snprintf(srcs[i], 255, "uvec4(%s.%c, %s.%c, %s.%c, %s.%c)",
> + ctx->system_values[j].glsl_name, get_swiz_char(src->Register.SwizzleX),
> + ctx->system_values[j].glsl_name, get_swiz_char(src->Register.SwizzleY),
> + ctx->system_values[j].glsl_name, get_swiz_char(src->Register.SwizzleZ),
> + ctx->system_values[j].glsl_name, get_swiz_char(src->Register.SwizzleW));
> + sinfo->override_no_cast[i] = true;
> } else if (ctx->system_values[j].name == TGSI_SEMANTIC_SAMPLEMASK) {
> snprintf(srcs[i], 255, "ivec4(%s, %s, %s, %s)",
> src->Register.SwizzleX == TGSI_SWIZZLE_X ? ctx->system_values[j].glsl_name : "0",
> @@ -3813,7 +3854,7 @@ iter_instruction(struct tgsi_iterate_context *iter,
> break;
> }
> case TGSI_OPCODE_STORE:
> - ret = translate_store(ctx, inst, srcs, dsts);
> + ret = translate_store(ctx, inst, &sinfo, srcs, dsts);
> if (ret)
> return FALSE;
> break;
> @@ -3893,19 +3934,24 @@ static char *emit_header(struct dump_ctx *ctx, char *glsl_hdr)
> STRCAT_WITH_RET(glsl_hdr, "precision highp int;\n");
> } else {
> char buf[128];
> - if (ctx->prog_type == TGSI_PROCESSOR_GEOMETRY ||
> - ctx->prog_type == TGSI_PROCESSOR_TESS_EVAL ||
> - ctx->prog_type == TGSI_PROCESSOR_TESS_CTRL ||
> - ctx->glsl_ver_required == 150)
> - STRCAT_WITH_RET(glsl_hdr, "#version 150\n");
> - else if (ctx->glsl_ver_required == 140)
> - STRCAT_WITH_RET(glsl_hdr, "#version 140\n");
> - else
> - STRCAT_WITH_RET(glsl_hdr, "#version 130\n");
> - if (ctx->prog_type == TGSI_PROCESSOR_VERTEX ||
> - ctx->prog_type == TGSI_PROCESSOR_GEOMETRY ||
> - ctx->prog_type == TGSI_PROCESSOR_TESS_EVAL)
> - PAD_GPU_SHADER5(glsl_hdr);
> + if (ctx->prog_type == TGSI_PROCESSOR_COMPUTE) {
> + STRCAT_WITH_RET(glsl_hdr, "#version 330\n");
> + STRCAT_WITH_RET(glsl_hdr, "#extension GL_ARB_compute_shader : require\n");
> + } else {
> + if (ctx->prog_type == TGSI_PROCESSOR_GEOMETRY ||
> + ctx->prog_type == TGSI_PROCESSOR_TESS_EVAL ||
> + ctx->prog_type == TGSI_PROCESSOR_TESS_CTRL ||
> + ctx->glsl_ver_required == 150)
> + STRCAT_WITH_RET(glsl_hdr, "#version 150\n");
> + else if (ctx->glsl_ver_required == 140)
> + STRCAT_WITH_RET(glsl_hdr, "#version 140\n");
> + else
> + STRCAT_WITH_RET(glsl_hdr, "#version 130\n");
> + if (ctx->prog_type == TGSI_PROCESSOR_VERTEX ||
> + ctx->prog_type == TGSI_PROCESSOR_GEOMETRY ||
> + ctx->prog_type == TGSI_PROCESSOR_TESS_EVAL)
> + PAD_GPU_SHADER5(glsl_hdr);
> + }
>
> if (ctx->prog_type == TGSI_PROCESSOR_TESS_CTRL ||
> ctx->prog_type == TGSI_PROCESSOR_TESS_EVAL)
> @@ -4209,6 +4255,18 @@ static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
> STRCAT_WITH_RET(glsl_hdr, buf);
> }
> }
> +
> + if (ctx->prog_type == TGSI_PROCESSOR_COMPUTE) {
> + snprintf(buf, 255, "layout (local_size_x = %d, local_size_y = %d, local_size_z = %d) in;\n",
> + ctx->local_cs_block_size[0], ctx->local_cs_block_size[1], ctx->local_cs_block_size[2]);
> + STRCAT_WITH_RET(glsl_hdr, buf);
> +
> + if (ctx->req_local_mem) {
> + snprintf(buf, 255, "shared uint values[%d];\n", ctx->req_local_mem / 4);
> + STRCAT_WITH_RET(glsl_hdr, buf);
> + }
> + }
> +
> if (ctx->prog_type == TGSI_PROCESSOR_GEOMETRY) {
> char invocbuf[25];
>
> @@ -4709,6 +4767,7 @@ static boolean fill_interpolants(struct dump_ctx *ctx, struct vrend_shader_info
>
> char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
> const struct tgsi_token *tokens,
> + uint32_t req_local_mem,
> struct vrend_shader_key *key,
> struct vrend_shader_info *sinfo)
> {
> @@ -4734,6 +4793,7 @@ char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
> ctx.ssbo_array_base = 0xffffffff;
> ctx.ssbo_atomic_array_base = 0xffffffff;
> ctx.has_sample_input = false;
> + ctx.req_local_mem = req_local_mem;
> tgsi_scan_shader(tokens, &ctx.info);
> /* if we are in core profile mode we should use GLSL 1.40 */
> if (cfg->use_core_profile && cfg->glsl_version >= 140)
> diff --git a/src/vrend_shader.h b/src/vrend_shader.h
> index 04e92e6..bcca3e3 100644
> --- a/src/vrend_shader.h
> +++ b/src/vrend_shader.h
> @@ -119,6 +119,7 @@ bool vrend_patch_vertex_shader_interpolants(struct vrend_shader_cfg *cfg,
>
> char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
> const struct tgsi_token *tokens,
> + uint32_t req_local_mem,
> struct vrend_shader_key *key,
> struct vrend_shader_info *sinfo);
> const char *vrend_shader_samplertypeconv(int sampler_type, int *is_shad);
> --
> 2.14.3
>
> _______________________________________________
> virglrenderer-devel mailing list
> virglrenderer-devel at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/virglrenderer-devel
More information about the virglrenderer-devel
mailing list