[virglrenderer-devel] [PATCH 4/4] renderer: add ARB_gpu_shader5 support.
Gurchetan Singh
gurchetansingh at chromium.org
Thu May 10 22:29:42 UTC 2018
This patch doesn't apply cleanly to ToT (tip of tree, "shader: Invert y coordonate
if on gles host"). Could you rebase it?
On Wed, May 9, 2018 at 8:44 PM, Dave Airlie <airlied at gmail.com> wrote:
> ---
> src/gallium/include/pipe/p_state.h | 1 +
> src/vrend_decode.c | 4 +-
> src/vrend_renderer.c | 35 ++-
> src/vrend_shader.c | 496 +++++++++++++++++++++++++++++++------
> src/vrend_shader.h | 16 ++
> 5 files changed, 472 insertions(+), 80 deletions(-)
>
> diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
> index 2b4d9e4..80be1b5 100644
> --- a/src/gallium/include/pipe/p_state.h
> +++ b/src/gallium/include/pipe/p_state.h
> @@ -206,6 +206,7 @@ struct pipe_stream_output_info
> unsigned num_components:3; /** 1 to 4 */
> unsigned output_buffer:3; /**< 0 to PIPE_MAX_SO_BUFFERS */
> unsigned dst_offset:16; /**< offset into the buffer in dwords */
> + unsigned stream:2;
> } output[PIPE_MAX_SO_OUTPUTS];
> };
>
> diff --git a/src/vrend_decode.c b/src/vrend_decode.c
> index 5f62568..1e31573 100644
> --- a/src/vrend_decode.c
> +++ b/src/vrend_decode.c
> @@ -97,6 +97,8 @@ static int vrend_decode_create_shader(struct vrend_decode_ctx *ctx,
> so_info.output[i].num_components = (tmp >> 10) & 0x7;
> so_info.output[i].output_buffer = (tmp >> 13) & 0x7;
> so_info.output[i].dst_offset = (tmp >> 16) & 0xffff;
> + tmp = get_buf_entry(ctx, VIRGL_OBJ_SHADER_SO_OUTPUT0_SO(i));
> + so_info.output[i].stream = (tmp & 0x3);
> }
> }
> shader_offset += 4 + (2 * num_so_outputs);
> @@ -659,7 +661,7 @@ static int vrend_decode_create_query(struct vrend_decode_ctx *ctx, uint32_t hand
>
> tmp = get_buf_entry(ctx, VIRGL_OBJ_QUERY_TYPE_INDEX);
> query_type = VIRGL_OBJ_QUERY_TYPE(tmp);
> - query_index = VIRGL_OBJ_QUERY_INDEX(tmp);
> + query_index = (tmp >> 16) & 0xffff;
>
> offset = get_buf_entry(ctx, VIRGL_OBJ_QUERY_OFFSET);
> res_handle = get_buf_entry(ctx, VIRGL_OBJ_QUERY_RES_HANDLE);
> diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c
> index 26bf639..00b7a8c 100644
> --- a/src/vrend_renderer.c
> +++ b/src/vrend_renderer.c
> @@ -627,6 +627,7 @@ static void vrend_destroy_shader_selector(struct vrend_shader_selector *sel)
> free(sel->tmp_buf);
> free(sel->sinfo.so_names);
> free(sel->sinfo.interpinfo);
> + free(sel->sinfo.sampler_arrays);
> free(sel->tokens);
> free(sel);
> }
> @@ -771,13 +772,14 @@ static void dump_stream_out(struct pipe_stream_output_info *so)
> printf("\n");
> printf("outputs:\n");
> for (i = 0; i < so->num_outputs; i++) {
> - printf("\t%d: reg: %d sc: %d, nc: %d ob: %d do: %d\n",
> + printf("\t%d: reg: %d sc: %d, nc: %d ob: %d do: %d st: %d\n",
> i,
> so->output[i].register_index,
> so->output[i].start_component,
> so->output[i].num_components,
> so->output[i].output_buffer,
> - so->output[i].dst_offset);
> + so->output[i].dst_offset,
> + so->output[i].stream);
> }
> }
>
> @@ -870,7 +872,7 @@ static struct vrend_linked_shader_program *add_shader_program(struct vrend_conte
> struct vrend_shader *gs)
> {
> struct vrend_linked_shader_program *sprog = CALLOC_STRUCT(vrend_linked_shader_program);
> - char name[32];
> + char name[64];
> int i;
> GLuint prog_id;
> GLint lret;
> @@ -993,7 +995,11 @@ static struct vrend_linked_shader_program *add_shader_program(struct vrend_conte
> index = 0;
> while(mask) {
> i = u_bit_scan(&mask);
> - snprintf(name, 32, "%ssamp%d", prefix, i);
> + if (sprog->ss[id]->sel->sinfo.num_sampler_arrays) {
> + int arr_idx = shader_lookup_sampler_array(&sprog->ss[id]->sel->sinfo, i);
> + snprintf(name, 32, "%ssamp%d[%d]", prefix, arr_idx, i - sprog->ss[id]->sel->sinfo.sampler_arrays[arr_idx].first);
> + } else
> + snprintf(name, 32, "%ssamp%d", prefix, i);
> sprog->samp_locs[id][index] = glGetUniformLocation(prog_id, name);
> if (sprog->ss[id]->sel->sinfo.shadow_samp_mask & (1 << i)) {
> snprintf(name, 32, "%sshadmask%d", prefix, i);
> @@ -1046,7 +1052,11 @@ static struct vrend_linked_shader_program *add_shader_program(struct vrend_conte
>
> sprog->ubo_locs[id] = calloc(sprog->ss[id]->sel->sinfo.num_ubos, sizeof(uint32_t));
> for (i = 0; i < sprog->ss[id]->sel->sinfo.num_ubos; i++) {
> - snprintf(name, 32, "%subo%d", prefix, i + 1);
> + if (sprog->ss[id]->sel->sinfo.ubo_indirect)
> + snprintf(name, 32, "%subo[%d]", prefix, i);
> + else
> + snprintf(name, 32, "%subo%d", prefix, i + 1);
> +
> sprog->ubo_locs[id][i] = glGetUniformBlockIndex(prog_id, name);
> }
> } else
> @@ -6321,7 +6331,6 @@ int vrend_create_query(struct vrend_context *ctx, uint32_t handle,
> q->type = query_type;
> q->index = query_index;
> q->ctx_id = ctx->ctx_id;
> -
> vrend_resource_reference(&q->res, res);
>
> switch (q->type) {
> @@ -6384,7 +6393,10 @@ void vrend_begin_query(struct vrend_context *ctx, uint32_t handle)
> if (q->gltype == GL_TIMESTAMP)
> return;
>
> - glBeginQuery(q->gltype, q->id);
> + if (q->index > 0)
> + glBeginQueryIndexed(q->gltype, q->index, q->id);
> + else
> + glBeginQuery(q->gltype, q->id);
> }
>
> void vrend_end_query(struct vrend_context *ctx, uint32_t handle)
> @@ -6406,7 +6418,10 @@ void vrend_end_query(struct vrend_context *ctx, uint32_t handle)
> return;
> }
>
> - glEndQuery(q->gltype);
> + if (q->index > 0)
> + glEndQueryIndexed(q->gltype, q->index);
> + else
> + glEndQuery(q->gltype);
> }
>
> void vrend_get_query_result(struct vrend_context *ctx, uint32_t handle,
> @@ -6581,8 +6596,10 @@ bool vrend_renderer_fill_caps_common(uint32_t set, uint32_t version,
> caps->v1.glsl_level = 140;
> else if (gl_ver == 32)
> caps->v1.glsl_level = 150;
> - else if (gl_ver >= 33)
> + else if (gl_ver == 33)
> caps->v1.glsl_level = 330;
> + else if (gl_ver >= 40)
> + caps->v1.glsl_level = 400;
> } else {
> caps->v1.glsl_level = 130;
> }
> diff --git a/src/vrend_shader.c b/src/vrend_shader.c
> index d58b943..668d187 100644
> --- a/src/vrend_shader.c
> +++ b/src/vrend_shader.c
> @@ -37,7 +37,7 @@ extern int vrend_dump_shaders;
>
> /* start convert of tgsi to glsl */
>
> -#define INTERP_PREFIX " "
> +#define INTERP_PREFIX " "
> #define INVARI_PREFIX "invariant"
>
> struct vrend_shader_io {
> @@ -55,6 +55,7 @@ struct vrend_shader_io {
> bool override_no_wm;
> bool is_int;
> char glsl_name[64];
> + unsigned stream;
> };
>
> struct vrend_shader_sampler {
> @@ -101,8 +102,13 @@ struct dump_ctx {
>
> struct vrend_shader_sampler samplers[32];
> uint32_t samplers_used;
> - int num_consts;
> + bool sviews_used;
> +
> + struct vrend_sampler_array *sampler_arrays;
> + int num_sampler_arrays;
> + int last_sampler_array_idx;
>
> + int num_consts;
> int num_imm;
> struct immed imm[MAX_IMMEDIATE];
> unsigned fragcoord_input;
> @@ -151,6 +157,8 @@ struct dump_ctx {
> bool vs_has_pervertex;
> bool uses_sample_shading;
> bool uses_gpu_shader5;
> + bool write_mul_temp;
> + bool write_interp_temp;
> };
>
> static inline const char *tgsi_proc_to_prefix(int shader_type)
> @@ -247,6 +255,46 @@ static struct vrend_temp_range *find_temp_range(struct dump_ctx *ctx, int index)
> return NULL;
> }
>
> +static int add_sampler_array(struct dump_ctx *ctx, int first, int last, int sview_type, int sview_rtype)
> +{
> + int idx = ctx->num_sampler_arrays;
> + ctx->num_sampler_arrays++;
> + ctx->sampler_arrays = realloc(ctx->sampler_arrays, sizeof(struct vrend_sampler_array) * ctx->num_sampler_arrays);
> + if (!ctx->sampler_arrays)
> + return -1;
> +
> + ctx->sampler_arrays[idx].first = first;
> + ctx->sampler_arrays[idx].last = last;
> + ctx->sampler_arrays[idx].idx = idx;
> + ctx->sampler_arrays[idx].sview_type = sview_type;
> + ctx->sampler_arrays[idx].sview_rtype = sview_rtype;
> + return idx;
> +}
> +
> +int lookup_sampler_array(struct dump_ctx *ctx, int index)
> +{
> + int i;
> + for (i = 0; i < ctx->num_sampler_arrays; i++) {
> + if (index >= ctx->sampler_arrays[i].first &&
> + index <= ctx->sampler_arrays[i].last) {
> + return ctx->sampler_arrays[i].idx;
> + }
> + }
> + return -1;
> +}
> +
> +int shader_lookup_sampler_array(struct vrend_shader_info *sinfo, int index)
> +{
> + int i;
> + for (i = 0; i < sinfo->num_sampler_arrays; i++) {
> + if (index >= sinfo->sampler_arrays[i].first &&
> + index <= sinfo->sampler_arrays[i].last) {
> + return sinfo->sampler_arrays[i].idx;
> + }
> + }
> + return -1;
> +}
> +
> static boolean
> iter_declaration(struct tgsi_iterate_context *iter,
> struct tgsi_full_declaration *decl )
> @@ -270,6 +318,7 @@ iter_declaration(struct tgsi_iterate_context *iter,
> ctx->inputs[i].name = decl->Semantic.Name;
> ctx->inputs[i].sid = decl->Semantic.Index;
> ctx->inputs[i].interpolate = decl->Interp.Interpolate;
> + ctx->inputs[i].centroid = decl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID;
> ctx->inputs[i].first = decl->Range.First;
> ctx->inputs[i].glsl_predefined_no_emit = false;
> ctx->inputs[i].glsl_no_index = false;
> @@ -656,6 +705,19 @@ iter_declaration(struct tgsi_iterate_context *iter,
> return FALSE;
> }
> ctx->samplers[decl->Range.First].tgsi_sampler_return = decl->SamplerView.ReturnTypeX;
> + if (ctx->info.indirect_files & (1 << TGSI_FILE_SAMPLER)) {
> + if (ctx->last_sampler_array_idx != -1) {
> + if (ctx->sampler_arrays[ctx->last_sampler_array_idx].sview_type == decl->SamplerView.Resource &&
> + ctx->sampler_arrays[ctx->last_sampler_array_idx].sview_rtype == decl->SamplerView.ReturnTypeX) {
> + ctx->sampler_arrays[ctx->last_sampler_array_idx].last = decl->Range.Last + 1;
> + } else {
> + ctx->last_sampler_array_idx = add_sampler_array(ctx, decl->Range.First, decl->Range.Last + 1, decl->SamplerView.Resource, decl->SamplerView.ReturnTypeX);
> + }
> + } else {
> + ctx->last_sampler_array_idx = add_sampler_array(ctx, decl->Range.First, decl->Range.Last + 1, decl->SamplerView.Resource, decl->SamplerView.ReturnTypeX);
> + }
> + } else
> + ctx->sviews_used = true;
> break;
> case TGSI_FILE_CONSTANT:
> if (decl->Declaration.Dimension) {
> @@ -675,7 +737,7 @@ iter_declaration(struct tgsi_iterate_context *iter,
> }
> break;
> case TGSI_FILE_ADDRESS:
> - ctx->num_address = 1;
> + ctx->num_address = decl->Range.Last + 1;
> break;
> case TGSI_FILE_SYSTEM_VALUE:
> i = ctx->num_system_values++;
> @@ -707,6 +769,20 @@ iter_declaration(struct tgsi_iterate_context *iter,
> name_prefix = "gl_InvocationID";
> ctx->has_ints = true;
> ctx->uses_gpu_shader5 = true;
> + } else if (decl->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK) {
> + name_prefix = "gl_SampleMaskIn[0]";
> + ctx->has_ints = true;
> + ctx->uses_gpu_shader5 = true;
> + } else if (decl->Semantic.Name == TGSI_SEMANTIC_PRIMID) {
> + name_prefix = "gl_PrimitiveID";
> + ctx->has_ints = true;
> + ctx->uses_gpu_shader5 = true;
> + } else if (decl->Semantic.Name == TGSI_SEMANTIC_TESSCOORD) {
> + name_prefix = "gl_TessCoord";
> + ctx->system_values[i].override_no_wm = false;
> + } else if (decl->Semantic.Name == TGSI_SEMANTIC_VERTICESIN) {
> + name_prefix = "gl_PatchVerticesIn";
> + ctx->system_values[i].override_no_wm = false;
> } else {
> fprintf(stderr, "unsupported system value %d\n", decl->Semantic.Name);
> name_prefix = "unknown";
> @@ -927,6 +1003,29 @@ static int emit_prescale(struct dump_ctx *ctx)
> return 0;
> }
>
> +static int prepare_so_movs(struct dump_ctx *ctx)
> +{
> + int i;
> + for (i = 0; i < ctx->so->num_outputs; i++) {
> + ctx->write_so_outputs[i] = true;
> + if (ctx->so->output[i].start_component != 0)
> + continue;
> + if (ctx->so->output[i].num_components != 4)
> + continue;
> + if (ctx->outputs[ctx->so->output[i].register_index].name == TGSI_SEMANTIC_CLIPDIST)
> + continue;
> + if (ctx->outputs[ctx->so->output[i].register_index].name == TGSI_SEMANTIC_POSITION)
> + continue;
> +
> + ctx->outputs[ctx->so->output[i].register_index].stream = ctx->so->output[i].stream;
> + if (ctx->prog_type == TGSI_PROCESSOR_GEOMETRY && ctx->so->output[i].stream)
> + ctx->uses_gpu_shader5 = true;
> +
> + ctx->write_so_outputs[i] = false;
> + }
> + return 0;
> +}
> +
> static int emit_so_movs(struct dump_ctx *ctx)
> {
> char buf[255];
> @@ -957,7 +1056,7 @@ static int emit_so_movs(struct dump_ctx *ctx)
> } else
> writemask[0] = 0;
>
> - if (ctx->so->output[i].num_components == 4 && writemask[0] == 0 && !(ctx->outputs[ctx->so->output[i].register_index].name == TGSI_SEMANTIC_CLIPDIST) && !(ctx->outputs[ctx->so->output[i].register_index].name == TGSI_SEMANTIC_POSITION)) {
> + if (!ctx->write_so_outputs[i]) {
> if (ctx->so->output[i].register_index > ctx->num_outputs)
> ctx->so_names[i] = NULL;
> else if (ctx->outputs[ctx->so->output[i].register_index].name == TGSI_SEMANTIC_CLIPVERTEX && ctx->has_clipvertex) {
> @@ -965,13 +1064,10 @@ static int emit_so_movs(struct dump_ctx *ctx)
> ctx->has_clipvertex_so = true;
> } else
> ctx->so_names[i] = strdup(ctx->outputs[ctx->so->output[i].register_index].glsl_name);
> - ctx->write_so_outputs[i] = false;
> -
> } else {
> char ntemp[8];
> snprintf(ntemp, 8, "tfout%d", i);
> ctx->so_names[i] = strdup(ntemp);
> - ctx->write_so_outputs[i] = true;
> }
> if (ctx->so->output[i].num_components == 1) {
> if (ctx->outputs[ctx->so->output[i].register_index].is_int)
> @@ -1121,7 +1217,8 @@ static int translate_tex(struct dump_ctx *ctx,
> char dsts[3][255],
> const char *writemask,
> const char *dstconv,
> - bool dst0_override_no_wm)
> + bool dst0_override_no_wm,
> + bool tg4_has_component)
> {
> const char *twm = "", *gwm = NULL, *txfi;
> const char *dtypeprefix = "";
> @@ -1318,7 +1415,9 @@ static int translate_tex(struct dump_ctx *ctx,
> case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
> case TGSI_TEXTURE_CUBE_ARRAY:
> default:
> - if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && inst->Texture.Texture != TGSI_TEXTURE_CUBE_ARRAY)
> + if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 &&
> + inst->Texture.Texture != TGSI_TEXTURE_CUBE_ARRAY
> + && inst->Texture.Texture != TGSI_TEXTURE_SHADOWCUBE_ARRAY)
> twm = ".xyz";
> else
> twm = "";
> @@ -1376,10 +1475,9 @@ static int translate_tex(struct dump_ctx *ctx,
> snprintf(bias, 128, ", %s%s, %s%s", srcs[1], gwm, srcs[2], gwm);
> sampler_index = 3;
> } else if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) {
> -
> sampler_index = 2;
> ctx->uses_tg4 = true;
> - if (inst->Texture.NumOffsets > 1 || is_shad)
> + if (inst->Texture.NumOffsets > 1 || is_shad || ctx->uses_sampler_rect)
> ctx->uses_gpu_shader5 = true;
> if (inst->Texture.NumOffsets == 1) {
> if (inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE)
> @@ -1387,11 +1485,26 @@ static int translate_tex(struct dump_ctx *ctx,
> }
> if (is_shad) {
> if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
> - inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
> - inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
> + inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
> snprintf(bias, 64, ", %s.w", srcs[0]);
> + else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
> + snprintf(bias, 64, ", %s.x", srcs[1]);
> else
> snprintf(bias, 64, ", %s.z", srcs[0]);
> + } else if (tg4_has_component) {
> + if (inst->Texture.NumOffsets == 0) {
> + if (inst->Texture.Texture == TGSI_TEXTURE_2D ||
> + inst->Texture.Texture == TGSI_TEXTURE_RECT ||
> + inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
> + inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
> + inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY)
> + snprintf(bias, 64, ", int(%s)", srcs[1]);
> + } else if (inst->Texture.NumOffsets) {
> + if (inst->Texture.Texture == TGSI_TEXTURE_2D ||
> + inst->Texture.Texture == TGSI_TEXTURE_RECT ||
> + inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
> + snprintf(bias, 64, ", int(%s)", srcs[1]);
> + }
> }
> } else
> bias[0] = 0;
> @@ -1460,13 +1573,16 @@ static int translate_tex(struct dump_ctx *ctx,
> return false;
> }
> } else if (inst->TexOffsets[0].File == TGSI_FILE_TEMPORARY) {
> + struct vrend_temp_range *range = find_temp_range(ctx, inst->TexOffsets[0].Index);
> + int idx = inst->TexOffsets[0].Index - range->first;
> switch (inst->Texture.Texture) {
> case TGSI_TEXTURE_1D:
> case TGSI_TEXTURE_1D_ARRAY:
> case TGSI_TEXTURE_SHADOW1D:
> case TGSI_TEXTURE_SHADOW1D_ARRAY:
> - snprintf(offbuf, 120, ", int(floatBitsToInt(temps[%d].%c))",
> - inst->TexOffsets[0].Index, get_swiz_char(inst->TexOffsets[0].SwizzleX));
> + snprintf(offbuf, 120, ", int(floatBitsToInt(temp%d[%d].%c))",
> + range->first, idx,
> + get_swiz_char(inst->TexOffsets[0].SwizzleX));
> break;
> case TGSI_TEXTURE_RECT:
> case TGSI_TEXTURE_SHADOWRECT:
> @@ -1474,15 +1590,20 @@ static int translate_tex(struct dump_ctx *ctx,
> case TGSI_TEXTURE_2D_ARRAY:
> case TGSI_TEXTURE_SHADOW2D:
> case TGSI_TEXTURE_SHADOW2D_ARRAY:
> - snprintf(offbuf, 120, ", ivec2(floatBitsToInt(temps[%d].%c), floatBitsToInt(temps[%d].%c))",
> - inst->TexOffsets[0].Index, get_swiz_char(inst->TexOffsets[0].SwizzleX),
> - inst->TexOffsets[0].Index, get_swiz_char(inst->TexOffsets[0].SwizzleY));
> + snprintf(offbuf, 120, ", ivec2(floatBitsToInt(temp%d[%d].%c), floatBitsToInt(temp%d[%d].%c))",
> + range->first, idx,
> + get_swiz_char(inst->TexOffsets[0].SwizzleX),
> + range->first, idx,
> + get_swiz_char(inst->TexOffsets[0].SwizzleY));
> break;
> case TGSI_TEXTURE_3D:
> - snprintf(offbuf, 120, ", ivec2(floatBitsToInt(temps[%d].%c), floatBitsToInt(temps[%d].%c), floatBitsToInt(temps[%d].%c)",
> - inst->TexOffsets[0].Index, get_swiz_char(inst->TexOffsets[0].SwizzleX),
> - inst->TexOffsets[0].Index, get_swiz_char(inst->TexOffsets[0].SwizzleY),
> - inst->TexOffsets[0].Index, get_swiz_char(inst->TexOffsets[0].SwizzleZ));
> + snprintf(offbuf, 120, ", ivec2(floatBitsToInt(temp%d[%d].%c), floatBitsToInt(temp%d[%d].%c), floatBitsToInt(temp%d[%d].%c)",
> + range->first, idx,
> + get_swiz_char(inst->TexOffsets[0].SwizzleX),
> + range->first, idx,
> + get_swiz_char(inst->TexOffsets[0].SwizzleY),
> + range->first, idx,
> + get_swiz_char(inst->TexOffsets[0].SwizzleZ));
> break;
> default:
> fprintf(stderr, "unhandled texture: %x\n", inst->Texture.Texture);
> @@ -1490,12 +1611,11 @@ static int translate_tex(struct dump_ctx *ctx,
> break;
> }
> }
> - if (inst->Instruction.Opcode == TGSI_OPCODE_TXL || inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
> + if (inst->Instruction.Opcode == TGSI_OPCODE_TXL || inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || inst->Instruction.Opcode == TGSI_OPCODE_TXD || (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && is_shad)) {
> char tmp[128];
> strcpy(tmp, offbuf);
> strcpy(offbuf, bias);
> strcpy(bias, tmp);
> -
> }
> }
> if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
> @@ -1591,8 +1711,9 @@ iter_instruction(struct tgsi_iterate_context *iter,
> bool override_no_wm[4];
> bool dst_override_no_wm[2];
> char *sret;
> + char interpSrc0[255], interpSwizzle0[10];
> int ret;
> -
> + bool tg4_has_component = false;
> if (ctx->prog_type == -1)
> ctx->prog_type = iter->processor.Processor;
> if (dtype == TGSI_TYPE_SIGNED || dtype == TGSI_TYPE_UNSIGNED ||
> @@ -1638,6 +1759,8 @@ iter_instruction(struct tgsi_iterate_context *iter,
> if (ret)
> return FALSE;
> }
> + if (ctx->so)
> + prepare_so_movs(ctx);
> }
> for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
> const struct tgsi_full_dst_register *dst = &inst->Dst[i];
> @@ -1709,10 +1832,14 @@ iter_instruction(struct tgsi_iterate_context *iter,
> if (!range)
> return FALSE;
> if (dst->Register.Indirect) {
> - snprintf(dsts[i], 255, "temp%d[addr0 + %d]%s", range->first, dst->Register.Index - range->first, writemask);
> + assert(dst->Indirect.File == TGSI_FILE_ADDRESS);
> + snprintf(dsts[i], 255, "temp%d[addr%d + %d]%s", range->first, dst->Indirect.Index, dst->Register.Index - range->first, writemask);
> } else
> snprintf(dsts[i], 255, "temp%d[%d]%s", range->first, dst->Register.Index - range->first, writemask);
> }
> + else if (dst->Register.File == TGSI_FILE_ADDRESS) {
> + snprintf(dsts[i], 255, "addr%d", dst->Register.Index);
> + }
> }
>
> for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
> @@ -1732,8 +1859,13 @@ iter_instruction(struct tgsi_iterate_context *iter,
> if (isabsolute)
> strcpy(&prefix[pre_idx++], "abs(");
>
> - if (src->Register.Dimension)
> - sprintf(arrayname, "[%d]", src->Dimension.Index);
> + if (src->Register.Dimension) {
> + if (src->Dimension.Indirect) {
> + assert(src->DimIndirect.File == TGSI_FILE_ADDRESS);
> + sprintf(arrayname, "[addr%d]", src->DimIndirect.Index);
> + } else
> + sprintf(arrayname, "[%d]", src->Dimension.Index);
> + }
>
> if (src->Register.SwizzleX != TGSI_SWIZZLE_X ||
> src->Register.SwizzleY != TGSI_SWIZZLE_Y ||
> @@ -1769,8 +1901,18 @@ iter_instruction(struct tgsi_iterate_context *iter,
> if (stype == TGSI_TYPE_UNSIGNED &&
> ctx->inputs[j].is_int)
> srcstypeprefix = "";
> - snprintf(srcs[i], 255, "%s(%s%s%s%s)",
> - srcstypeprefix, prefix, ctx->inputs[j].glsl_name, arrayname, ctx->inputs[j].is_int ? "" : swizzle);
> +
> + if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE && i == 1) {
> + snprintf(srcs[i], 255, "floatBitsToInt(%s%s%s%s)", prefix, ctx->inputs[j].glsl_name, arrayname, swizzle);
> + } else
> + snprintf(srcs[i], 255, "%s(%s%s%s%s)", srcstypeprefix, prefix, ctx->inputs[j].glsl_name, arrayname, ctx->inputs[j].is_int ? "" : swizzle);
> + }
> + if ((inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE ||
> + inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
> + inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) &&
> + i == 0) {
> + snprintf(interpSrc0, 255, "%s", ctx->inputs[j].glsl_name);
> + snprintf(interpSwizzle0, 10, "%s", swizzle);
> }
> override_no_wm[i] = ctx->inputs[j].override_no_wm;
> break;
> @@ -1780,8 +1922,14 @@ iter_instruction(struct tgsi_iterate_context *iter,
> struct vrend_temp_range *range = find_temp_range(ctx, src->Register.Index);
> if (!range)
> return FALSE;
> + if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE && i == 1) {
> + stprefix = true;
> + stypeprefix = "floatBitsToInt";
> + }
> +
> if (src->Register.Indirect) {
> - snprintf(srcs[i], 255, "%s%c%stemp%d[addr0 + %d]%s%c", stypeprefix, stprefix ? '(' : ' ', prefix, range->first, src->Register.Index - range->first, swizzle, stprefix ? ')' : ' ');
> + assert(src->Indirect.File == TGSI_FILE_ADDRESS);
> + snprintf(srcs[i], 255, "%s%c%stemp%d[addr%d + %d]%s%c", stypeprefix, stprefix ? '(' : ' ', prefix, range->first, src->Indirect.Index, src->Register.Index - range->first, swizzle, stprefix ? ')' : ' ');
> } else
> snprintf(srcs[i], 255, "%s%c%stemp%d[%d]%s%c", stypeprefix, stprefix ? '(' : ' ', prefix, range->first, src->Register.Index - range->first, swizzle, stprefix ? ')' : ' ');
> } else if (src->Register.File == TGSI_FILE_CONSTANT) {
> @@ -1789,14 +1937,33 @@ iter_instruction(struct tgsi_iterate_context *iter,
> int dim = 0;
> if (src->Register.Dimension) {
> dim = src->Dimension.Index;
> - if (src->Register.Indirect) {
> - snprintf(srcs[i], 255, "%s(%s%subo%dcontents[addr0 + %d]%s)", stypeprefix, prefix, cname, dim, src->Register.Index, swizzle);
> - } else
> - snprintf(srcs[i], 255, "%s(%s%subo%dcontents[%d]%s)", stypeprefix, prefix, cname, dim, src->Register.Index, swizzle);
> + if (src->Dimension.Indirect) {
> + assert(src->DimIndirect.File == TGSI_FILE_ADDRESS);
> + ctx->uses_gpu_shader5 = true;
> + if (src->Register.Indirect) {
> + assert(src->Indirect.File == TGSI_FILE_ADDRESS);
> + snprintf(srcs[i], 255, "%s(%s%suboarr[addr%d].ubocontents[addr%d + %d]%s)", stypeprefix, prefix, cname, src->DimIndirect.Index, src->Indirect.Index, src->Register.Index, swizzle);
> + } else
> + snprintf(srcs[i], 255, "%s(%s%suboarr[addr%d].ubocontents[%d]%s)", stypeprefix, prefix, cname, src->DimIndirect.Index, src->Register.Index, swizzle);
> + } else {
> + if (ctx->info.dimension_indirect_files & (1 << TGSI_FILE_CONSTANT)) {
> + if (src->Register.Indirect) {
> + snprintf(srcs[i], 255, "%s(%s%suboarr[%d].ubocontents[addr%d + %d]%s)", stypeprefix, prefix, cname, dim, src->Indirect.Index, src->Register.Index, swizzle);
> + } else
> + snprintf(srcs[i], 255, "%s(%s%suboarr[%d].ubocontents[%d]%s)", stypeprefix, prefix, cname, dim, src->Register.Index, swizzle);
> + } else {
> + if (src->Register.Indirect) {
> + snprintf(srcs[i], 255, "%s(%s%subo%dcontents[addr0 + %d]%s)", stypeprefix, prefix, cname, dim, src->Register.Index, swizzle);
> + } else
> + snprintf(srcs[i], 255, "%s(%s%subo%dcontents[%d]%s)", stypeprefix, prefix, cname, dim, src->Register.Index, swizzle);
> + }
> + }
> } else {
> const char *csp;
> ctx->has_ints = true;
> - if (stype == TGSI_TYPE_FLOAT || stype == TGSI_TYPE_UNTYPED)
> + if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE && i == 1)
> + csp = "ivec4";
> + else if (stype == TGSI_TYPE_FLOAT || stype == TGSI_TYPE_UNTYPED)
> csp = "uintBitsToFloat";
> else if (stype == TGSI_TYPE_SIGNED)
> csp = "ivec4";
> @@ -1810,7 +1977,17 @@ iter_instruction(struct tgsi_iterate_context *iter,
> }
> } else if (src->Register.File == TGSI_FILE_SAMPLER) {
> const char *cname = tgsi_proc_to_prefix(ctx->prog_type);
> - snprintf(srcs[i], 255, "%ssamp%d%s", cname, src->Register.Index, swizzle);
> + if (ctx->info.indirect_files & (1 << TGSI_FILE_SAMPLER)) {
> + int arr_idx = lookup_sampler_array(ctx, src->Register.Index);
> + if (src->Register.Indirect) {
> +
> + snprintf(srcs[i], 255, "%ssamp%d[addr%d+%d]%s", cname, arr_idx, src->Indirect.Index, src->Register.Index - ctx->sampler_arrays[arr_idx].first, swizzle);
> + } else {
> + snprintf(srcs[i], 255, "%ssamp%d[%d]%s", cname, arr_idx, src->Register.Index - ctx->sampler_arrays[arr_idx].first, swizzle);
> + }
> + } else {
> + snprintf(srcs[i], 255, "%ssamp%d%s", cname, src->Register.Index, swizzle);
> + }
> sreg_index = src->Register.Index;
> } else if (src->Register.File == TGSI_FILE_IMMEDIATE) {
> if (src->Register.Index >= ARRAY_SIZE(ctx->imm)) {
> @@ -1823,6 +2000,10 @@ iter_instruction(struct tgsi_iterate_context *iter,
> const char *vtype = "vec4";
> const char *imm_stypeprefix = stypeprefix;
>
> + if ((inst->Instruction.Opcode == TGSI_OPCODE_TG4 && i == 1) ||
> + (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE && i == 1))
> + stype = TGSI_TYPE_SIGNED;
> +
> if (imd->type == TGSI_IMM_UINT32 || imd->type == TGSI_IMM_INT32) {
> if (imd->type == TGSI_IMM_UINT32)
> vtype = "uvec4";
> @@ -1853,6 +2034,14 @@ iter_instruction(struct tgsi_iterate_context *iter,
> idx = src->Register.SwizzleZ;
> else if (j == 3)
> idx = src->Register.SwizzleW;
> +
> + if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && i == 1 && j == 0) {
> + if (imd->val[idx].ui > 0) {
> + tg4_has_component = true;
> + ctx->uses_gpu_shader5 = true;
> + }
> + }
> +
> switch (imd->type) {
> case TGSI_IMM_FLOAT32:
> if (isinf(imd->val[idx].f) || isnan(imd->val[idx].f)) {
> @@ -2134,7 +2323,7 @@ iter_instruction(struct tgsi_iterate_context *iter,
> case TGSI_OPCODE_TXP:
> case TGSI_OPCODE_TXQ:
> case TGSI_OPCODE_LODQ:
> - ret = translate_tex(ctx, inst, sreg_index, srcs, dsts, writemask, dstconv, dst_override_no_wm[0]);
> + ret = translate_tex(ctx, inst, sreg_index, srcs, dsts, writemask, dstconv, dst_override_no_wm[0], tg4_has_component);
> if (ret)
> return FALSE;
> break;
> @@ -2235,11 +2424,11 @@ iter_instruction(struct tgsi_iterate_context *iter,
> EMIT_BUF_WITH_RET(ctx, "return;\n");
> break;
> case TGSI_OPCODE_ARL:
> - snprintf(buf, 255, "addr0 = int(floor(%s)%s);\n", srcs[0], writemask);
> + snprintf(buf, 255, "%s = int(floor(%s)%s);\n", dsts[0], srcs[0], writemask);
> EMIT_BUF_WITH_RET(ctx, buf);
> break;
> case TGSI_OPCODE_UARL:
> - snprintf(buf, 255, "addr0 = int(%s);\n", srcs[0]);
> + snprintf(buf, 255, "%s = int(%s);\n", dsts[0], srcs[0]);
> EMIT_BUF_WITH_RET(ctx, buf);
> break;
> case TGSI_OPCODE_XPD:
> @@ -2260,7 +2449,8 @@ iter_instruction(struct tgsi_iterate_context *iter,
> snprintf(buf, 255, "break;\n");
> EMIT_BUF_WITH_RET(ctx, buf);
> break;
> - case TGSI_OPCODE_EMIT:
> + case TGSI_OPCODE_EMIT: {
> + struct immed *imd = &ctx->imm[(inst->Src[0].Register.Index)];
> if (ctx->so && ctx->key->gs_present) {
> emit_so_movs(ctx);
> }
> @@ -2270,12 +2460,103 @@ iter_instruction(struct tgsi_iterate_context *iter,
> ret = emit_prescale(ctx);
> if (ret)
> return FALSE;
> - snprintf(buf, 255, "EmitVertex();\n");
> + if (imd->val[inst->Src[0].Register.SwizzleX].ui > 0) {
> + ctx->uses_gpu_shader5 = true;
> + snprintf(buf, 255, "EmitStreamVertex(%d);\n", imd->val[inst->Src[0].Register.SwizzleX].ui);
> + } else
> + snprintf(buf, 255, "EmitVertex();\n");
> + EMIT_BUF_WITH_RET(ctx, buf);
> + break;
> + }
> + case TGSI_OPCODE_ENDPRIM: {
> + struct immed *imd = &ctx->imm[(inst->Src[0].Register.Index)];
> + if (imd->val[inst->Src[0].Register.SwizzleX].ui > 0) {
> + ctx->uses_gpu_shader5 = true;
> + snprintf(buf, 255, "EndStreamPrimitive(%d);\n", imd->val[inst->Src[0].Register.SwizzleX].ui);
> + } else
> + snprintf(buf, 255, "EndPrimitive();\n");
> + EMIT_BUF_WITH_RET(ctx, buf);
> + break;
> + }
> + case TGSI_OPCODE_INTERP_CENTROID:
> + snprintf(buf, 255, "interp_temp = interpolateAtCentroid(%s);\n", interpSrc0);
> + EMIT_BUF_WITH_RET(ctx, buf);
> + snprintf(buf, 255, "%s = %s(%s(interp_temp%s));\n", dsts[0], dstconv, dtypeprefix, interpSwizzle0);
> + EMIT_BUF_WITH_RET(ctx, buf);
> + ctx->write_interp_temp = true;
> + ctx->uses_gpu_shader5 = true;
> + break;
> + case TGSI_OPCODE_INTERP_SAMPLE:
> + snprintf(buf, 255, "interp_temp = interpolateAtSample(%s, %s.x);\n", interpSrc0, srcs[1]);
> + EMIT_BUF_WITH_RET(ctx, buf);
> + snprintf(buf, 255, "%s = %s(%s(interp_temp%s));\n", dsts[0], dstconv, dtypeprefix, interpSwizzle0);
> + EMIT_BUF_WITH_RET(ctx, buf);
> + ctx->write_interp_temp = true;
> + ctx->uses_gpu_shader5 = true;
> + break;
> + case TGSI_OPCODE_INTERP_OFFSET:
> + snprintf(buf, 255, "interp_temp = interpolateAtOffset(%s, %s.xy);\n", interpSrc0, srcs[1]);
> + EMIT_BUF_WITH_RET(ctx, buf);
> + snprintf(buf, 255, "%s = %s(%s(interp_temp%s));\n", dsts[0], dstconv, dtypeprefix, interpSwizzle0);
> + EMIT_BUF_WITH_RET(ctx, buf);
> + ctx->write_interp_temp = true;
> + ctx->uses_gpu_shader5 = true;
> + break;
> + case TGSI_OPCODE_UMUL_HI:
> + snprintf(buf, 255, "umulExtended(%s, %s, umul_temp, mul_temp);\n", srcs[0], srcs[1]);
> + EMIT_BUF_WITH_RET(ctx, buf);
> + snprintf(buf, 255, "%s = %s(%s(umul_temp));\n", dsts[0], dstconv, dtypeprefix);
> + EMIT_BUF_WITH_RET(ctx, buf);
> + ctx->uses_gpu_shader5 = true;
> + ctx->write_mul_temp = true;
> + break;
> + case TGSI_OPCODE_IMUL_HI:
> + snprintf(buf, 255, "imulExtended(%s, %s, imul_temp, mul_temp);\n", srcs[0], srcs[1]);
> + EMIT_BUF_WITH_RET(ctx, buf);
> + snprintf(buf, 255, "%s = %s(%s(imul_temp));\n", dsts[0], dstconv, dtypeprefix);
> + EMIT_BUF_WITH_RET(ctx, buf);
> + ctx->uses_gpu_shader5 = true;
> + ctx->write_mul_temp = true;
> + break;
> +
> + case TGSI_OPCODE_IBFE:
> + snprintf(buf, 255, "%s = %s(%s(bitfieldExtract(%s, int(%s.x), int(%s.x))));\n", dsts[0], dstconv, dtypeprefix, srcs[0], srcs[1], srcs[2]);
> + EMIT_BUF_WITH_RET(ctx, buf);
> + ctx->uses_gpu_shader5 = true;
> + break;
> + case TGSI_OPCODE_UBFE:
> + snprintf(buf, 255, "%s = %s(%s(bitfieldExtract(%s, int(%s.x), int(%s.x))));\n", dsts[0], dstconv, dtypeprefix, srcs[0], srcs[1], srcs[2]);
> + EMIT_BUF_WITH_RET(ctx, buf);
> + ctx->uses_gpu_shader5 = true;
> + break;
> + case TGSI_OPCODE_BFI:
> + snprintf(buf, 255, "%s = %s(uintBitsToFloat(bitfieldInsert(%s, %s, int(%s), int(%s))));\n", dsts[0], dstconv, srcs[0], srcs[1], srcs[2], srcs[3]);
> + EMIT_BUF_WITH_RET(ctx, buf);
> + ctx->uses_gpu_shader5 = true;
> + break;
> + case TGSI_OPCODE_BREV:
> + snprintf(buf, 255, "%s = %s(%s(bitfieldReverse(%s)));\n", dsts[0], dstconv, dtypeprefix, srcs[0]);
> + EMIT_BUF_WITH_RET(ctx, buf);
> + ctx->uses_gpu_shader5 = true;
> + break;
> + case TGSI_OPCODE_POPC:
> + snprintf(buf, 255, "%s = %s(%s(bitCount(%s)));\n", dsts[0], dstconv, dtypeprefix, srcs[0]);
> + EMIT_BUF_WITH_RET(ctx, buf);
> + ctx->uses_gpu_shader5 = true;
> + break;
> + case TGSI_OPCODE_LSB:
> + snprintf(buf, 255, "%s = %s(%s(findLSB(%s)));\n", dsts[0], dstconv, dtypeprefix, srcs[0]);
> EMIT_BUF_WITH_RET(ctx, buf);
> + ctx->uses_gpu_shader5 = true;
> break;
> - case TGSI_OPCODE_ENDPRIM:
> - snprintf(buf, 255, "EndPrimitive();\n");
> + case TGSI_OPCODE_IMSB:
> + case TGSI_OPCODE_UMSB:
> + snprintf(buf, 255, "%s = %s(%s(findMSB(%s)));\n", dsts[0], dstconv, dtypeprefix, srcs[0]);
> EMIT_BUF_WITH_RET(ctx, buf);
> + ctx->uses_gpu_shader5 = true;
> + break;
> + case TGSI_OPCODE_BARRIER:
> + snprintf(buf, 255, "barrier();\n");
> break;
> default:
> fprintf(stderr,"failed to convert opcode %d\n", inst->Instruction.Opcode);
> @@ -2419,13 +2700,29 @@ static const char *get_interp_string(struct vrend_shader_cfg *cfg, int interpola
> }
> }
>
> +static const char *get_aux_string(struct vrend_shader_cfg *cfg, bool centroid)
> +{
> + return centroid ? "centroid " : "";
> +}
> +
> +static const char get_return_type_prefix(enum tgsi_return_type type)
> +{
> + if (type == TGSI_RETURN_TYPE_SINT)
> + return 'i';
> + if (type == TGSI_RETURN_TYPE_UINT)
> + return 'u';
> + return ' ';
> +}
> +
> static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
> {
> int i;
> char buf[255];
> char postfix[8];
> - const char *prefix = "";
> + const char *prefix = "", *auxprefix = "";
> bool fcolor_emitted[2], bcolor_emitted[2];
> + int nsamp;
> + const char *sname = tgsi_proc_to_prefix(ctx->prog_type);
> ctx->num_interps = 0;
>
> if (ctx->so && ctx->so->num_outputs >= PIPE_MAX_SO_OUTPUTS) {
> @@ -2473,6 +2770,7 @@ static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
> prefix = get_interp_string(ctx->cfg, ctx->inputs[i].interpolate, ctx->key->flatshade);
> if (!prefix)
> prefix = "";
> + auxprefix = get_aux_string(ctx->cfg, ctx->inputs[i].centroid);
> ctx->num_interps++;
> }
>
> @@ -2480,7 +2778,7 @@ static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
> snprintf(postfix, 8, "[%d]", gs_input_prim_to_size(ctx->gs_in_prim));
> } else
> postfix[0] = 0;
> - snprintf(buf, 255, "%sin vec4 %s%s;\n", prefix, ctx->inputs[i].glsl_name, postfix);
> + snprintf(buf, 255, "%s%sin vec4 %s%s;\n", prefix, auxprefix, ctx->inputs[i].glsl_name, postfix);
> STRCAT_WITH_RET(glsl_hdr, buf);
> }
> }
> @@ -2507,7 +2805,10 @@ static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
> } else
> prefix = "";
> /* ugly leave spaces to patch interp in later */
> - snprintf(buf, 255, "%s%sout vec4 %s;\n", prefix, ctx->outputs[i].invariant ? "invariant " : "", ctx->outputs[i].glsl_name);
> + if (ctx->prog_type == TGSI_PROCESSOR_GEOMETRY && ctx->outputs[i].stream)
> + snprintf(buf, 255, "layout (stream = %d) %s%sout vec4 %s;\n", ctx->outputs[i].stream, prefix, ctx->outputs[i].invariant ? "invariant " : "", ctx->outputs[i].glsl_name);
> + else
> + snprintf(buf, 255, "%s%sout vec4 %s;\n", prefix, ctx->outputs[i].invariant ? "invariant " : "", ctx->outputs[i].glsl_name);
> STRCAT_WITH_RET(glsl_hdr, buf);
> } else if (ctx->outputs[i].invariant) {
> snprintf(buf, 255, "invariant %s;\n", ctx->outputs[i].glsl_name);
> @@ -2629,7 +2930,10 @@ static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
> snprintf(outtype, 6, "float");
> else
> snprintf(outtype, 6, "vec%d", ctx->so->output[i].num_components);
> - snprintf(buf, 255, "out %s tfout%d;\n", outtype, i);
> + if (ctx->so->output[i].stream && ctx->prog_type == TGSI_PROCESSOR_GEOMETRY)
> + snprintf(buf, 255, "layout (stream=%d) out %s tfout%d;\n", ctx->so->output[i].stream, outtype, i);
> + else
> + snprintf(buf, 255, "out %s tfout%d;\n", outtype, i);
> STRCAT_WITH_RET(glsl_hdr, buf);
> }
> }
> @@ -2638,6 +2942,20 @@ static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
> STRCAT_WITH_RET(glsl_hdr, buf);
> }
>
> + if (ctx->write_mul_temp) {
> + snprintf(buf, 255, "uvec4 mul_temp;\n");
> + STRCAT_WITH_RET(glsl_hdr, buf);
> + snprintf(buf, 255, "uvec4 umul_temp;\n");
> + STRCAT_WITH_RET(glsl_hdr, buf);
> + snprintf(buf, 255, "ivec4 imul_temp;\n");
> + STRCAT_WITH_RET(glsl_hdr, buf);
> + }
> +
> + if (ctx->write_interp_temp) {
> + snprintf(buf, 255, "vec4 interp_temp;\n");
> + STRCAT_WITH_RET(glsl_hdr, buf);
> + }
> +
> for (i = 0; i < ctx->num_address; i++) {
> snprintf(buf, 255, "int addr%d;\n", i);
> STRCAT_WITH_RET(glsl_hdr, buf);
> @@ -2659,27 +2977,49 @@ static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
> }
> }
> if (ctx->num_ubo) {
> - for (i = 0; i < ctx->num_ubo; i++) {
> - const char *cname = tgsi_proc_to_prefix(ctx->prog_type);
> - snprintf(buf, 255, "uniform %subo%d { vec4 %subo%dcontents[%d]; };\n", cname, ctx->ubo_idx[i], cname, ctx->ubo_idx[i], ctx->ubo_sizes[i]);
> + const char *cname = tgsi_proc_to_prefix(ctx->prog_type);
> +
> + if (ctx->info.dimension_indirect_files & (1 << TGSI_FILE_CONSTANT)) {
> + ctx->glsl_ver_required = 150;
> + snprintf(buf, 255, "uniform %subo { vec4 ubocontents[%d]; } %suboarr[%d];\n", cname, ctx->ubo_sizes[0], cname, ctx->num_ubo);
> STRCAT_WITH_RET(glsl_hdr, buf);
> + } else {
> + for (i = 0; i < ctx->num_ubo; i++) {
> + snprintf(buf, 255, "uniform %subo%d { vec4 %subo%dcontents[%d]; };\n", cname, ctx->ubo_idx[i], cname, ctx->ubo_idx[i], ctx->ubo_sizes[i]);
> + STRCAT_WITH_RET(glsl_hdr, buf);
> + }
> }
> }
> - for (i = 0; i < 32; i++) {
> - int is_shad = 0;
> - const char *stc;
> - char ptc;
>
> - if ((ctx->samplers_used & (1 << i)) == 0)
> - continue;
> + if (ctx->info.indirect_files & (1 << TGSI_FILE_SAMPLER)) {
> + for (i = 0; i < ctx->num_sampler_arrays; i++) {
> + int is_shad = 0;
> + const char *stc;
> + stc = vrend_shader_samplertypeconv(ctx->sampler_arrays[i].sview_type, &is_shad);
> + if (!stc)
> + continue;
> + snprintf(buf, 255, "uniform %csampler%s %ssamp%d[%d];\n",
> + get_return_type_prefix(ctx->sampler_arrays[i].sview_rtype),
> + stc, sname, ctx->sampler_arrays[i].idx,
> + ctx->sampler_arrays[i].last - ctx->sampler_arrays[i].first);
> + STRCAT_WITH_RET(glsl_hdr, buf);
> + }
> + } else {
> + nsamp = util_last_bit(ctx->samplers_used);
> + for (i = 0; i < nsamp; i++) {
> + int is_shad = 0;
> + const char *stc;
> + char ptc;
>
> - ptc = vrend_shader_samplerreturnconv(ctx->samplers[i].tgsi_sampler_return);
> - stc = vrend_shader_samplertypeconv(ctx->samplers[i].tgsi_sampler_type, &is_shad);
> + if ((ctx->samplers_used & (1 << i)) == 0)
> + continue;
>
> - if (stc) {
> const char *sname;
> const char *precision;
>
> + ptc = vrend_shader_samplerreturnconv(ctx->samplers[i].tgsi_sampler_return);
> + stc = vrend_shader_samplertypeconv(ctx->samplers[i].tgsi_sampler_type, &is_shad);
> +
> sname = tgsi_proc_to_prefix(ctx->prog_type);
>
> if (ctx->cfg->use_gles) {
> @@ -2733,6 +3073,7 @@ static boolean fill_fragment_interpolants(struct dump_ctx *ctx, struct vrend_sha
> sinfo->interpinfo[index].semantic_name = ctx->inputs[i].name;
> sinfo->interpinfo[index].semantic_index = ctx->inputs[i].sid;
> sinfo->interpinfo[index].interpolate = ctx->inputs[i].interpolate;
> + sinfo->interpinfo[index].centroid = ctx->inputs[i].centroid;
> index++;
> }
> return TRUE;
> @@ -2782,7 +3123,9 @@ char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
> ctx.key = key;
> ctx.cfg = cfg;
> ctx.prog_type = -1;
> -
> + ctx.num_sampler_arrays = 0;
> + ctx.sampler_arrays = NULL;
> + ctx.last_sampler_array_idx = -1;
> tgsi_scan_shader(tokens, &ctx.info);
> /* if we are in core profile mode we should use GLSL 1.40 */
> if (cfg->use_core_profile && cfg->glsl_version >= 140)
> @@ -2796,6 +3139,12 @@ char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
> } else
> ctx.so_names = NULL;
>
> + if (ctx.info.dimension_indirect_files & (1 << TGSI_FILE_CONSTANT))
> + ctx.glsl_ver_required = 150;
> +
> + if (ctx.info.indirect_files & (1 << TGSI_FILE_SAMPLER))
> + ctx.uses_gpu_shader5 = true;
> +
> ctx.glsl_main = malloc(4096);
> if (!ctx.glsl_main)
> goto fail;
> @@ -2842,6 +3191,7 @@ char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
> sinfo->samplers_used_mask = ctx.samplers_used;
> sinfo->num_consts = ctx.num_consts;
> sinfo->num_ubos = ctx.num_ubo;
> + sinfo->ubo_indirect = ctx.info.dimension_indirect_files & (1 << TGSI_FILE_CONSTANT);
> sinfo->num_inputs = ctx.num_inputs;
> sinfo->num_interps = ctx.num_interps;
> sinfo->num_outputs = ctx.num_outputs;
> @@ -2850,6 +3200,8 @@ char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
> sinfo->gs_out_prim = ctx.gs_out_prim;
> sinfo->so_names = ctx.so_names;
> sinfo->attrib_input_mask = ctx.attrib_input_mask;
> + sinfo->sampler_arrays = ctx.sampler_arrays;
> + sinfo->num_sampler_arrays = ctx.num_sampler_arrays;
> return glsl_final;
> fail:
> free(ctx.glsl_main);
> @@ -2862,7 +3214,7 @@ char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
>
> static void replace_interp(char *program,
> const char *var_name,
> - const char *pstring)
> + const char *pstring, const char *auxstring)
> {
> char *ptr;
> int mylen = strlen(INTERP_PREFIX) + strlen("out vec4 ");
> @@ -2874,7 +3226,9 @@ static void replace_interp(char *program,
>
> ptr -= mylen;
>
> + memset(ptr, ' ', strlen(INTERP_PREFIX));
> memcpy(ptr, pstring, strlen(pstring));
> + memcpy(ptr + strlen(pstring), auxstring, strlen(auxstring));
> }
>
> bool vrend_patch_vertex_shader_interpolants(struct vrend_shader_cfg *cfg, char *program,
> @@ -2882,7 +3236,7 @@ bool vrend_patch_vertex_shader_interpolants(struct vrend_shader_cfg *cfg, char *
> struct vrend_shader_info *fs_info, const char *oprefix, bool flatshade)
> {
> int i;
> - const char *pstring;
> + const char *pstring, *auxstring;
> char glsl_name[64];
> if (!vs_info || !fs_info)
> return true;
> @@ -2895,27 +3249,29 @@ bool vrend_patch_vertex_shader_interpolants(struct vrend_shader_cfg *cfg, char *
> if (!pstring)
> continue;
>
> + auxstring = get_aux_string(cfg, fs_info->interpinfo[i].centroid);
> +
> switch (fs_info->interpinfo[i].semantic_name) {
> case TGSI_SEMANTIC_COLOR:
> /* color is a bit trickier */
> if (fs_info->glsl_ver < 140) {
> if (fs_info->interpinfo[i].semantic_index == 1) {
> - replace_interp(program, "gl_FrontSecondaryColor", pstring);
> - replace_interp(program, "gl_BackSecondaryColor", pstring);
> + replace_interp(program, "gl_FrontSecondaryColor", pstring, auxstring);
> + replace_interp(program, "gl_BackSecondaryColor", pstring, auxstring);
> } else {
> - replace_interp(program, "gl_FrontColor", pstring);
> - replace_interp(program, "gl_BackColor", pstring);
> + replace_interp(program, "gl_FrontColor", pstring, auxstring);
> + replace_interp(program, "gl_BackColor", pstring, auxstring);
> }
> } else {
> snprintf(glsl_name, 64, "ex_c%d", fs_info->interpinfo[i].semantic_index);
> - replace_interp(program, glsl_name, pstring);
> + replace_interp(program, glsl_name, pstring, auxstring);
> snprintf(glsl_name, 64, "ex_bc%d", fs_info->interpinfo[i].semantic_index);
> - replace_interp(program, glsl_name, pstring);
> + replace_interp(program, glsl_name, pstring, auxstring);
> }
> break;
> case TGSI_SEMANTIC_GENERIC:
> snprintf(glsl_name, 64, "%s_g%d", oprefix, fs_info->interpinfo[i].semantic_index);
> - replace_interp(program, glsl_name, pstring);
> + replace_interp(program, glsl_name, pstring, auxstring);
> break;
> default:
> fprintf(stderr,"unhandled semantic: %x\n", fs_info->interpinfo[i].semantic_name);
> diff --git a/src/vrend_shader.h b/src/vrend_shader.h
> index e54a85c..1a6d3d8 100644
> --- a/src/vrend_shader.h
> +++ b/src/vrend_shader.h
> @@ -32,6 +32,15 @@ struct vrend_interp_info {
> int semantic_name;
> int semantic_index;
> int interpolate;
> + bool centroid;
> +};
> +
> +struct vrend_sampler_array {
> + int first;
> + int last;
> + int idx;
> + int sview_type;
> + int sview_rtype;
> };
>
> struct vrend_shader_info {
> @@ -41,6 +50,7 @@ struct vrend_shader_info {
> int num_interps;
> int num_outputs;
> int num_ubos;
> + bool ubo_indirect;
> int num_ucp;
> int glsl_ver;
> bool has_pervertex_out;
> @@ -49,6 +59,10 @@ struct vrend_shader_info {
> uint32_t shadow_samp_mask;
> int gs_out_prim;
> uint32_t attrib_input_mask;
> +
> + struct vrend_sampler_array *sampler_arrays;
> + int num_sampler_arrays;
> +
> struct pipe_stream_output_info so_info;
>
> struct vrend_interp_info *interpinfo;
> @@ -90,4 +104,6 @@ char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
> struct vrend_shader_key *key,
> struct vrend_shader_info *sinfo);
> const char *vrend_shader_samplertypeconv(int sampler_type, int *is_shad);
> +
> +int shader_lookup_sampler_array(struct vrend_shader_info *sinfo, int index);
> #endif
> --
> 2.14.3
>
> _______________________________________________
> virglrenderer-devel mailing list
> virglrenderer-devel at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/virglrenderer-devel
More information about the virglrenderer-devel mailing list